Пример #1
0
/* Subroutine */ int stimqp_(char *line, integer *nm, integer *mval, integer *
	nval, integer *nlda, integer *ldaval, real *timmin, real *a, real *
	copya, real *tau, real *work, integer *iwork, real *reslts, integer *
	ldr1, integer *ldr2, integer *nout, ftnlen line_len)
{
    /* Initialized data */

    static char subnam[6*1] = "SGEQPF";
    static integer modes[2] = { 2,3 };
    static integer iseed[4] = { 0,0,0,1 };

    /* Format strings */
    static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)";
    static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops "
	    "***\002)";
    static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)";

    /* System generated locals */
    integer reslts_dim1, reslts_dim2, reslts_offset, i__1, i__2, i__3;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
	     s_wsle(cilist *), e_wsle(void);

    /* Local variables */
    static integer ilda;
    static real cond;
    static integer mode;
    static real dmax__;
    static integer info;
    static char path[3];
    static real time;
    static integer i__, m, n;
    static char cname[6];
    static integer imode, minmn;
    extern doublereal sopla_(char *, integer *, integer *, integer *, integer 
	    *, integer *);
    extern /* Subroutine */ int icopy_(integer *, integer *, integer *, 
	    integer *, integer *);
    static real s1, s2;
    static integer ic;
    extern /* Subroutine */ int sprtb5_(char *, char *, char *, integer *, 
	    integer *, integer *, integer *, integer *, integer *, real *, 
	    integer *, integer *, integer *, ftnlen, ftnlen, ftnlen);
    static integer im;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer 
	    *, integer *, integer *, integer *, integer *, ftnlen);
    extern doublereal second_(void);
    extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, 
	    logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), sgeqpf_(
	    integer *, integer *, real *, integer *, integer *, real *, real *
	    , integer *), slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *);
    extern doublereal smflop_(real *, real *, integer *);
    static real untime;
    static logical timsub[1];
    extern /* Subroutine */ int slatms_(integer *, integer *, char *, integer 
	    *, char *, real *, integer *, real *, real *, integer *, integer *
	    , char *, real *, integer *, real *, integer *);
    static integer lda, icl;
    static real ops;

    /* Fortran I/O blocks */
    static cilist io___8 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___27 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___28 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___29 = { 0, 0, 0, 0, 0 };



#define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6]
#define reslts_ref(a_1,a_2,a_3) reslts[((a_3)*reslts_dim2 + (a_2))*\
reslts_dim1 + a_1]


/*  -- LAPACK timing routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    STIMQP times the LAPACK routines to perform the QR factorization with   
    column pivoting of a REAL general matrix.   

    Two matrix types may be used for timing.  The number of types is   
    set in the parameter NMODE and the matrix types are set in the vector   
    MODES, using the following key:   
       2.  BREAK1    D(1:N-1)=1 and D(N)=1.0/COND in SLATMS   
       3.  GEOM      D(I)=COND**(-(I-1)/(N-1)) in SLATMS   
    These numbers are chosen to correspond with the matrix types in the   
    test code.   

    Arguments   
    =========   

    LINE    (input) CHARACTER*80   
            The input line that requested this routine.  The first six   
            characters contain either the name of a subroutine or a   
            generic path name.  The remaining characters may be used to   
            specify the individual routines to be timed.  See ATIMIN for   
            a full description of the format of the input line.   

    NM      (input) INTEGER   
            The number of values of M and N contained in the vectors   
            MVAL and NVAL.  The matrix sizes are used in pairs (M,N).   

    MVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix row dimension M.   

    NVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix column dimension N.   

    NLDA    (input) INTEGER   
            The number of values of LDA contained in the vector LDAVAL.   

    LDAVAL  (input) INTEGER array, dimension (NLDA)   
            The values of the leading dimension of the array A.   

    TIMMIN  (input) REAL   
            The minimum time a subroutine will be timed.   

    A       (workspace) REAL array, dimension (LDAMAX*NMAX)   
            where LDAMAX and NMAX are the maximum values of LDA and N.   

    COPYA   (workspace) REAL array, dimension (LDAMAX*NMAX)   

    TAU     (workspace) REAL array, dimension (min(M,N))   

    WORK    (workspace) REAL array, dimension (3*NMAX)   

    IWORK   (workspace) INTEGER array, dimension (2*NMAX)   

    RESLTS  (workspace) REAL array, dimension   
                        (LDR1,LDR2,NLDA)   
            The timing results for each subroutine over the relevant   
            values of MODE, (M,N), and LDA.   

    LDR1    (input) INTEGER   
            The first dimension of RESLTS.  LDR1 >= max(1,NM).   

    LDR2    (input) INTEGER   
            The second dimension of RESLTS.  LDR2 >= max(1,NM).   

    NOUT    (input) INTEGER   
            The unit number for output.   

    =====================================================================   

       Parameter adjustments */
    --mval;
    --nval;
    --ldaval;
    --a;
    --copya;
    --tau;
    --work;
    --iwork;
    reslts_dim1 = *ldr1;
    reslts_dim2 = *ldr2;
    reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * 1);
    reslts -= reslts_offset;

    /* Function Body   

       Extract the timing request from the input line. */

    s_copy(path, "Single precision", (ftnlen)1, (ftnlen)16);
    s_copy(path + 1, "QP", (ftnlen)2, (ftnlen)2);
    atimin_(path, line, &c__1, subnam, timsub, nout, &info, (ftnlen)3, (
	    ftnlen)80, (ftnlen)6);
    if (! timsub[0] || info != 0) {
	goto L80;
    }

/*     Check that M <= LDA for the input values. */

    s_copy(cname, line, (ftnlen)6, (ftnlen)6);
    atimck_(&c__1, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, (
	    ftnlen)6);
    if (info > 0) {
	io___8.ciunit = *nout;
	s_wsfe(&io___8);
	do_fio(&c__1, cname, (ftnlen)6);
	e_wsfe();
	goto L80;
    }

/*     Set the condition number and scaling factor for the matrices   
       to be generated. */

    dmax__ = 1.f;
    cond = 1.f / slamch_("Precision");

/*     Do for each pair of values (M,N): */

    i__1 = *nm;
    for (im = 1; im <= i__1; ++im) {
	m = mval[im];
	n = nval[im];
	minmn = min(m,n);

/*        Do for each value of LDA: */

	i__2 = *nlda;
	for (ilda = 1; ilda <= i__2; ++ilda) {
	    lda = ldaval[ilda];
	    for (imode = 1; imode <= 2; ++imode) {
		mode = modes[imode - 1];

/*              Generate a test matrix of size m by n using the   
                singular value distribution indicated by MODE. */

		i__3 = n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    iwork[n + i__] = 0;
/* L10: */
		}
		slatms_(&m, &n, "Uniform", iseed, "Nonsymm", &tau[1], &mode, &
			cond, &dmax__, &m, &n, "No packing", &copya[1], &lda, 
			&work[1], &info);

/*              SGEQPF:  QR factorization with column pivoting */

		slacpy_("All", &m, &n, &copya[1], &lda, &a[1], &lda);
		icopy_(&n, &iwork[n + 1], &c__1, &iwork[1], &c__1);
		ic = 0;
		s1 = second_();
L20:
		sgeqpf_(&m, &n, &a[1], &lda, &iwork[1], &tau[1], &work[1], &
			info);
		s2 = second_();
		time = s2 - s1;
		++ic;
		if (time < *timmin) {
		    slacpy_("All", &m, &n, &copya[1], &lda, &a[1], &lda);
		    icopy_(&n, &iwork[n + 1], &c__1, &iwork[1], &c__1);
		    goto L20;
		}

/*              Subtract the time used in SLACPY and ICOPY. */

		icl = 1;
		s1 = second_();
L30:
		s2 = second_();
		untime = s2 - s1;
		++icl;
		if (icl <= ic) {
		    slacpy_("All", &m, &n, &copya[1], &lda, &a[1], &lda);
		    icopy_(&n, &iwork[n + 1], &c__1, &iwork[1], &c__1);
		    goto L30;
		}

		time = (time - untime) / (real) ic;
		ops = sopla_("SGEQPF", &m, &n, &c__0, &c__0, &c__1)
			;
		reslts_ref(imode, im, ilda) = smflop_(&ops, &time, &info);

/* L40: */
	    }
/* L50: */
	}
/* L60: */
    }

/*     Print tables of results */

    io___27.ciunit = *nout;
    s_wsfe(&io___27);
    do_fio(&c__1, subnam_ref(0, 1), (ftnlen)6);
    e_wsfe();
    if (*nlda > 1) {
	i__1 = *nlda;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    io___28.ciunit = *nout;
	    s_wsfe(&io___28);
	    do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
	    do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer));
	    e_wsfe();
/* L70: */
	}
    }
    io___29.ciunit = *nout;
    s_wsle(&io___29);
    e_wsle();
    sprtb5_("Type", "M", "N", &c__2, modes, nm, &mval[1], &nval[1], nlda, &
	    reslts[reslts_offset], ldr1, ldr2, nout, (ftnlen)4, (ftnlen)1, (
	    ftnlen)1);
L80:
    return 0;

/*     End of STIMQP */

} /* stimqp_ */
Пример #2
0
/* Subroutine */ int dtimlq_(char *line, integer *nm, integer *mval, integer *
	nval, integer *nk, integer *kval, integer *nnb, integer *nbval, 
	integer *nxval, integer *nlda, integer *ldaval, doublereal *timmin, 
	doublereal *a, doublereal *tau, doublereal *b, doublereal *work, 
	doublereal *reslts, integer *ldr1, integer *ldr2, integer *ldr3, 
	integer *nout, ftnlen line_len)
{
    /* Initialized data */

    static char subnam[6*3] = "DGELQF" "DORGLQ" "DORMLQ";
    static char sides[1*2] = "L" "R";
    static char transs[1*2] = "N" "T";
    static integer iseed[4] = { 0,0,0,1 };

    /* Format strings */
    static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)";
    static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops "
	    "***\002)";
    static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)";
    static char fmt_9996[] = "(5x,\002K = min(M,N)\002,/)";
    static char fmt_9995[] = "(/5x,a6,\002 with SIDE = '\002,a1,\002', TRANS"
	    " = '\002,a1,\002', \002,a1,\002 =\002,i6,/)";
    static char fmt_9994[] = "(\002 *** No pairs (M,N) found with M <= N: "
	    " \002,a6,\002 not timed\002)";

    /* System generated locals */
    integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, 
	    i__3, i__4, i__5, i__6;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
	     s_wsle(cilist *), e_wsle(void);

    /* Local variables */
    static integer ilda;
    static char labm[1], side[1];
    static integer info;
    static char path[3];
    static doublereal time;
    static integer isub, muse[12], nuse[12], i__, k, m, n;
    static char cname[6];
    static integer iside;
    extern doublereal dopla_(char *, integer *, integer *, integer *, integer 
	    *, integer *);
    static integer itoff, itran, minmn;
    extern /* Subroutine */ int icopy_(integer *, integer *, integer *, 
	    integer *, integer *);
    static char trans[1];
    static integer k1, i4, m1, n1;
    static doublereal s1, s2;
    extern /* Subroutine */ int dprtb4_(char *, char *, char *, integer *, 
	    integer *, integer *, integer *, integer *, integer *, integer *, 
	    doublereal *, integer *, integer *, integer *, ftnlen, ftnlen, 
	    ftnlen), dprtb5_(char *, char *, char *, integer *, integer *, 
	    integer *, integer *, integer *, integer *, doublereal *, integer 
	    *, integer *, integer *, ftnlen, ftnlen, ftnlen);
    static integer ic, nb, ik, im;
    extern doublereal dsecnd_(void);
    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *, integer *);
    static integer lw, nx, reseed[4];
    extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer 
	    *, integer *, integer *, integer *, integer *, ftnlen), dlacpy_(
	    char *, integer *, integer *, doublereal *, integer *, doublereal 
	    *, integer *);
    extern doublereal dmflop_(doublereal *, doublereal *, integer *);
    extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, 
	    logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dtimmg_(
	    integer *, integer *, integer *, doublereal *, integer *, integer 
	    *, integer *), dlatms_(integer *, integer *, char *, integer *, 
	    char *, doublereal *, integer *, doublereal *, doublereal *, 
	    integer *, integer *, char *, doublereal *, integer *, doublereal 
	    *, integer *), dorglq_(integer *, integer 
	    *, integer *, doublereal *, integer *, doublereal *, doublereal *,
	     integer *, integer *), dormlq_(char *, char *, integer *, 
	    integer *, integer *, doublereal *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, integer *), xlaenv_(integer *, integer *);
    static doublereal untime;
    static logical timsub[3];
    static integer lda, icl, inb, imx;
    static doublereal ops;

    /* Fortran I/O blocks */
    static cilist io___9 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___29 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___31 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___32 = { 0, 0, 0, 0, 0 };
    static cilist io___33 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___34 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___49 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___50 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___51 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___53 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___54 = { 0, 0, 0, fmt_9994, 0 };



#define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6]
#define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\
reslts_dim2 + (a_2))*reslts_dim1 + a_1]


/*  -- LAPACK timing routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    DTIMLQ times the LAPACK routines to perform the LQ factorization of   
    a DOUBLE PRECISION general matrix.   

    Arguments   
    =========   

    LINE    (input) CHARACTER*80   
            The input line that requested this routine.  The first six   
            characters contain either the name of a subroutine or a   
            generic path name.  The remaining characters may be used to   
            specify the individual routines to be timed.  See ATIMIN for   
            a full description of the format of the input line.   

    NM      (input) INTEGER   
            The number of values of M and N contained in the vectors   
            MVAL and NVAL.  The matrix sizes are used in pairs (M,N).   

    MVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix row dimension M.   

    NVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix column dimension N.   

    NK      (input) INTEGER   
            The number of values of K in the vector KVAL.   

    KVAL    (input) INTEGER array, dimension (NK)   
            The values of the matrix dimension K, used in DORMLQ.   

    NNB     (input) INTEGER   
            The number of values of NB and NX contained in the   
            vectors NBVAL and NXVAL.  The blocking parameters are used   
            in pairs (NB,NX).   

    NBVAL   (input) INTEGER array, dimension (NNB)   
            The values of the blocksize NB.   

    NXVAL   (input) INTEGER array, dimension (NNB)   
            The values of the crossover point NX.   

    NLDA    (input) INTEGER   
            The number of values of LDA contained in the vector LDAVAL.   

    LDAVAL  (input) INTEGER array, dimension (NLDA)   
            The values of the leading dimension of the array A.   

    TIMMIN  (input) DOUBLE PRECISION   
            The minimum time a subroutine will be timed.   

    A       (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX)   
            where LDAMAX and NMAX are the maximum values of LDA and N.   

    TAU     (workspace) DOUBLE PRECISION array, dimension (min(M,N))   

    B       (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX)   

    WORK    (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NBMAX)   
            where NBMAX is the maximum value of NB.   

    RESLTS  (workspace) DOUBLE PRECISION array, dimension   
                        (LDR1,LDR2,LDR3,2*NK)   
            The timing results for each subroutine over the relevant   
            values of (M,N), (NB,NX), and LDA.   

    LDR1    (input) INTEGER   
            The first dimension of RESLTS.  LDR1 >= max(1,NNB).   

    LDR2    (input) INTEGER   
            The second dimension of RESLTS.  LDR2 >= max(1,NM).   

    LDR3    (input) INTEGER   
            The third dimension of RESLTS.  LDR3 >= max(1,NLDA).   

    NOUT    (input) INTEGER   
            The unit number for output.   

    Internal Parameters   
    ===================   

    MODE    INTEGER   
            The matrix type.  MODE = 3 is a geometric distribution of   
            eigenvalues.  See DLATMS for further details.   

    COND    DOUBLE PRECISION   
            The condition number of the matrix.  The singular values are   
            set to values from DMAX to DMAX/COND.   

    DMAX    DOUBLE PRECISION   
            The magnitude of the largest singular value.   

    =====================================================================   

       Parameter adjustments */
    --mval;
    --nval;
    --kval;
    --nbval;
    --nxval;
    --ldaval;
    --a;
    --tau;
    --b;
    --work;
    reslts_dim1 = *ldr1;
    reslts_dim2 = *ldr2;
    reslts_dim3 = *ldr3;
    reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1)
	    );
    reslts -= reslts_offset;

    /* Function Body   

       Extract the timing request from the input line. */

    s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16);
    s_copy(path + 1, "LQ", (ftnlen)2, (ftnlen)2);
    atimin_(path, line, &c__3, subnam, timsub, nout, &info, (ftnlen)3, (
	    ftnlen)80, (ftnlen)6);
    if (info != 0) {
	goto L230;
    }

/*     Check that M <= LDA for the input values. */

    s_copy(cname, line, (ftnlen)6, (ftnlen)6);
    atimck_(&c__1, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, (
	    ftnlen)6);
    if (info > 0) {
	io___9.ciunit = *nout;
	s_wsfe(&io___9);
	do_fio(&c__1, cname, (ftnlen)6);
	e_wsfe();
	goto L230;
    }

/*     Do for each pair of values (M,N): */

    i__1 = *nm;
    for (im = 1; im <= i__1; ++im) {
	m = mval[im];
	n = nval[im];
	minmn = min(m,n);
	icopy_(&c__4, iseed, &c__1, reseed, &c__1);

/*        Do for each value of LDA: */

	i__2 = *nlda;
	for (ilda = 1; ilda <= i__2; ++ilda) {
	    lda = ldaval[ilda];

/*           Do for each pair of values (NB, NX) in NBVAL and NXVAL. */

	    i__3 = *nnb;
	    for (inb = 1; inb <= i__3; ++inb) {
		nb = nbval[inb];
		xlaenv_(&c__1, &nb);
		nx = nxval[inb];
		xlaenv_(&c__3, &nx);
/* Computing MAX */
		i__4 = 1, i__5 = m * max(1,nb);
		lw = max(i__4,i__5);

/*              Generate a test matrix of size M by N. */

		icopy_(&c__4, reseed, &c__1, iseed, &c__1);
		dlatms_(&m, &n, "Uniform", iseed, "Nonsym", &tau[1], &c__3, &
			c_b24, &c_b25, &m, &n, "No packing", &b[1], &lda, &
			work[1], &info);

		if (timsub[0]) {

/*                 DGELQF:  LQ factorization */

		    dlacpy_("Full", &m, &n, &b[1], &lda, &a[1], &lda);
		    ic = 0;
		    s1 = dsecnd_();
L10:
		    dgelqf_(&m, &n, &a[1], &lda, &tau[1], &work[1], &lw, &
			    info);
		    s2 = dsecnd_();
		    time = s2 - s1;
		    ++ic;
		    if (time < *timmin) {
			dlacpy_("Full", &m, &n, &b[1], &lda, &a[1], &lda);
			goto L10;
		    }

/*                 Subtract the time used in DLACPY. */

		    icl = 1;
		    s1 = dsecnd_();
L20:
		    s2 = dsecnd_();
		    untime = s2 - s1;
		    ++icl;
		    if (icl <= ic) {
			dlacpy_("Full", &m, &n, &a[1], &lda, &b[1], &lda);
			goto L20;
		    }

		    time = (time - untime) / (doublereal) ic;
		    ops = dopla_("DGELQF", &m, &n, &c__0, &c__0, &nb);
		    reslts_ref(inb, im, ilda, 1) = dmflop_(&ops, &time, &info)
			    ;
		} else {

/*                 If DGELQF was not timed, generate a matrix and factor   
                   it using DGELQF anyway so that the factored form of   
                   the matrix can be used in timing the other routines. */

		    dlacpy_("Full", &m, &n, &b[1], &lda, &a[1], &lda);
		    dgelqf_(&m, &n, &a[1], &lda, &tau[1], &work[1], &lw, &
			    info);
		}

		if (timsub[1]) {

/*                 DORGLQ:  Generate orthogonal matrix Q from the LQ   
                   factorization */

		    dlacpy_("Full", &minmn, &n, &a[1], &lda, &b[1], &lda);
		    ic = 0;
		    s1 = dsecnd_();
L30:
		    dorglq_(&minmn, &n, &minmn, &b[1], &lda, &tau[1], &work[1]
			    , &lw, &info);
		    s2 = dsecnd_();
		    time = s2 - s1;
		    ++ic;
		    if (time < *timmin) {
			dlacpy_("Full", &minmn, &n, &a[1], &lda, &b[1], &lda);
			goto L30;
		    }

/*                 Subtract the time used in DLACPY. */

		    icl = 1;
		    s1 = dsecnd_();
L40:
		    s2 = dsecnd_();
		    untime = s2 - s1;
		    ++icl;
		    if (icl <= ic) {
			dlacpy_("Full", &minmn, &n, &a[1], &lda, &b[1], &lda);
			goto L40;
		    }

		    time = (time - untime) / (doublereal) ic;
		    ops = dopla_("DORGLQ", &minmn, &n, &minmn, &c__0, &nb);
		    reslts_ref(inb, im, ilda, 2) = dmflop_(&ops, &time, &info)
			    ;
		}

/* L50: */
	    }
/* L60: */
	}
/* L70: */
    }

/*     Print tables of results */

    for (isub = 1; isub <= 2; ++isub) {
	if (! timsub[isub - 1]) {
	    goto L90;
	}
	io___29.ciunit = *nout;
	s_wsfe(&io___29);
	do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
	e_wsfe();
	if (*nlda > 1) {
	    i__1 = *nlda;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		io___31.ciunit = *nout;
		s_wsfe(&io___31);
		do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer));
		e_wsfe();
/* L80: */
	    }
	}
	io___32.ciunit = *nout;
	s_wsle(&io___32);
	e_wsle();
	if (isub == 2) {
	    io___33.ciunit = *nout;
	    s_wsfe(&io___33);
	    e_wsfe();
	}
	dprtb4_("(  NB,  NX)", "M", "N", nnb, &nbval[1], &nxval[1], nm, &mval[
		1], &nval[1], nlda, &reslts_ref(1, 1, 1, isub), ldr1, ldr2, 
		nout, (ftnlen)11, (ftnlen)1, (ftnlen)1);
L90:
	;
    }

/*     Time DORMLQ separately.  Here the starting matrix is M by N, and   
       K is the free dimension of the matrix multiplied by Q. */

    if (timsub[2]) {

/*        Check that K <= LDA for the input values. */

	atimck_(&c__3, cname, nk, &kval[1], nlda, &ldaval[1], nout, &info, (
		ftnlen)6);
	if (info > 0) {
	    io___34.ciunit = *nout;
	    s_wsfe(&io___34);
	    do_fio(&c__1, subnam_ref(0, 3), (ftnlen)6);
	    e_wsfe();
	    goto L230;
	}

/*        Use only the pairs (M,N) where M <= N. */

	imx = 0;
	i__1 = *nm;
	for (im = 1; im <= i__1; ++im) {
	    if (mval[im] <= nval[im]) {
		++imx;
		muse[imx - 1] = mval[im];
		nuse[imx - 1] = nval[im];
	    }
/* L100: */
	}

/*        DORMLQ:  Multiply by Q stored as a product of elementary   
          transformations   

          Do for each pair of values (M,N): */

	i__1 = imx;
	for (im = 1; im <= i__1; ++im) {
	    m = muse[im - 1];
	    n = nuse[im - 1];

/*           Do for each value of LDA: */

	    i__2 = *nlda;
	    for (ilda = 1; ilda <= i__2; ++ilda) {
		lda = ldaval[ilda];

/*              Generate an M by N matrix and form its LQ decomposition. */

		dlatms_(&m, &n, "Uniform", iseed, "Nonsymm", &tau[1], &c__3, &
			c_b24, &c_b25, &m, &n, "No packing", &a[1], &lda, &
			work[1], &info);
/* Computing MAX */
		i__3 = 1, i__4 = m * max(1,nb);
		lw = max(i__3,i__4);
		dgelqf_(&m, &n, &a[1], &lda, &tau[1], &work[1], &lw, &info);

/*              Do first for SIDE = 'L', then for SIDE = 'R' */

		i4 = 0;
		for (iside = 1; iside <= 2; ++iside) {
		    *(unsigned char *)side = *(unsigned char *)&sides[iside - 
			    1];

/*                 Do for each pair of values (NB, NX) in NBVAL and   
                   NXVAL. */

		    i__3 = *nnb;
		    for (inb = 1; inb <= i__3; ++inb) {
			nb = nbval[inb];
			xlaenv_(&c__1, &nb);
			nx = nxval[inb];
			xlaenv_(&c__3, &nx);

/*                    Do for each value of K in KVAL */

			i__4 = *nk;
			for (ik = 1; ik <= i__4; ++ik) {
			    k = kval[ik];

/*                       Sort out which variable is which */

			    if (iside == 1) {
				k1 = m;
				m1 = n;
				n1 = k;
/* Computing MAX */
				i__5 = 1, i__6 = n1 * max(1,nb);
				lw = max(i__5,i__6);
			    } else {
				k1 = m;
				n1 = n;
				m1 = k;
/* Computing MAX */
				i__5 = 1, i__6 = m1 * max(1,nb);
				lw = max(i__5,i__6);
			    }

/*                       Do first for TRANS = 'N', then for TRANS = 'T' */

			    itoff = 0;
			    for (itran = 1; itran <= 2; ++itran) {
				*(unsigned char *)trans = *(unsigned char *)&
					transs[itran - 1];
				dtimmg_(&c__0, &m1, &n1, &b[1], &lda, &c__0, &
					c__0);
				ic = 0;
				s1 = dsecnd_();
L110:
				dormlq_(side, trans, &m1, &n1, &k1, &a[1], &
					lda, &tau[1], &b[1], &lda, &work[1], &
					lw, &info);
				s2 = dsecnd_();
				time = s2 - s1;
				++ic;
				if (time < *timmin) {
				    dtimmg_(&c__0, &m1, &n1, &b[1], &lda, &
					    c__0, &c__0);
				    goto L110;
				}

/*                          Subtract the time used in DTIMMG. */

				icl = 1;
				s1 = dsecnd_();
L120:
				s2 = dsecnd_();
				untime = s2 - s1;
				++icl;
				if (icl <= ic) {
				    dtimmg_(&c__0, &m1, &n1, &b[1], &lda, &
					    c__0, &c__0);
				    goto L120;
				}

				time = (time - untime) / (doublereal) ic;
				i__5 = iside - 1;
				ops = dopla_("DORMLQ", &m1, &n1, &k1, &i__5, &
					nb);
				reslts_ref(inb, im, ilda, i4 + itoff + ik) = 
					dmflop_(&ops, &time, &info);
				itoff = *nk;
/* L130: */
			    }
/* L140: */
			}
/* L150: */
		    }
		    i4 = *nk << 1;
/* L160: */
		}
/* L170: */
	    }
/* L180: */
	}

/*        Print tables of results */

	isub = 3;
	i4 = 1;
	if (imx >= 1) {
	    for (iside = 1; iside <= 2; ++iside) {
		*(unsigned char *)side = *(unsigned char *)&sides[iside - 1];
		if (iside == 1) {
		    io___49.ciunit = *nout;
		    s_wsfe(&io___49);
		    do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
		    e_wsfe();
		    if (*nlda > 1) {
			i__1 = *nlda;
			for (i__ = 1; i__ <= i__1; ++i__) {
			    io___50.ciunit = *nout;
			    s_wsfe(&io___50);
			    do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(
				    integer));
			    do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)
				    sizeof(integer));
			    e_wsfe();
/* L190: */
			}
		    }
		}
		for (itran = 1; itran <= 2; ++itran) {
		    *(unsigned char *)trans = *(unsigned char *)&transs[itran 
			    - 1];
		    i__1 = *nk;
		    for (ik = 1; ik <= i__1; ++ik) {
			if (iside == 1) {
			    n = kval[ik];
			    io___51.ciunit = *nout;
			    s_wsfe(&io___51);
			    do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
			    do_fio(&c__1, side, (ftnlen)1);
			    do_fio(&c__1, trans, (ftnlen)1);
			    do_fio(&c__1, "N", (ftnlen)1);
			    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer))
				    ;
			    e_wsfe();
			    *(unsigned char *)labm = 'M';
			} else {
			    m = kval[ik];
			    io___53.ciunit = *nout;
			    s_wsfe(&io___53);
			    do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
			    do_fio(&c__1, side, (ftnlen)1);
			    do_fio(&c__1, trans, (ftnlen)1);
			    do_fio(&c__1, "M", (ftnlen)1);
			    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer))
				    ;
			    e_wsfe();
			    *(unsigned char *)labm = 'N';
			}
			dprtb5_("NB", "K", labm, nnb, &nbval[1], &imx, muse, 
				nuse, nlda, &reslts_ref(1, 1, 1, i4), ldr1, 
				ldr2, nout, (ftnlen)2, (ftnlen)1, (ftnlen)1);
			++i4;
/* L200: */
		    }
/* L210: */
		}
/* L220: */
	    }
	} else {
	    io___54.ciunit = *nout;
	    s_wsfe(&io___54);
	    do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
	    e_wsfe();
	}
    }
L230:
    return 0;

/*     End of DTIMLQ */

} /* dtimlq_ */
Пример #3
0
/* Subroutine */ int ztimtd_(char *line, integer *nm, integer *mval, integer *
	nn, integer *nval, integer *nnb, integer *nbval, integer *nxval, 
	integer *nlda, integer *ldaval, doublereal *timmin, doublecomplex *a, 
	doublecomplex *b, doublereal *d__, doublecomplex *tau, doublecomplex *
	work, doublereal *reslts, integer *ldr1, integer *ldr2, integer *ldr3,
	 integer *nout, ftnlen line_len)
{
    /* Initialized data */

    static char subnam[6*3] = "ZHETRD" "ZUNGTR" "ZUNMTR";
    static char sides[1*2] = "L" "R";
    static char transs[1*2] = "N" "C";
    static char uplos[1*2] = "U" "L";
    static integer iseed[4] = { 0,0,0,1 };

    /* Format strings */
    static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)";
    static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops "
	    "*** \002)";
    static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)";
    static char fmt_9996[] = "(/5x,a6,\002 with UPLO = '\002,a1,\002'\002,/)";
    static char fmt_9995[] = "(/5x,a6,\002 with SIDE = '\002,a1,\002', UPLO "
	    "= '\002,a1,\002', TRANS = '\002,a1,\002', \002,a1,\002 =\002,i6,"
	    "/)";

    /* System generated locals */
    integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, 
	    i__3, i__4, i__5, i__6;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    static integer ilda;
    static char side[1];
    static integer info;
    static char path[3];
    static doublereal time;
    static integer isub;
    static char uplo[1];
    static integer i__, m, n;
    static char cname[6];
    static integer iside;
    extern doublereal dopla_(char *, integer *, integer *, integer *, integer 
	    *, integer *);
    static integer itoff, itran;
    extern /* Subroutine */ int icopy_(integer *, integer *, integer *, 
	    integer *, integer *);
    static char trans[1];
    static integer iuplo, i3, i4, m1, n1;
    static doublereal s1, s2;
    extern /* Subroutine */ int dprtb3_(char *, char *, integer *, integer *, 
	    integer *, integer *, integer *, integer *, doublereal *, integer 
	    *, integer *, integer *, ftnlen, ftnlen);
    static integer ic, nb, im, in;
    extern doublereal dsecnd_(void);
    static integer lw, nx, reseed[4];
    extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer 
	    *, integer *, integer *, integer *, integer *, ftnlen);
    extern doublereal dmflop_(doublereal *, doublereal *, integer *);
    extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, 
	    logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dprtbl_(
	    char *, char *, integer *, integer *, integer *, integer *, 
	    integer *, doublereal *, integer *, integer *, integer *, ftnlen, 
	    ftnlen), xlaenv_(integer *, integer *), zhetrd_(char *, integer *,
	     doublecomplex *, integer *, doublereal *, doublereal *, 
	    doublecomplex *, doublecomplex *, integer *, integer *);
    static doublereal untime;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static logical timsub[3];
    extern /* Subroutine */ int ztimmg_(integer *, integer *, integer *, 
	    doublecomplex *, integer *, integer *, integer *), zlatms_(
	    integer *, integer *, char *, integer *, char *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *, integer *, char 
	    *, doublecomplex *, integer *, doublecomplex *, integer *), zungtr_(char *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *, integer *), zunmtr_(char *, char *, char *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, integer *, integer *);
    static integer lda, icl, inb;
    static doublereal ops;
    static char lab1[1], lab2[1];

    /* Fortran I/O blocks */
    static cilist io___10 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___11 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___42 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___44 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___45 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___48 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___49 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___50 = { 0, 0, 0, fmt_9995, 0 };



#define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6]
#define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\
reslts_dim2 + (a_2))*reslts_dim1 + a_1]


/*  -- LAPACK timing routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    ZTIMTD times the LAPACK routines ZHETRD, ZUNGTR, and CUNMTR.   

    Arguments   
    =========   

    LINE    (input) CHARACTER*80   
            The input line that requested this routine.  The first six   
            characters contain either the name of a subroutine or a   
            generic path name.  The remaining characters may be used to   
            specify the individual routines to be timed.  See ATIMIN for   
            a full description of the format of the input line.   

    NM      (input) INTEGER   
            The number of values of M contained in the vector MVAL.   

    MVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix size M.   

    NN      (input) INTEGER   
            The number of values of N contained in the vector NVAL.   

    NVAL    (input) INTEGER array, dimension (NN)   
            The values of the matrix column dimension N.   

    NNB     (input) INTEGER   
            The number of values of NB and NX contained in the   
            vectors NBVAL and NXVAL.  The blocking parameters are used   
            in pairs (NB,NX).   

    NBVAL   (input) INTEGER array, dimension (NNB)   
            The values of the blocksize NB.   

    NXVAL   (input) INTEGER array, dimension (NNB)   
            The values of the crossover point NX.   

    NLDA    (input) INTEGER   
            The number of values of LDA contained in the vector LDAVAL.   

    LDAVAL  (input) INTEGER array, dimension (NLDA)   
            The values of the leading dimension of the array A.   

    TIMMIN  (input) DOUBLE PRECISION   
            The minimum time a subroutine will be timed.   

    A       (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX)   
            where LDAMAX and NMAX are the maximum values of LDA and N.   

    B       (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX)   

    D       (workspace) DOUBLE PRECISION array, dimension (2*NMAX-1)   

    TAU     (workspace) COMPLEX*16 array, dimension (NMAX)   

    WORK    (workspace) COMPLEX*16 array, dimension (NMAX*NBMAX)   
            where NBMAX is the maximum value of NB.   

    RESLTS  (workspace) DOUBLE PRECISION array, dimension   
                        (LDR1,LDR2,LDR3,4*NN+3)   
            The timing results for each subroutine over the relevant   
            values of M, (NB,NX), LDA, and N.   

    LDR1    (input) INTEGER   
            The first dimension of RESLTS.  LDR1 >= max(1,NNB).   

    LDR2    (input) INTEGER   
            The second dimension of RESLTS.  LDR2 >= max(1,NM).   

    LDR3    (input) INTEGER   
            The third dimension of RESLTS.  LDR3 >= max(1,2*NLDA).   

    NOUT    (input) INTEGER   
            The unit number for output.   

    Internal Parameters   
    ===================   

    MODE    INTEGER   
            The matrix type.  MODE = 3 is a geometric distribution of   
            eigenvalues.  See ZLATMS for further details.   

    COND    DOUBLE PRECISION   
            The condition number of the matrix.  The singular values are   
            set to values from DMAX to DMAX/COND.   

    DMAX    DOUBLE PRECISION   
            The magnitude of the largest singular value.   

    =====================================================================   

       Parameter adjustments */
    --mval;
    --nval;
    --nbval;
    --nxval;
    --ldaval;
    --a;
    --b;
    --d__;
    --tau;
    --work;
    reslts_dim1 = *ldr1;
    reslts_dim2 = *ldr2;
    reslts_dim3 = *ldr3;
    reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1)
	    );
    reslts -= reslts_offset;

    /* Function Body   

       Extract the timing request from the input line. */

    s_copy(path, "Zomplex precision", (ftnlen)1, (ftnlen)17);
    s_copy(path + 1, "TD", (ftnlen)2, (ftnlen)2);
    atimin_(path, line, &c__3, subnam, timsub, nout, &info, (ftnlen)3, (
	    ftnlen)80, (ftnlen)6);
    if (info != 0) {
	goto L220;
    }

/*     Check that M <= LDA for the input values. */

    s_copy(cname, line, (ftnlen)6, (ftnlen)6);
    atimck_(&c__2, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, (
	    ftnlen)6);
    if (info > 0) {
	io___10.ciunit = *nout;
	s_wsfe(&io___10);
	do_fio(&c__1, cname, (ftnlen)6);
	e_wsfe();
	goto L220;
    }

/*     Check that K <= LDA for ZUNMTR */

    if (timsub[2]) {
	atimck_(&c__3, cname, nn, &nval[1], nlda, &ldaval[1], nout, &info, (
		ftnlen)6);
	if (info > 0) {
	    io___11.ciunit = *nout;
	    s_wsfe(&io___11);
	    do_fio(&c__1, subnam_ref(0, 3), (ftnlen)6);
	    e_wsfe();
	    timsub[2] = FALSE_;
	}
    }

/*     Do first for UPLO = 'U', then for UPLO = 'L' */

    for (iuplo = 1; iuplo <= 2; ++iuplo) {
	*(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1];

/*        Do for each value of M: */

	i__1 = *nm;
	for (im = 1; im <= i__1; ++im) {
	    m = mval[im];
	    icopy_(&c__4, iseed, &c__1, reseed, &c__1);

/*           Do for each value of LDA: */

	    i__2 = *nlda;
	    for (ilda = 1; ilda <= i__2; ++ilda) {
		lda = ldaval[ilda];
		i3 = (iuplo - 1) * *nlda + ilda;

/*              Do for each pair of values (NB, NX) in NBVAL and NXVAL. */

		i__3 = *nnb;
		for (inb = 1; inb <= i__3; ++inb) {
		    nb = nbval[inb];
		    xlaenv_(&c__1, &nb);
		    nx = nxval[inb];
		    xlaenv_(&c__3, &nx);
/* Computing MAX */
		    i__4 = 1, i__5 = m * max(1,nb);
		    lw = max(i__4,i__5);

/*                 Generate a test matrix of order M. */

		    icopy_(&c__4, reseed, &c__1, iseed, &c__1);
		    zlatms_(&m, &m, "Uniform", iseed, "Symmetric", &d__[1], &
			    c__3, &c_b27, &c_b28, &m, &m, "No packing", &b[1],
			     &lda, &work[1], &info);

		    if (timsub[0]) {

/*                    ZHETRD:  Reduction to tridiagonal form */

			zlacpy_(uplo, &m, &m, &b[1], &lda, &a[1], &lda);
			ic = 0;
			s1 = dsecnd_();
L10:
			zhetrd_(uplo, &m, &a[1], &lda, &d__[1], &d__[m + 1], &
				tau[1], &work[1], &lw, &info);
			s2 = dsecnd_();
			time = s2 - s1;
			++ic;
			if (time < *timmin) {
			    zlacpy_(uplo, &m, &m, &b[1], &lda, &a[1], &lda);
			    goto L10;
			}

/*                    Subtract the time used in ZLACPY. */

			icl = 1;
			s1 = dsecnd_();
L20:
			s2 = dsecnd_();
			untime = s2 - s1;
			++icl;
			if (icl <= ic) {
			    zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda);
			    goto L20;
			}

			time = (time - untime) / (doublereal) ic;
			ops = dopla_("ZHETRD", &m, &m, &c_n1, &c_n1, &nb);
			reslts_ref(inb, im, i3, 1) = dmflop_(&ops, &time, &
				info);
		    } else {

/*                    If ZHETRD was not timed, generate a matrix and   
                      factor it using ZHETRD anyway so that the factored   
                      form of the matrix can be used in timing the other   
                      routines. */

			zlacpy_(uplo, &m, &m, &b[1], &lda, &a[1], &lda);
			zhetrd_(uplo, &m, &a[1], &lda, &d__[1], &d__[m + 1], &
				tau[1], &work[1], &lw, &info);
		    }

		    if (timsub[1]) {

/*                    ZUNGTR:  Generate the orthogonal matrix Q from the   
                      reduction to Hessenberg form A = Q*H*Q' */

			zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda);
			ic = 0;
			s1 = dsecnd_();
L30:
			zungtr_(uplo, &m, &b[1], &lda, &tau[1], &work[1], &lw,
				 &info);
			s2 = dsecnd_();
			time = s2 - s1;
			++ic;
			if (time < *timmin) {
			    zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda);
			    goto L30;
			}

/*                    Subtract the time used in ZLACPY. */

			icl = 1;
			s1 = dsecnd_();
L40:
			s2 = dsecnd_();
			untime = s2 - s1;
			++icl;
			if (icl <= ic) {
			    zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda);
			    goto L40;
			}

			time = (time - untime) / (doublereal) ic;

/*                    Op count for ZUNGTR:  same as   
                         ZUNGQR( N-1, N-1, N-1, ... ) */

			i__4 = m - 1;
			i__5 = m - 1;
			i__6 = m - 1;
			ops = dopla_("ZUNGQR", &i__4, &i__5, &i__6, &c_n1, &
				nb);
			reslts_ref(inb, im, i3, 2) = dmflop_(&ops, &time, &
				info);
		    }

		    if (timsub[2]) {

/*                    ZUNMTR:  Multiply by Q stored as a product of   
                      elementary transformations */

			i4 = 2;
			for (iside = 1; iside <= 2; ++iside) {
			    *(unsigned char *)side = *(unsigned char *)&sides[
				    iside - 1];
			    i__4 = *nn;
			    for (in = 1; in <= i__4; ++in) {
				n = nval[in];
/* Computing MAX */
				i__5 = 1, i__6 = max(1,nb) * n;
				lw = max(i__5,i__6);
				if (iside == 1) {
				    m1 = m;
				    n1 = n;
				} else {
				    m1 = n;
				    n1 = m;
				}
				itoff = 0;
				for (itran = 1; itran <= 2; ++itran) {
				    *(unsigned char *)trans = *(unsigned char 
					    *)&transs[itran - 1];
				    ztimmg_(&c__0, &m1, &n1, &b[1], &lda, &
					    c__0, &c__0);
				    ic = 0;
				    s1 = dsecnd_();
L50:
				    zunmtr_(side, uplo, trans, &m1, &n1, &a[1]
					    , &lda, &tau[1], &b[1], &lda, &
					    work[1], &lw, &info);
				    s2 = dsecnd_();
				    time = s2 - s1;
				    ++ic;
				    if (time < *timmin) {
					ztimmg_(&c__0, &m1, &n1, &b[1], &lda, 
						&c__0, &c__0);
					goto L50;
				    }

/*                             Subtract the time used in ZTIMMG. */

				    icl = 1;
				    s1 = dsecnd_();
L60:
				    s2 = dsecnd_();
				    untime = s2 - s1;
				    ++icl;
				    if (icl <= ic) {
					ztimmg_(&c__0, &m1, &n1, &b[1], &lda, 
						&c__0, &c__0);
					goto L60;
				    }

				    time = (time - untime) / (doublereal) ic;

/*                             Op count for ZUNMTR, SIDE='L':  same as   
                                  ZUNMQR( 'L', TRANS, M-1, N, M-1, ...)   

                               Op count for ZUNMTR, SIDE='R':  same as   
                                  ZUNMQR( 'R', TRANS, M, N-1, N-1, ...) */

				    if (iside == 1) {
					i__5 = m1 - 1;
					i__6 = m1 - 1;
					ops = dopla_("ZUNMQR", &i__5, &n1, &
						i__6, &c_n1, &nb);
				    } else {
					i__5 = n1 - 1;
					i__6 = n1 - 1;
					ops = dopla_("ZUNMQR", &m1, &i__5, &
						i__6, &c__1, &nb);
				    }

				    reslts_ref(inb, im, i3, i4 + itoff + in) =
					     dmflop_(&ops, &time, &info);
				    itoff = *nn;
/* L70: */
				}
/* L80: */
			    }
			    i4 += *nn << 1;
/* L90: */
			}
		    }

/* L100: */
		}
/* L110: */
	    }
/* L120: */
	}
/* L130: */
    }

/*     Print tables of results for ZHETRD and ZUNGTR */

    for (isub = 1; isub <= 2; ++isub) {
	if (! timsub[isub - 1]) {
	    goto L160;
	}
	io___42.ciunit = *nout;
	s_wsfe(&io___42);
	do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
	e_wsfe();
	if (*nlda > 1) {
	    i__1 = *nlda;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		io___44.ciunit = *nout;
		s_wsfe(&io___44);
		do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer));
		e_wsfe();
/* L140: */
	    }
	}
	i3 = 1;
	for (iuplo = 1; iuplo <= 2; ++iuplo) {
	    io___45.ciunit = *nout;
	    s_wsfe(&io___45);
	    do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
	    do_fio(&c__1, uplos + (iuplo - 1), (ftnlen)1);
	    e_wsfe();
	    dprtb3_("(  NB,  NX)", "N", nnb, &nbval[1], &nxval[1], nm, &mval[
		    1], nlda, &reslts_ref(1, 1, i3, isub), ldr1, ldr2, nout, (
		    ftnlen)11, (ftnlen)1);
	    i3 += *nlda;
/* L150: */
	}
L160:
	;
    }

/*     Print tables of results for ZUNMTR */

    isub = 3;
    if (timsub[isub - 1]) {
	i4 = 2;
	for (iside = 1; iside <= 2; ++iside) {
	    if (iside == 1) {
		*(unsigned char *)lab1 = 'M';
		*(unsigned char *)lab2 = 'N';
		if (*nlda > 1) {
		    io___48.ciunit = *nout;
		    s_wsfe(&io___48);
		    do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
		    e_wsfe();
		    i__1 = *nlda;
		    for (i__ = 1; i__ <= i__1; ++i__) {
			io___49.ciunit = *nout;
			s_wsfe(&io___49);
			do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(
				integer));
			e_wsfe();
/* L170: */
		    }
		}
	    } else {
		*(unsigned char *)lab1 = 'N';
		*(unsigned char *)lab2 = 'M';
	    }
	    for (itran = 1; itran <= 2; ++itran) {
		i__1 = *nn;
		for (in = 1; in <= i__1; ++in) {
		    i3 = 1;
		    for (iuplo = 1; iuplo <= 2; ++iuplo) {
			io___50.ciunit = *nout;
			s_wsfe(&io___50);
			do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6);
			do_fio(&c__1, sides + (iside - 1), (ftnlen)1);
			do_fio(&c__1, uplos + (iuplo - 1), (ftnlen)1);
			do_fio(&c__1, transs + (itran - 1), (ftnlen)1);
			do_fio(&c__1, lab2, (ftnlen)1);
			do_fio(&c__1, (char *)&nval[in], (ftnlen)sizeof(
				integer));
			e_wsfe();
			dprtbl_("NB", lab1, nnb, &nbval[1], nm, &mval[1], 
				nlda, &reslts_ref(1, 1, i3, i4 + in), ldr1, 
				ldr2, nout, (ftnlen)2, (ftnlen)1);
			i3 += *nlda;
/* L180: */
		    }
/* L190: */
		}
		i4 += *nn;
/* L200: */
	    }
/* L210: */
	}
    }
L220:

/*     Print a table of results for each timed routine. */

    return 0;

/*     End of ZTIMTD */

} /* ztimtd_ */
Пример #4
0
/* Subroutine */ int dchkq3_(logical *dotype, integer *nm, integer *mval, 
	integer *nn, integer *nval, integer *nnb, integer *nbval, integer *
	nxval, doublereal *thresh, doublereal *a, doublereal *copya, 
	doublereal *s, doublereal *copys, doublereal *tau, doublereal *work, 
	integer *iwork, integer *nout)
{
    /* Initialized data */

    static integer iseedy[4] = { 1988,1989,1990,1991 };

    /* Format strings */
    static char fmt_9999[] = "(1x,a,\002 M =\002,i5,\002, N =\002,i5,\002, N"
	    "B =\002,i4,\002, type \002,i2,\002, test \002,i2,\002, ratio "
	    "=\002,g12.5)";

    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5;
    doublereal d__1;

    /* Builtin functions */
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    integer i__, k, m, n, nb, im, in, lw, nx, lda, inb;
    doublereal eps;
    integer mode, info;
    char path[3];
    integer ilow, nrun;
    extern /* Subroutine */ int alahd_(integer *, char *);
    integer ihigh, nfail, iseed[4], imode;
    extern doublereal dqpt01_(integer *, integer *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *), dqrt11_(integer *, integer *, doublereal *, integer *, 
	     doublereal *, doublereal *, integer *), dqrt12_(integer *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    integer *);
    integer mnmin;
    extern /* Subroutine */ int icopy_(integer *, integer *, integer *, 
	    integer *, integer *);
    integer istep, nerrs, lwork;
    extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    integer *);
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int dlaord_(char *, integer *, doublereal *, 
	    integer *), dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    dlaset_(char *, integer *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *), alasum_(char *, integer *, 
	    integer *, integer *, integer *), dlatms_(integer *, 
	    integer *, char *, integer *, char *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, integer *, char *, 
	    doublereal *, integer *, doublereal *, integer *), xlaenv_(integer *, integer *);
    doublereal result[3];

    /* Fortran I/O blocks */
    static cilist io___28 = { 0, 0, 0, fmt_9999, 0 };



/*  -- LAPACK test routine (version 3.1.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     January 2007 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DCHKQ3 tests DGEQP3. */

/*  Arguments */
/*  ========= */

/*  DOTYPE  (input) LOGICAL array, dimension (NTYPES) */
/*          The matrix types to be used for testing.  Matrices of type j */
/*          (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = */
/*          .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. */

/*  NM      (input) INTEGER */
/*          The number of values of M contained in the vector MVAL. */

/*  MVAL    (input) INTEGER array, dimension (NM) */
/*          The values of the matrix row dimension M. */

/*  NN      (input) INTEGER */
/*          The number of values of N contained in the vector NVAL. */

/*  NVAL    (input) INTEGER array, dimension (NN) */
/*          The values of the matrix column dimension N. */

/*  NNB     (input) INTEGER */
/*          The number of values of NB and NX contained in the */
/*          vectors NBVAL and NXVAL.  The blocking parameters are used */
/*          in pairs (NB,NX). */

/*  NBVAL   (input) INTEGER array, dimension (NNB) */
/*          The values of the blocksize NB. */

/*  NXVAL   (input) INTEGER array, dimension (NNB) */
/*          The values of the crossover point NX. */

/*  THRESH  (input) DOUBLE PRECISION */
/*          The threshold value for the test ratios.  A result is */
/*          included in the output file if RESULT >= THRESH.  To have */
/*          every test ratio printed, use THRESH = 0. */

/*  A       (workspace) DOUBLE PRECISION array, dimension (MMAX*NMAX) */
/*          where MMAX is the maximum value of M in MVAL and NMAX is the */
/*          maximum value of N in NVAL. */

/*  COPYA   (workspace) DOUBLE PRECISION array, dimension (MMAX*NMAX) */

/*  S       (workspace) DOUBLE PRECISION array, dimension */
/*                      (min(MMAX,NMAX)) */

/*  COPYS   (workspace) DOUBLE PRECISION array, dimension */
/*                      (min(MMAX,NMAX)) */

/*  TAU     (workspace) DOUBLE PRECISION array, dimension (MMAX) */

/*  WORK    (workspace) DOUBLE PRECISION array, dimension */
/*                      (MMAX*NMAX + 4*NMAX + MMAX) */

/*  IWORK   (workspace) INTEGER array, dimension (2*NMAX) */

/*  NOUT    (input) INTEGER */
/*          The unit number for output. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Data statements .. */
    /* Parameter adjustments */
    --iwork;
    --work;
    --tau;
    --copys;
    --s;
    --copya;
    --a;
    --nxval;
    --nbval;
    --nval;
    --mval;
    --dotype;

    /* Function Body */
/*     .. */
/*     .. Executable Statements .. */

/*     Initialize constants and the random number seed. */

    s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16);
    s_copy(path + 1, "Q3", (ftnlen)2, (ftnlen)2);
    nrun = 0;
    nfail = 0;
    nerrs = 0;
    for (i__ = 1; i__ <= 4; ++i__) {
	iseed[i__ - 1] = iseedy[i__ - 1];
/* L10: */
    }
    eps = dlamch_("Epsilon");
    infoc_1.infot = 0;

    i__1 = *nm;
    for (im = 1; im <= i__1; ++im) {

/*        Do for each value of M in MVAL. */

	m = mval[im];
	lda = max(1,m);

	i__2 = *nn;
	for (in = 1; in <= i__2; ++in) {

/*           Do for each value of N in NVAL. */

	    n = nval[in];
	    mnmin = min(m,n);
/* Computing MAX */
	    i__3 = 1, i__4 = m * max(m,n) + (mnmin << 2) + max(m,n), i__3 = 
		    max(i__3,i__4), i__4 = m * n + (mnmin << 1) + (n << 2);
	    lwork = max(i__3,i__4);

	    for (imode = 1; imode <= 6; ++imode) {
		if (! dotype[imode]) {
		    goto L70;
		}

/*              Do for each type of matrix */
/*                 1:  zero matrix */
/*                 2:  one small singular value */
/*                 3:  geometric distribution of singular values */
/*                 4:  first n/2 columns fixed */
/*                 5:  last n/2 columns fixed */
/*                 6:  every second column fixed */

		mode = imode;
		if (imode > 3) {
		    mode = 1;
		}

/*              Generate test matrix of size m by n using */
/*              singular value distribution indicated by `mode'. */

		i__3 = n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    iwork[i__] = 0;
/* L20: */
		}
		if (imode == 1) {
		    dlaset_("Full", &m, &n, &c_b11, &c_b11, &copya[1], &lda);
		    i__3 = mnmin;
		    for (i__ = 1; i__ <= i__3; ++i__) {
			copys[i__] = 0.;
/* L30: */
		    }
		} else {
		    d__1 = 1. / eps;
		    dlatms_(&m, &n, "Uniform", iseed, "Nonsymm", &copys[1], &
			    mode, &d__1, &c_b16, &m, &n, "No packing", &copya[
			    1], &lda, &work[1], &info);
		    if (imode >= 4) {
			if (imode == 4) {
			    ilow = 1;
			    istep = 1;
/* Computing MAX */
			    i__3 = 1, i__4 = n / 2;
			    ihigh = max(i__3,i__4);
			} else if (imode == 5) {
/* Computing MAX */
			    i__3 = 1, i__4 = n / 2;
			    ilow = max(i__3,i__4);
			    istep = 1;
			    ihigh = n;
			} else if (imode == 6) {
			    ilow = 1;
			    istep = 2;
			    ihigh = n;
			}
			i__3 = ihigh;
			i__4 = istep;
			for (i__ = ilow; i__4 < 0 ? i__ >= i__3 : i__ <= i__3;
				 i__ += i__4) {
			    iwork[i__] = 1;
/* L40: */
			}
		    }
		    dlaord_("Decreasing", &mnmin, &copys[1], &c__1);
		}

		i__4 = *nnb;
		for (inb = 1; inb <= i__4; ++inb) {

/*                 Do for each pair of values (NB,NX) in NBVAL and NXVAL. */

		    nb = nbval[inb];
		    xlaenv_(&c__1, &nb);
		    nx = nxval[inb];
		    xlaenv_(&c__3, &nx);

/*                 Get a working copy of COPYA into A and a copy of */
/*                 vector IWORK. */

		    dlacpy_("All", &m, &n, &copya[1], &lda, &a[1], &lda);
		    icopy_(&n, &iwork[1], &c__1, &iwork[n + 1], &c__1);

/*                 Compute the QR factorization with pivoting of A */

/* Computing MAX */
		    i__3 = 1, i__5 = (n << 1) + nb * (n + 1);
		    lw = max(i__3,i__5);

/*                 Compute the QP3 factorization of A */

		    s_copy(srnamc_1.srnamt, "DGEQP3", (ftnlen)32, (ftnlen)6);
		    dgeqp3_(&m, &n, &a[1], &lda, &iwork[n + 1], &tau[1], &
			    work[1], &lw, &info);

/*                 Compute norm(svd(a) - svd(r)) */

		    result[0] = dqrt12_(&m, &n, &a[1], &lda, &copys[1], &work[
			    1], &lwork);

/*                 Compute norm( A*P - Q*R ) */

		    result[1] = dqpt01_(&m, &n, &mnmin, &copya[1], &a[1], &
			    lda, &tau[1], &iwork[n + 1], &work[1], &lwork);

/*                 Compute Q'*Q */

		    result[2] = dqrt11_(&m, &mnmin, &a[1], &lda, &tau[1], &
			    work[1], &lwork);

/*                 Print information about the tests that did not pass */
/*                 the threshold. */

		    for (k = 1; k <= 3; ++k) {
			if (result[k - 1] >= *thresh) {
			    if (nfail == 0 && nerrs == 0) {
				alahd_(nout, path);
			    }
			    io___28.ciunit = *nout;
			    s_wsfe(&io___28);
			    do_fio(&c__1, "DGEQP3", (ftnlen)6);
			    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer))
				    ;
			    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer))
				    ;
			    do_fio(&c__1, (char *)&nb, (ftnlen)sizeof(integer)
				    );
			    do_fio(&c__1, (char *)&imode, (ftnlen)sizeof(
				    integer));
			    do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer))
				    ;
			    do_fio(&c__1, (char *)&result[k - 1], (ftnlen)
				    sizeof(doublereal));
			    e_wsfe();
			    ++nfail;
			}
/* L50: */
		    }
		    nrun += 3;

/* L60: */
		}
L70:
		;
	    }
/* L80: */
	}
/* L90: */
    }

/*     Print a summary of the results. */

    alasum_(path, nout, &nfail, &nrun, &nerrs);


/*     End of DCHKQ3 */

    return 0;
} /* dchkq3_ */