/* Main program */ MAIN__(void) { /* Format strings */ static char fmt_9983[] = "(\002 LAPACK VERSION 3.0, released June 30, 19" "99 \002,/)"; static char fmt_9992[] = "(\002 The following parameter values will be u" "sed:\002)"; static char fmt_9999[] = "(\002 Too many values of \002,a,\002 using " "\002,a,\002 = \002,i2)"; static char fmt_9991[] = "(4x,a7,1x,10i6,/12x,10i6)"; static char fmt_9997[] = "(\002 *** \002,a1,\002 = \002,i7,\002 is too b" "ig: \002,\002maximum allowed is\002,i7)"; static char fmt_9998[] = "(\002 *** LDA = \002,i7,\002 is too small, mus" "t have \002,\002LDA > 0.\002)"; static char fmt_9995[] = "(\002 *** LDA*N is too big for the dense routi" "nes \002,\002(LDA =\002,i6,\002, N =\002,i6,\002)\002,/\002 --> " "Increase LA to at least \002,i8)"; static char fmt_9994[] = "(\002 *** (LDA+K)*M is too big for the band ro" "utines \002,\002(LDA=\002,i6,\002, M=\002,i6,\002, K=\002,i6," "\002)\002,/\002 --> Increase LA to at least \002,i8)"; static char fmt_9996[] = "(\002 *** N*NB is too big for N =\002,i6,\002," " NB =\002,i6,/\002 --> Increase LA to at least \002,i8)"; static char fmt_9984[] = "(/\002 Tests not done due to input errors\002)"; static char fmt_9993[] = "(\002 The minimum time a subroutine will be ti" "med = \002,f6.3,\002 seconds\002)"; static char fmt_9990[] = "(/\002 ------------------------------\002,/" "\002 >>>>> Sample BLAS <<<<<\002,/\002 ------------------" "------------\002)"; static char fmt_9989[] = "(1x,a6,\002 not timed due to input errors\002," "/)"; static char fmt_9988[] = "(/\002 ------------------------------\002,/" "\002 >>>>> Timing data <<<<<\002,/\002 ------------------" "------------\002)"; static char fmt_9987[] = "(1x,a6,\002: Unrecognized path or subroutine " "name\002,/)"; static char fmt_9986[] = "(\002 End of tests\002)"; static char fmt_9985[] = "(\002 Total time used = \002,f12.2,\002 seco" "nds\002)"; /* System generated locals */ integer i__1, i__2; doublereal d__1; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), s_rsfe(cilist *), do_fio(integer * , char *, ftnlen), e_rsfe(void), s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), s_wsle(cilist *), e_wsle(void); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); /* Local variables */ static integer need, nlda; static logical blas; static char line[80]; static integer kval[6], mval[6], maxk, nval[6], maxm, maxn; static doublecomplex work[270336] /* was [512][528] */, a[817152] /* was [272384][3] */, b[817152] /* was [272384][3] */; static doublereal d__[1024]; static doublecomplex e[1024]; static integer i__, l; static doublereal s[1024]; static logical ldaok; extern logical lsame_(char *, char *); static integer nbval[6], maxnb, mkmax; static char c1[1], c2[2], c3[3]; static integer nxval[6], i2, j2, iwork[10000]; static doublereal s1, s2, rwork[76812]; extern /* Subroutine */ int ztimb2_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, ftnlen), ztimb3_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, integer *, integer *, integer *, ftnlen), ztimq3_( char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, ftnlen); static integer nk, nm, nn; extern doublereal dsecnd_(void); static integer ldaval[4]; static logical ldamok, ldanok; static integer maxlda; extern logical lsamen_(integer *, char *, char *); static doublereal flptbl[1088640], opctbl[1088640], timtbl[1088640]; extern /* Subroutine */ int ztimgb_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen); static doublereal timmin; static logical nxnbok; extern /* Subroutine */ int ztimge_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimhe_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimpb_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimbr_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimtb_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimhp_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen); static doublereal reslts[6912] /* was [6][6][8][24] */; extern /* Subroutine */ int ztimhr_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimgt_(char *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimmm_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal * , integer *, integer *, integer *, ftnlen, ftnlen), ztimlq_(char * , integer *, integer *, integer *, integer *, integer *, integer * , integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimql_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimls_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublecomplex *, doublereal *, integer *, integer *, ftnlen), ztimpo_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimpp_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimmv_(char *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, ftnlen), ztimpt_(char *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimqp_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, ftnlen), ztimqr_( char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimrq_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimsp_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimtd_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimtp_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimtr_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), ztimsy_(char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen); static integer nnb; static logical mok, nok; static integer ldr1, ldr2, ldr3; /* Fortran I/O blocks */ static cilist io___5 = { 0, 6, 0, fmt_9983, 0 }; static cilist io___6 = { 0, 5, 0, "( A80 )", 0 }; static cilist io___10 = { 0, 6, 0, "( 1X, A, / )", 0 }; static cilist io___11 = { 0, 6, 0, fmt_9992, 0 }; static cilist io___12 = { 0, 5, 0, 0, 0 }; static cilist io___14 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___18 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___21 = { 0, 6, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 6, 0, 0, 0 }; static cilist io___23 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___31 = { 0, 6, 0, fmt_9997, 0 }; static cilist io___32 = { 0, 6, 0, 0, 0 }; static cilist io___33 = { 0, 5, 0, 0, 0 }; static cilist io___35 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___36 = { 0, 5, 0, 0, 0 }; static cilist io___38 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 5, 0, 0, 0 }; static cilist io___47 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___49 = { 0, 5, 0, 0, 0 }; static cilist io___50 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___52 = { 0, 5, 0, 0, 0 }; static cilist io___54 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___55 = { 0, 5, 0, 0, 0 }; static cilist io___57 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___60 = { 0, 6, 0, fmt_9998, 0 }; static cilist io___61 = { 0, 6, 0, 0, 0 }; static cilist io___64 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___66 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___68 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___69 = { 0, 6, 0, fmt_9984, 0 }; static cilist io___70 = { 0, 6, 0, 0, 0 }; static cilist io___71 = { 0, 6, 0, 0, 0 }; static cilist io___72 = { 0, 5, 0, 0, 0 }; static cilist io___74 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___75 = { 0, 6, 0, 0, 0 }; static cilist io___76 = { 0, 5, 1, "(A)", 0 }; static cilist io___77 = { 0, 5, 1, "(A)", 0 }; static cilist io___78 = { 0, 6, 0, fmt_9990, 0 }; static cilist io___83 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___84 = { 0, 5, 1, "(A)", 0 }; static cilist io___85 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___89 = { 0, 6, 0, fmt_9987, 0 }; static cilist io___93 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___94 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___95 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___103 = { 0, 6, 0, fmt_9987, 0 }; static cilist io___104 = { 0, 5, 1, "(A)", 0 }; static cilist io___106 = { 0, 6, 0, fmt_9986, 0 }; static cilist io___107 = { 0, 6, 0, fmt_9985, 0 }; #define a_subscr(a_1,a_2) (a_2)*272384 + a_1 - 272385 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*272384 + a_1 - 272385 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= ZTIMAA is the timing program for the COMPLEX*16 LAPACK routines. This program collects performance data for the factor, solve, and inverse routines used in solving systems of linear equations, and also for the orthogonal factorization and reduction routines used in solving least squares problems and matrix eigenvalue problems. The subprograms call a DOUBLE PRECISION function DSECND with no arguments which is assumed to return the central-processor time in seconds from some fixed starting time. The program is driven by a short data file, which specifies values for the matrix dimensions M, N and K, for the blocking parameters NB and NX, and for the leading array dimension LDA. A minimum time for each subroutine is included for timing small problems or for obtaining results on a machine with an inaccurate DSECND function. The matrix dimensions M, N, and K correspond to the three dimensions m, n, and k in the Level 3 BLAS. When timing the LAPACK routines for square matrices, M and N correspond to the matrix dimensions m and n, and K is the number of right-hand sides (nrhs) for the solves. When timing the LAPACK routines for band matrices, M is the matrix order m, N is the half-bandwidth (kl, ku, or kd in the LAPACK notation), and K is again the number of right-hand sides. The first 13 records of the data file are read using list-directed input. The first line of input is printed as the first line of output and can be used to identify different sets of results. To assist with debugging an input file, the values are printed out as they are read in. The following records are read using the format (A). For these records, the first 6 characters are reserved for the path or subroutine name. If a path name is used, the characters after the path name indicate the routines in the path to be timed, where 'T' or 't' means 'Time this routine'. If the line is blank after the path name, all routines in the path are timed. If fewer characters appear than routines in a path, the remaining characters are assumed to be 'F'. For example, the following 3 lines are equivalent ways of requesting timing of ZGETRF: ZGE T F F ZGE T ZGETRF An annotated example of a data file can be obtained by deleting the first 3 characters from the following 32 lines: LAPACK timing, COMPLEX*16 square matrices 5 Number of values of M 100 200 300 400 500 Values of M (row dimension) 5 Number of values of N 100 200 300 400 500 Values of N (column dimension) 2 Number of values of K 100 400 Values of K 5 Number of values of NB 1 16 32 48 64 Values of NB (blocksize) 0 48 128 128 128 Values of NX (crossover point) 2 Number of values of LDA 512 513 Values of LDA (leading dimension) 0.0 Minimum time in seconds ZGE T T T ZPO T T T ZPP T T T ZHE T T T ZHP T T T ZSY T T T ZSP T T T ZTR T T ZTP T T ZQR T T F ZLQ T T F ZQL T T F ZRQ T T F ZQP T ZHR T T F F ZTD T T F F ZBR T F F ZLS T T T T T T The routines are timed for all combinations of applicable values of M, N, K, NB, NX, and LDA, and for all combinations of options such as UPLO and TRANS. For Level 2 BLAS timings, values of NB are used for INCX. Certain subroutines, such as the QR factorization, treat the values of M and N as ordered pairs and operate on M x N matrices. Internal Parameters =================== NMAX INTEGER The maximum value of M or N for square matrices. LDAMAX INTEGER The maximum value of LDA. NMAXB INTEGER The maximum value of N for band matrices. MAXVAL INTEGER The maximum number of values that can be read in for M, N, K, NB, or NX. MXNLDA INTEGER The maximum number of values that can be read in for LDA. NIN INTEGER The unit number for input. Currently set to 5 (std input). NOUT INTEGER The unit number for output. Currently set to 6 (std output). ===================================================================== */ s1 = dsecnd_(); ldr1 = 6; ldr2 = 6; ldr3 = 8; s_wsfe(&io___5); e_wsfe(); /* Read the first line. The first four characters must be 'BLAS' for the BLAS data file format to be used. Otherwise, the LAPACK data file format is assumed. */ s_rsfe(&io___6); do_fio(&c__1, line, (ftnlen)80); e_rsfe(); blas = lsamen_(&c__4, line, "BLAS"); /* Find the last non-blank and print the first line of input as the first line of output. */ for (l = 80; l >= 1; --l) { if (*(unsigned char *)&line[l - 1] != ' ') { goto L20; } /* L10: */ } l = 1; L20: s_wsfe(&io___10); do_fio(&c__1, line, l); e_wsfe(); s_wsfe(&io___11); e_wsfe(); /* Read in NM and the values for M. */ s_rsle(&io___12); do_lio(&c__3, &c__1, (char *)&nm, (ftnlen)sizeof(integer)); e_rsle(); if (nm > 6) { s_wsfe(&io___14); do_fio(&c__1, "M", (ftnlen)1); do_fio(&c__1, "NM", (ftnlen)2); do_fio(&c__1, (char *)&c__6, (ftnlen)sizeof(integer)); e_wsfe(); nm = 6; } s_rsle(&io___15); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); s_wsfe(&io___18); do_fio(&c__1, "M: ", (ftnlen)7); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Check that M <= NMAXB for all values of M. */ mok = TRUE_; maxm = 0; i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ i__2 = mval[i__ - 1]; maxm = max(i__2,maxm); if (mval[i__ - 1] > 5000) { s_wsfe(&io___21); do_fio(&c__1, "M", (ftnlen)1); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__5000, (ftnlen)sizeof(integer)); e_wsfe(); mok = FALSE_; } /* L30: */ } if (! mok) { s_wsle(&io___22); e_wsle(); } /* Read in NN and the values for N. */ s_rsle(&io___23); do_lio(&c__3, &c__1, (char *)&nn, (ftnlen)sizeof(integer)); e_rsle(); if (nn > 6) { s_wsfe(&io___25); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, "NN", (ftnlen)2); do_fio(&c__1, (char *)&c__6, (ftnlen)sizeof(integer)); e_wsfe(); nn = 6; } s_rsle(&io___26); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); s_wsfe(&io___28); do_fio(&c__1, "N: ", (ftnlen)7); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Check that N <= NMAXB for all values of N. */ nok = TRUE_; maxn = 0; i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ i__2 = nval[i__ - 1]; maxn = max(i__2,maxn); if (nval[i__ - 1] > 5000) { s_wsfe(&io___31); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__5000, (ftnlen)sizeof(integer)); e_wsfe(); nok = FALSE_; } /* L40: */ } if (! nok) { s_wsle(&io___32); e_wsle(); } /* Read in NK and the values for K. */ s_rsle(&io___33); do_lio(&c__3, &c__1, (char *)&nk, (ftnlen)sizeof(integer)); e_rsle(); if (nk > 6) { s_wsfe(&io___35); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, "NK", (ftnlen)2); do_fio(&c__1, (char *)&c__6, (ftnlen)sizeof(integer)); e_wsfe(); nk = 6; } s_rsle(&io___36); i__1 = nk; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); s_wsfe(&io___38); do_fio(&c__1, "K: ", (ftnlen)7); i__1 = nk; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Find the maximum value of K (= NRHS). */ maxk = 0; i__1 = nk; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ i__2 = kval[i__ - 1]; maxk = max(i__2,maxk); /* L50: */ } mkmax = maxm * max(2,maxk); /* Read in NNB and the values for NB. For the BLAS input files, NBVAL is used to store values for INCX and INCY. */ s_rsle(&io___41); do_lio(&c__3, &c__1, (char *)&nnb, (ftnlen)sizeof(integer)); e_rsle(); if (nnb > 6) { s_wsfe(&io___43); do_fio(&c__1, "NB", (ftnlen)2); do_fio(&c__1, "NNB", (ftnlen)3); do_fio(&c__1, (char *)&c__6, (ftnlen)sizeof(integer)); e_wsfe(); nnb = 6; } s_rsle(&io___44); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); /* Find the maximum value of NB. */ maxnb = 0; i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ i__2 = nbval[i__ - 1]; maxnb = max(i__2,maxnb); /* L60: */ } if (blas) { s_wsfe(&io___47); do_fio(&c__1, "INCX: ", (ftnlen)7); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { nxval[i__ - 1] = 0; /* L70: */ } } else { /* LAPACK data files: Read in the values for NX. */ s_rsle(&io___49); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nxval[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); s_wsfe(&io___50); do_fio(&c__1, "NB: ", (ftnlen)7); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); s_wsfe(&io___51); do_fio(&c__1, "NX: ", (ftnlen)7); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nxval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read in NLDA and the values for LDA. */ s_rsle(&io___52); do_lio(&c__3, &c__1, (char *)&nlda, (ftnlen)sizeof(integer)); e_rsle(); if (nlda > 4) { s_wsfe(&io___54); do_fio(&c__1, "LDA", (ftnlen)3); do_fio(&c__1, "NLDA", (ftnlen)4); do_fio(&c__1, (char *)&c__4, (ftnlen)sizeof(integer)); e_wsfe(); nlda = 4; } s_rsle(&io___55); i__1 = nlda; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer) ); } e_rsle(); s_wsfe(&io___57); do_fio(&c__1, "LDA: ", (ftnlen)7); i__1 = nlda; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Check that LDA >= 1 for all values of LDA. */ ldaok = TRUE_; maxlda = 0; i__1 = nlda; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ i__2 = ldaval[i__ - 1]; maxlda = max(i__2,maxlda); if (ldaval[i__ - 1] <= 0) { s_wsfe(&io___60); do_fio(&c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer)); e_wsfe(); ldaok = FALSE_; } /* L80: */ } if (! ldaok) { s_wsle(&io___61); e_wsle(); } /* Check that MAXLDA*MAXN <= LA (for the dense routines). */ ldanok = TRUE_; need = maxlda * maxn; if (need > 272384) { s_wsfe(&io___64); do_fio(&c__1, (char *)&maxlda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&maxn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&need, (ftnlen)sizeof(integer)); e_wsfe(); ldanok = FALSE_; } /* Check that MAXLDA*MAXM + MAXM*MAXK <= 3*LA (for band routines). */ ldamok = TRUE_; need = maxlda * maxm + maxm * maxk; if (need > 817152) { need = (need + 2) / 3; s_wsfe(&io___66); do_fio(&c__1, (char *)&maxlda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&maxm, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&maxk, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&need, (ftnlen)sizeof(integer)); e_wsfe(); ldamok = FALSE_; } /* Check that MAXN*MAXNB (or MAXN*INCX) <= LA. */ nxnbok = TRUE_; need = maxn * maxnb; if (need > 272384) { s_wsfe(&io___68); do_fio(&c__1, (char *)&maxn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&maxnb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&need, (ftnlen)sizeof(integer)); e_wsfe(); nxnbok = FALSE_; } if (! (mok && nok && ldaok && ldanok && nxnbok)) { s_wsfe(&io___69); e_wsfe(); goto L110; } if (! ldamok) { s_wsle(&io___70); e_wsle(); } /* Read the minimum time to time a subroutine. */ s_wsle(&io___71); e_wsle(); s_rsle(&io___72); do_lio(&c__5, &c__1, (char *)&timmin, (ftnlen)sizeof(doublereal)); e_rsle(); s_wsfe(&io___74); do_fio(&c__1, (char *)&timmin, (ftnlen)sizeof(doublereal)); e_wsfe(); s_wsle(&io___75); e_wsle(); /* Read the first input line. */ i__1 = s_rsfe(&io___76); if (i__1 != 0) { goto L100; } i__1 = do_fio(&c__1, line, (ftnlen)80); if (i__1 != 0) { goto L100; } i__1 = e_rsfe(); if (i__1 != 0) { goto L100; } /* If the first record is the special signal 'NONE', then get the next line but don't time ZGEMV and CGEMM. */ if (lsamen_(&c__4, line, "NONE")) { i__1 = s_rsfe(&io___77); if (i__1 != 0) { goto L100; } i__1 = do_fio(&c__1, line, (ftnlen)80); if (i__1 != 0) { goto L100; } i__1 = e_rsfe(); if (i__1 != 0) { goto L100; } } else { s_wsfe(&io___78); e_wsfe(); /* If the first record is the special signal 'BAND', then time the band routine ZGBMV and ZGEMM with N = K. */ if (lsamen_(&c__4, line, "BAND")) { if (ldamok) { if (mkmax > 272384) { i2 = 544768 - mkmax + 1; j2 = 2; } else { i2 = 272384 - mkmax + 1; j2 = 3; } i__1 = mkmax / 2; ztimmv_("ZGBMV ", &nm, mval, &nn, nval, &nlda, ldaval, & timmin, &a_ref(1, 1), &i__1, &a_ref(i2, j2), &a_ref( 272384 - mkmax / 2 + 1, 3), reslts, &ldr1, &ldr2, & c__6, (ftnlen)6); } else { s_wsfe(&io___83); do_fio(&c__1, "ZGBMV ", (ftnlen)6); e_wsfe(); } ztimmm_("ZGEMM ", "K", &nn, nval, &nlda, ldaval, &timmin, &a_ref( 1, 1), &a_ref(1, 2), &a_ref(1, 3), reslts, &ldr1, &ldr2, & c__6, (ftnlen)6, (ftnlen)1); i__1 = s_rsfe(&io___84); if (i__1 != 0) { goto L100; } i__1 = do_fio(&c__1, line, (ftnlen)80); if (i__1 != 0) { goto L100; } i__1 = e_rsfe(); if (i__1 != 0) { goto L100; } } else { /* Otherwise time ZGEMV and CGEMM. */ ztimmv_("ZGEMV ", &nn, nval, &nnb, nbval, &nlda, ldaval, &timmin, &a_ref(1, 1), &c_b172, &a_ref(1, 2), &a_ref(1, 3), reslts, &ldr1, &ldr2, &c__6, (ftnlen)6); ztimmm_("ZGEMM ", "N", &nn, nval, &nlda, ldaval, &timmin, &a_ref( 1, 1), &a_ref(1, 2), &a_ref(1, 3), reslts, &ldr1, &ldr2, & c__6, (ftnlen)6, (ftnlen)1); } } /* Call the appropriate timing routine for each input line. */ s_wsfe(&io___85); e_wsfe(); L90: *(unsigned char *)c1 = *(unsigned char *)line; s_copy(c2, line + 1, (ftnlen)2, (ftnlen)2); s_copy(c3, line + 3, (ftnlen)3, (ftnlen)3); /* Check first character for correct precision. */ if (! lsame_(c1, "Zomplex precision")) { s_wsfe(&io___89); do_fio(&c__1, line, (ftnlen)6); e_wsfe(); } else if (lsamen_(&c__2, c2, "B2") || lsamen_(& c__3, c3, "MV ") || lsamen_(&c__3, c3, "SV ") || lsamen_(&c__3, c3, "R ") || lsamen_(&c__3, c3, "RC ") || lsamen_(&c__3, c3, "RU ") || lsamen_(& c__3, c3, "R2 ")) { /* Level 2 BLAS */ ztimb2_(line, &nm, mval, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, &c_b172, &timmin, &a_ref(1, 1), &a_ref(1, 2), &a_ref( 1, 3), reslts, &ldr1, &ldr2, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "B3") || lsamen_(& c__3, c3, "MM ") || lsamen_(&c__3, c3, "SM ") || lsamen_(&c__3, c3, "RK ") || lsamen_(&c__3, c3, "R2K")) { /* Level 3 BLAS */ ztimb3_(line, &nm, mval, &nn, nval, &nk, kval, &nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(1, 2), &a_ref(1, 3), reslts, &ldr1, & ldr2, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "QR") || lsamen_(& c__2, c3, "QR") || lsamen_(&c__2, c3 + 1, "QR")) { /* QR routines */ ztimqr_(line, &nn, mval, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), e, &a_ref(1, 2), &a_ref(1, 3), d__, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "LQ") || lsamen_(& c__2, c3, "LQ") || lsamen_(&c__2, c3 + 1, "LQ")) { /* LQ routines */ ztimlq_(line, &nn, mval, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), e, &a_ref(1, 2), &a_ref(1, 3), d__, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "QL") || lsamen_(& c__2, c3, "QL") || lsamen_(&c__2, c3 + 1, "QL")) { /* QL routines */ ztimql_(line, &nn, mval, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), e, &a_ref(1, 2), &a_ref(1, 3), d__, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "RQ") || lsamen_(& c__2, c3, "RQ") || lsamen_(&c__2, c3 + 1, "RQ")) { /* RQ routines */ ztimrq_(line, &nn, mval, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), e, &a_ref(1, 2), &a_ref(1, 3), d__, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "QP") || lsamen_(& c__3, c3, "QPF")) { /* QR with column pivoting */ ztimqp_(line, &nm, mval, nval, &nlda, ldaval, &timmin, &a_ref(1, 1), & a_ref(1, 2), e, &a_ref(1, 3), d__, iwork, reslts, &ldr1, & ldr2, &c__6, (ftnlen)80); /* Rank-Revealing QR factorization */ ztimq3_(line, &nm, mval, nval, &nnb, nbval, nxval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(1, 2), e, &a_ref(1, 3), d__, iwork, reslts, &ldr1, &ldr2, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "HR") || lsamen_(& c__3, c3, "HRD") || lsamen_(&c__2, c3 + 1, "HR")) { /* Reduction to Hessenberg form */ ztimhr_(line, &nn, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), e, &a_ref(1, 2), &a_ref(1, 3), d__, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "TD") || lsamen_(& c__3, c3, "TRD") || lsamen_(&c__2, c3 + 1, "TR")) { /* Reduction to tridiagonal form */ ztimtd_(line, &nn, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(1, 2), d__, e, &a_ref(1, 3), reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "BR") || lsamen_(& c__3, c3, "BRD") || lsamen_(&c__2, c3 + 1, "BR")) { /* Reduction to bidiagonal form */ ztimbr_(line, &nn, mval, nval, &nk, kval, &nnb, nbval, nxval, &nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(1, 2), d__, e, &a_ref(1, 3), reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "GE")) { /* Routines for general matrices */ ztimge_(line, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(1, 2), &a_ref(1, 3), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "GB")) { /* General band matrices */ if (ldamok) { ztimgb_(line, &nm, mval, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(272384 - mkmax + 1, 3), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen) 80); } else { s_wsfe(&io___93); do_fio(&c__1, line, (ftnlen)6); e_wsfe(); } } else if (lsamen_(&c__2, c2, "GT")) { /* Routines for general tridiagonal matrices */ ztimgt_(line, &nn, nval, &nk, kval, &nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(1, 2), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, ( ftnlen)80); } else if (lsamen_(&c__2, c2, "PO")) { /* Positive definite matrices */ ztimpo_(line, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(1, 2), iwork, reslts, &ldr1, & ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "PP")) { /* Positive definite packed matrices */ ztimpp_(line, &nn, nval, &nk, kval, &c_b172, &timmin, &a_ref(1, 1), & a_ref(1, 2), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, ( ftnlen)80); } else if (lsamen_(&c__2, c2, "PB")) { /* Positive definite banded matrices */ if (ldamok) { if (mkmax > 272384) { j2 = 2; i2 = 544768 - mkmax + 1; } else { j2 = 3; i2 = 272384 - mkmax + 1; } ztimpb_(line, &nm, mval, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(i2, j2), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else { s_wsfe(&io___94); do_fio(&c__1, line, (ftnlen)6); e_wsfe(); } } else if (lsamen_(&c__2, c2, "PT")) { /* Routines for positive definite tridiagonal matrices */ ztimpt_(line, &nn, nval, &nk, kval, &nlda, ldaval, &timmin, d__, & a_ref(1, 1), &a_ref(1, 2), reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "HE")) { /* Hermitian indefinite matrices */ ztimhe_(line, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(1, 2), &a_ref(1, 3), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "HP")) { /* Hermitian indefinite packed matrices */ ztimhp_(line, &nn, nval, &nk, kval, &c_b172, &timmin, &a_ref(1, 1), & a_ref(1, 2), &a_ref(1, 3), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "SY")) { /* Symmetric indefinite matrices */ ztimsy_(line, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(1, 2), &a_ref(1, 3), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "SP")) { /* Symmetric indefinite packed matrices */ ztimsp_(line, &nn, nval, &nk, kval, &c_b172, &timmin, &a_ref(1, 1), & a_ref(1, 2), &a_ref(1, 3), iwork, reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "TR")) { /* Triangular matrices */ ztimtr_(line, &nn, nval, &nk, kval, &nnb, nbval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(1, 2), reslts, &ldr1, &ldr2, & ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "TP")) { /* Triangular packed matrices */ ztimtp_(line, &nn, nval, &nk, kval, &c_b172, &timmin, &a_ref(1, 1), & a_ref(1, 2), reslts, &ldr1, &ldr2, &ldr3, &c__6, (ftnlen)80); } else if (lsamen_(&c__2, c2, "TB")) { /* Triangular band matrices */ if (ldamok) { if (mkmax > 272384) { j2 = 2; i2 = 544768 - mkmax + 1; } else { j2 = 3; i2 = 272384 - mkmax + 1; } ztimtb_(line, &nm, mval, &nn, nval, &nk, kval, &nlda, ldaval, & timmin, &a_ref(1, 1), &a_ref(i2, j2), reslts, &ldr1, & ldr2, &ldr3, &c__6, (ftnlen)80); } else { s_wsfe(&io___95); do_fio(&c__1, line, (ftnlen)6); e_wsfe(); } } else if (lsamen_(&c__2, c2, "LS")) { /* Least squares drivers */ ztimls_(line, &nm, mval, &nn, nval, &nk, kval, &nnb, nbval, nxval, & nlda, ldaval, &timmin, &a_ref(1, 1), &a_ref(1, 2), &b_ref(1, 1), &b_ref(1, 2), s, &s[512], opctbl, timtbl, flptbl, work, rwork, iwork, &c__6, (ftnlen)80); } else { s_wsfe(&io___103); do_fio(&c__1, line, (ftnlen)6); e_wsfe(); } /* Read the next line of the input file. */ i__1 = s_rsfe(&io___104); if (i__1 != 0) { goto L100; } i__1 = do_fio(&c__1, line, (ftnlen)80); if (i__1 != 0) { goto L100; } i__1 = e_rsfe(); if (i__1 != 0) { goto L100; } goto L90; /* Branch to this line when the last record is read. */ L100: s2 = dsecnd_(); s_wsfe(&io___106); e_wsfe(); s_wsfe(&io___107); d__1 = s2 - s1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); L110: /* End of ZTIMAA */ return 0; } /* MAIN__ */
/* Subroutine */ int dtimql_(char *line, integer *nm, integer *mval, integer * nval, integer *nk, integer *kval, integer *nnb, integer *nbval, integer *nxval, integer *nlda, integer *ldaval, doublereal *timmin, doublereal *a, doublereal *tau, doublereal *b, doublereal *work, doublereal *reslts, integer *ldr1, integer *ldr2, integer *ldr3, integer *nout, ftnlen line_len) { /* Initialized data */ static char subnam[6*3] = "DGEQLF" "DORGQL" "DORMQL"; static char sides[1*2] = "L" "R"; static char transs[1*2] = "N" "T"; static integer iseed[4] = { 0,0,0,1 }; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; static char fmt_9996[] = "(5x,\002K = min(M,N)\002,/)"; static char fmt_9995[] = "(/5x,a6,\002 with SIDE = '\002,a1,\002', TRANS" " = '\002,a1,\002', \002,a1,\002 =\002,i6,/)"; static char fmt_9994[] = "(\002 *** No pairs (M,N) found with M >= N: " " \002,a6,\002 not timed\002)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void); /* Local variables */ static integer ilda; static char labm[1], side[1]; static integer info; static char path[3]; static doublereal time; static integer isub, muse[12], nuse[12], i__, k, m, n; static char cname[6]; static integer iside; extern doublereal dopla_(char *, integer *, integer *, integer *, integer *, integer *); static integer itoff, itran, minmn; extern /* Subroutine */ int icopy_(integer *, integer *, integer *, integer *, integer *); static char trans[1]; static integer k1, i4, m1, n1; static doublereal s1, s2; extern /* Subroutine */ int dprtb4_(char *, char *, char *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen, ftnlen), dprtb5_(char *, char *, char *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen, ftnlen); static integer ic, nb, ik, im; extern doublereal dsecnd_(void); extern /* Subroutine */ int dgeqlf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); static integer lw, nx, reseed[4]; extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen), dlacpy_( char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dtimmg_( integer *, integer *, integer *, doublereal *, integer *, integer *, integer *), dlatms_(integer *, integer *, char *, integer *, char *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, char *, doublereal *, integer *, doublereal *, integer *), dorgql_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), dormql_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), xlaenv_(integer *, integer *); static doublereal untime; static logical timsub[3]; static integer lda, icl, inb, imx; static doublereal ops; /* Fortran I/O blocks */ static cilist io___9 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___29 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___32 = { 0, 0, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___54 = { 0, 0, 0, fmt_9994, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\ reslts_dim2 + (a_2))*reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DTIMQL times the LAPACK routines to perform the QL factorization of a DOUBLE PRECISION general matrix. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NM (input) INTEGER The number of values of M and N contained in the vectors MVAL and NVAL. The matrix sizes are used in pairs (M,N). MVAL (input) INTEGER array, dimension (NM) The values of the matrix row dimension M. NVAL (input) INTEGER array, dimension (NM) The values of the matrix column dimension N. NK (input) INTEGER The number of values of K in the vector KVAL. KVAL (input) INTEGER array, dimension (NK) The values of the matrix dimension K, used in DORMQL. NNB (input) INTEGER The number of values of NB and NX contained in the vectors NBVAL and NXVAL. The blocking parameters are used in pairs (NB,NX). NBVAL (input) INTEGER array, dimension (NNB) The values of the blocksize NB. NXVAL (input) INTEGER array, dimension (NNB) The values of the crossover point NX. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) where LDAMAX and NMAX are the maximum values of LDA and N. TAU (workspace) DOUBLE PRECISION array, dimension (min(M,N)) B (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) WORK (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NBMAX) where NBMAX is the maximum value of NB. RESLTS (workspace) DOUBLE PRECISION array, dimension (LDR1,LDR2,LDR3,2*NK) The timing results for each subroutine over the relevant values of (M,N), (NB,NX), and LDA. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= max(1,NNB). LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NM). LDR3 (input) INTEGER The third dimension of RESLTS. LDR3 >= max(1,NLDA). NOUT (input) INTEGER The unit number for output. Internal Parameters =================== MODE INTEGER The matrix type. MODE = 3 is a geometric distribution of eigenvalues. See DLATMS for further details. COND DOUBLE PRECISION The condition number of the matrix. The singular values are set to values from DMAX to DMAX/COND. DMAX DOUBLE PRECISION The magnitude of the largest singular value. ===================================================================== Parameter adjustments */ --mval; --nval; --kval; --nbval; --nxval; --ldaval; --a; --tau; --b; --work; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_dim3 = *ldr3; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1) ); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16); s_copy(path + 1, "QL", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__3, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (info != 0) { goto L230; } /* Check that M <= LDA for the input values. */ s_copy(cname, line, (ftnlen)6, (ftnlen)6); atimck_(&c__1, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___9.ciunit = *nout; s_wsfe(&io___9); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L230; } /* Do for each pair of values (M,N): */ i__1 = *nm; for (im = 1; im <= i__1; ++im) { m = mval[im]; n = nval[im]; minmn = min(m,n); icopy_(&c__4, iseed, &c__1, reseed, &c__1); /* Do for each value of LDA: */ i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { lda = ldaval[ilda]; /* Do for each pair of values (NB, NX) in NBVAL and NXVAL. */ i__3 = *nnb; for (inb = 1; inb <= i__3; ++inb) { nb = nbval[inb]; xlaenv_(&c__1, &nb); nx = nxval[inb]; xlaenv_(&c__3, &nx); /* Computing MAX */ i__4 = 1, i__5 = n * max(1,nb); lw = max(i__4,i__5); /* Generate a test matrix of size M by N. */ icopy_(&c__4, reseed, &c__1, iseed, &c__1); dlatms_(&m, &n, "Uniform", iseed, "Nonsymm", &tau[1], &c__3, & c_b24, &c_b25, &m, &n, "No packing", &b[1], &lda, & work[1], &info); if (timsub[0]) { /* DGEQLF: QL factorization */ dlacpy_("Full", &m, &n, &b[1], &lda, &a[1], &lda); ic = 0; s1 = dsecnd_(); L10: dgeqlf_(&m, &n, &a[1], &lda, &tau[1], &work[1], &lw, & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dlacpy_("Full", &m, &n, &b[1], &lda, &a[1], &lda); goto L10; } /* Subtract the time used in DLACPY. */ icl = 1; s1 = dsecnd_(); L20: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dlacpy_("Full", &m, &n, &a[1], &lda, &b[1], &lda); goto L20; } time = (time - untime) / (doublereal) ic; ops = dopla_("DGEQLF", &m, &n, &c__0, &c__0, &nb); reslts_ref(inb, im, ilda, 1) = dmflop_(&ops, &time, &info) ; } else { /* If DGEQLF was not timed, generate a matrix and factor it using DGEQLF anyway so that the factored form of the matrix can be used in timing the other routines. */ dlacpy_("Full", &m, &n, &b[1], &lda, &a[1], &lda); dgeqlf_(&m, &n, &a[1], &lda, &tau[1], &work[1], &lw, & info); } if (timsub[1]) { /* DORGQL: Generate orthogonal matrix Q from the QL factorization */ dlacpy_("Full", &m, &minmn, &a[1], &lda, &b[1], &lda); ic = 0; s1 = dsecnd_(); L30: dorgql_(&m, &minmn, &minmn, &b[1], &lda, &tau[1], &work[1] , &lw, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dlacpy_("Full", &m, &minmn, &a[1], &lda, &b[1], &lda); goto L30; } /* Subtract the time used in DLACPY. */ icl = 1; s1 = dsecnd_(); L40: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dlacpy_("Full", &m, &minmn, &a[1], &lda, &b[1], &lda); goto L40; } time = (time - untime) / (doublereal) ic; ops = dopla_("DORGQL", &m, &minmn, &minmn, &c__0, &nb); reslts_ref(inb, im, ilda, 2) = dmflop_(&ops, &time, &info) ; } /* L50: */ } /* L60: */ } /* L70: */ } /* Print tables of results */ for (isub = 1; isub <= 2; ++isub) { if (! timsub[isub - 1]) { goto L90; } io___29.ciunit = *nout; s_wsfe(&io___29); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); if (*nlda > 1) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___31.ciunit = *nout; s_wsfe(&io___31); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L80: */ } } io___32.ciunit = *nout; s_wsle(&io___32); e_wsle(); if (isub == 2) { io___33.ciunit = *nout; s_wsfe(&io___33); e_wsfe(); } dprtb4_("( NB, NX)", "M", "N", nnb, &nbval[1], &nxval[1], nm, &mval[ 1], &nval[1], nlda, &reslts_ref(1, 1, 1, isub), ldr1, ldr2, nout, (ftnlen)11, (ftnlen)1, (ftnlen)1); L90: ; } /* Time DORMQL separately. Here the starting matrix is M by N, and K is the free dimension of the matrix multiplied by Q. */ if (timsub[2]) { /* Check that K <= LDA for the input values. */ atimck_(&c__3, cname, nk, &kval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___34.ciunit = *nout; s_wsfe(&io___34); do_fio(&c__1, subnam_ref(0, 3), (ftnlen)6); e_wsfe(); goto L230; } /* Use only the pairs (M,N) where M >= N. */ imx = 0; i__1 = *nm; for (im = 1; im <= i__1; ++im) { if (mval[im] >= nval[im]) { ++imx; muse[imx - 1] = mval[im]; nuse[imx - 1] = nval[im]; } /* L100: */ } /* DORMQL: Multiply by Q stored as a product of elementary transformations Do for each pair of values (M,N): */ i__1 = imx; for (im = 1; im <= i__1; ++im) { m = muse[im - 1]; n = nuse[im - 1]; /* Do for each value of LDA: */ i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { lda = ldaval[ilda]; /* Generate an M by N matrix and form its QL decomposition. */ dlatms_(&m, &n, "Uniform", iseed, "Nonsymm", &tau[1], &c__3, & c_b24, &c_b25, &m, &n, "No packing", &a[1], &lda, & work[1], &info); /* Computing MAX */ i__3 = 1, i__4 = n * max(1,nb); lw = max(i__3,i__4); dgeqlf_(&m, &n, &a[1], &lda, &tau[1], &work[1], &lw, &info); /* Do first for SIDE = 'L', then for SIDE = 'R' */ i4 = 0; for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[iside - 1]; /* Do for each pair of values (NB, NX) in NBVAL and NXVAL. */ i__3 = *nnb; for (inb = 1; inb <= i__3; ++inb) { nb = nbval[inb]; xlaenv_(&c__1, &nb); nx = nxval[inb]; xlaenv_(&c__3, &nx); /* Do for each value of K in KVAL */ i__4 = *nk; for (ik = 1; ik <= i__4; ++ik) { k = kval[ik]; /* Sort out which variable is which */ if (iside == 1) { m1 = m; k1 = n; n1 = k; /* Computing MAX */ i__5 = 1, i__6 = n1 * max(1,nb); lw = max(i__5,i__6); } else { n1 = m; k1 = n; m1 = k; /* Computing MAX */ i__5 = 1, i__6 = m1 * max(1,nb); lw = max(i__5,i__6); } /* Do first for TRANS = 'N', then for TRANS = 'T' */ itoff = 0; for (itran = 1; itran <= 2; ++itran) { *(unsigned char *)trans = *(unsigned char *)& transs[itran - 1]; dtimmg_(&c__0, &m1, &n1, &b[1], &lda, &c__0, & c__0); ic = 0; s1 = dsecnd_(); L110: dormql_(side, trans, &m1, &n1, &k1, &a[1], & lda, &tau[1], &b[1], &lda, &work[1], & lw, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dtimmg_(&c__0, &m1, &n1, &b[1], &lda, & c__0, &c__0); goto L110; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L120: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dtimmg_(&c__0, &m1, &n1, &b[1], &lda, & c__0, &c__0); goto L120; } time = (time - untime) / (doublereal) ic; i__5 = iside - 1; ops = dopla_("DORMQL", &m1, &n1, &k1, &i__5, & nb); reslts_ref(inb, im, ilda, i4 + itoff + ik) = dmflop_(&ops, &time, &info); itoff = *nk; /* L130: */ } /* L140: */ } /* L150: */ } i4 = *nk << 1; /* L160: */ } /* L170: */ } /* L180: */ } /* Print tables of results */ isub = 3; i4 = 1; if (imx >= 1) { for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[iside - 1]; if (iside == 1) { io___49.ciunit = *nout; s_wsfe(&io___49); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); if (*nlda > 1) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___50.ciunit = *nout; s_wsfe(&io___50); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen) sizeof(integer)); e_wsfe(); /* L190: */ } } } for (itran = 1; itran <= 2; ++itran) { *(unsigned char *)trans = *(unsigned char *)&transs[itran - 1]; i__1 = *nk; for (ik = 1; ik <= i__1; ++ik) { if (iside == 1) { n = kval[ik]; io___51.ciunit = *nout; s_wsfe(&io___51); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; e_wsfe(); *(unsigned char *)labm = 'M'; } else { m = kval[ik]; io___53.ciunit = *nout; s_wsfe(&io___53); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, "M", (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; e_wsfe(); *(unsigned char *)labm = 'N'; } dprtb5_("NB", labm, "K", nnb, &nbval[1], &imx, muse, nuse, nlda, &reslts_ref(1, 1, 1, i4), ldr1, ldr2, nout, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++i4; /* L200: */ } /* L210: */ } /* L220: */ } } else { io___54.ciunit = *nout; s_wsfe(&io___54); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); } } L230: return 0; /* End of DTIMQL */ } /* dtimql_ */
/* Subroutine */ int dtimgt_(char *line, integer *nm, integer *mval, integer * nns, integer *nsval, integer *nlda, integer *ldaval, doublereal * timmin, doublereal *a, doublereal *b, integer *iwork, doublereal * reslts, integer *ldr1, integer *ldr2, integer *ldr3, integer *nout, ftnlen line_len) { /* Initialized data */ static char subnam[6*4] = "DGTTRF" "DGTTRS" "DGTSV " "DGTSL "; static char transs[1*2] = "N" "T"; /* Format strings */ static char fmt_9998[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9997[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9996[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; static char fmt_9999[] = "(\002 DGTTRS with TRANS = '\002,a1,\002'\002,/)" ; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, i__3, i__4; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void); /* Local variables */ static integer ilda, info; static char path[3]; static doublereal time; static integer isub, nrhs, i__, m, n; static char cname[6]; extern doublereal dopgb_(char *, integer *, integer *, integer *, integer *, integer *); static integer laval[1], itran; extern /* Subroutine */ int dgtsl_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); static char trans[1]; extern /* Subroutine */ int dgtsv_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal s1, s2; static integer ic, im; extern doublereal dsecnd_(void); extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dtimmg_( integer *, integer *, integer *, doublereal *, integer *, integer *, integer *), dprtbl_(char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen), dgttrf_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal untime; static logical timsub[4]; extern /* Subroutine */ int dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static integer ldb, icl; static doublereal ops; /* Fortran I/O blocks */ static cilist io___8 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___26 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___27 = { 0, 0, 0, 0, 0 }; static cilist io___29 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___30 = { 0, 0, 0, fmt_9999, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\ reslts_dim2 + (a_2))*reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DTIMGT times DGTTRF, -TRS, -SV, and -SL. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NM (input) INTEGER The number of values of M contained in the vector MVAL. MVAL (input) INTEGER array, dimension (NM) The values of the matrix size M. NNS (input) INTEGER The number of values of NRHS contained in the vector NSVAL. NSVAL (input) INTEGER array, dimension (NNS) The values of the number of right hand sides NRHS. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) DOUBLE PRECISION array, dimension (NMAX*4) where NMAX is the maximum value permitted for N. B (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) IWORK (workspace) INTEGER array, dimension (NMAX) RESLTS (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,LDR3,NSUBS+1) The timing results for each subroutine over the relevant values of N. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= 1. LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NM). LDR3 (input) INTEGER The third dimension of RESLTS. LDR3 >= max(1,NLDA). NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --mval; --nsval; --ldaval; --a; --b; --iwork; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_dim3 = *ldr3; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1) ); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16); s_copy(path + 1, "GT", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__4, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (info != 0) { goto L180; } /* Check that N <= LDA for the input values. */ for (isub = 2; isub <= 4; ++isub) { if (! timsub[isub - 1]) { goto L10; } s_copy(cname, subnam_ref(0, isub), (ftnlen)6, (ftnlen)6); atimck_(&c__2, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___8.ciunit = *nout; s_wsfe(&io___8); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); timsub[isub - 1] = FALSE_; } L10: ; } /* Do for each value of M: */ i__1 = *nm; for (im = 1; im <= i__1; ++im) { m = mval[im]; n = max(m,1); /* Time DGTTRF */ if (timsub[0]) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L20: dgttrf_(&m, &a[1], &a[n], &a[n * 2], &a[n * 3 - 2], &iwork[1], & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); goto L20; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L30: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); goto L30; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTTRF", &m, &m, &c__1, &c__1, &iwork[1]) ; reslts_ref(1, im, 1, 1) = dmflop_(&ops, &time, &info); } else if (timsub[1]) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); } /* Generate another matrix and factor it using DGTTRF so that the factored form can be used in timing the other routines. */ if (ic != 1) { dgttrf_(&m, &a[1], &a[n], &a[n * 2], &a[n * 3 - 2], &iwork[1], & info); } /* Time DGTTRS */ if (timsub[1]) { for (itran = 1; itran <= 2; ++itran) { *(unsigned char *)trans = *(unsigned char *)&transs[itran - 1] ; i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { ldb = ldaval[ilda]; i__3 = *nns; for (i__ = 1; i__ <= i__3; ++i__) { nrhs = nsval[i__]; dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L40: dgttrs_(trans, &m, &nrhs, &a[1], &a[n], &a[n * 2], &a[ n * 3 - 2], &iwork[1], &b[1], &ldb, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, & c__0); goto L40; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L50: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, & c__0); goto L50; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTTRS", &m, &nrhs, &c__0, &c__0, & iwork[1]); if (itran == 1) { reslts_ref(i__, im, ilda, 2) = dmflop_(&ops, & time, &info); } else { reslts_ref(i__, im, ilda, 5) = dmflop_(&ops, & time, &info); } /* L60: */ } /* L70: */ } /* L80: */ } } if (timsub[2]) { i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { ldb = ldaval[ilda]; i__3 = *nns; for (i__ = 1; i__ <= i__3; ++i__) { nrhs = nsval[i__]; i__4 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__4, &c__0, &c__0); dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L90: dgtsv_(&m, &nrhs, &a[1], &a[n], &a[n * 2], &b[1], &ldb, & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { i__4 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__4, &c__0, &c__0); dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); goto L90; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L100: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { i__4 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__4, &c__0, &c__0); dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); goto L100; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTSV ", &m, &nrhs, &c__0, &c__0, &iwork[1]); reslts_ref(i__, im, ilda, 3) = dmflop_(&ops, &time, &info) ; /* L110: */ } /* L120: */ } } if (timsub[3]) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); dtimmg_(&c__0, &m, &c__1, &b[1], &n, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L130: dgtsl_(&m, &a[1], &a[n], &a[n * 2], &b[1], &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); dtimmg_(&c__0, &m, &c__1, &b[1], &ldb, &c__0, &c__0); goto L130; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L140: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); dtimmg_(&c__0, &m, &c__1, &b[1], &ldb, &c__0, &c__0); goto L140; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTSV ", &m, &c__1, &c__0, &c__0, &iwork[1]); reslts_ref(1, im, 1, 4) = dmflop_(&ops, &time, &info); } /* L150: */ } /* Print a table of results for each timed routine. */ for (isub = 1; isub <= 4; ++isub) { if (! timsub[isub - 1]) { goto L170; } io___25.ciunit = *nout; s_wsfe(&io___25); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); if (*nlda > 1 && (timsub[1] || timsub[2])) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___26.ciunit = *nout; s_wsfe(&io___26); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L160: */ } } io___27.ciunit = *nout; s_wsle(&io___27); e_wsle(); if (isub == 1) { dprtbl_(" ", "N", &c__1, laval, nm, &mval[1], &c__1, &reslts[ reslts_offset], ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1); } else if (isub == 2) { io___29.ciunit = *nout; s_wsfe(&io___29); do_fio(&c__1, "N", (ftnlen)1); e_wsfe(); dprtbl_("NRHS", "N", nns, &nsval[1], nm, &mval[1], nlda, & reslts_ref(1, 1, 1, 2), ldr1, ldr2, nout, (ftnlen)4, ( ftnlen)1); io___30.ciunit = *nout; s_wsfe(&io___30); do_fio(&c__1, "T", (ftnlen)1); e_wsfe(); dprtbl_("NRHS", "N", nns, &nsval[1], nm, &mval[1], nlda, & reslts_ref(1, 1, 1, 5), ldr1, ldr2, nout, (ftnlen)4, ( ftnlen)1); } else if (isub == 3) { dprtbl_("NRHS", "N", nns, &nsval[1], nm, &mval[1], nlda, & reslts_ref(1, 1, 1, 3), ldr1, ldr2, nout, (ftnlen)4, ( ftnlen)1); } else if (isub == 4) { dprtbl_(" ", "N", &c__1, laval, nm, &mval[1], &c__1, &reslts_ref( 1, 1, 1, 4), ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1); } L170: ; } L180: return 0; /* End of DTIMGT */ } /* dtimgt_ */
/* Subroutine */ int dtimmv_(char *vname, integer *nn, integer *nval, integer *nk, integer *kval, integer *nlda, integer *ldaval, doublereal * timmin, doublereal *a, integer *lb, doublereal *b, doublereal *c__, doublereal *reslts, integer *ldr1, integer *ldr2, integer *nout, ftnlen vname_len) { /* Initialized data */ static char subnam[6*2] = "DGEMV " "DGBMV "; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002: Unrecognized path or subroutine " "name\002,/)"; static char fmt_9998[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9997[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9996[] = "(5x,\002with LDA = \002,i5)"; static char fmt_9995[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void); /* Local variables */ static integer ilda, info; static doublereal time; static integer isub, nrhs, i__, k, n; static char cname[6]; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal s1, s2; extern doublereal dopbl2_(char *, integer *, integer *, integer *, integer *); static integer ib, ic, ik, in, kl, ku; extern doublereal dsecnd_(void); extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern logical lsamen_(integer *, char *, char *); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int dtimmg_(integer *, integer *, integer *, doublereal *, integer *, integer *, integer *), dprtbl_(char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen); static doublereal untime; static logical timsub[2]; static integer lda, ldb, icl; static doublereal ops; static char lab1[1], lab2[1]; /* Fortran I/O blocks */ static cilist io___5 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___9 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___10 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___11 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___13 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___14 = { 0, 0, 0, 0, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3) reslts[((a_3)*reslts_dim2 + (a_2))*\ reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DTIMMV times individual BLAS 2 routines. Arguments ========= VNAME (input) CHARACTER*(*) The name of the Level 2 BLAS routine to be timed. NN (input) INTEGER The number of values of N contained in the vector NVAL. NVAL (input) INTEGER array, dimension (NN) The values of the matrix dimension N. NK (input) INTEGER The number of values of K contained in the vector KVAL. KVAL (input) INTEGER array, dimension (NK) The values of the bandwidth K. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) where LDAMAX and NMAX are the maximum values permitted for LDA and N. LB (input) INTEGER The length of B and C, needed when timing DGBMV. If timing DGEMV, LB >= LDAMAX*NMAX. B (workspace) DOUBLE PRECISION array, dimension (LB) C (workspace) DOUBLE PRECISION array, dimension (LB) RESLTS (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,NLDA) The timing results for each subroutine over the relevant values of N and LDA. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= max(1,NK). LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NN). NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --nval; --kval; --ldaval; --a; --b; --c__; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * 1); reslts -= reslts_offset; /* Function Body */ s_copy(cname, vname, (ftnlen)6, vname_len); for (isub = 1; isub <= 2; ++isub) { timsub[isub - 1] = lsamen_(&c__6, cname, subnam_ref(0, isub)); if (timsub[isub - 1]) { goto L20; } /* L10: */ } io___5.ciunit = *nout; s_wsfe(&io___5); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L150; L20: /* Check that N or K <= LDA for the input values. */ if (lsame_(cname + 2, "B")) { atimck_(&c__0, cname, nk, &kval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); *(unsigned char *)lab1 = 'M'; *(unsigned char *)lab2 = 'K'; } else { atimck_(&c__2, cname, nn, &nval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); *(unsigned char *)lab1 = ' '; *(unsigned char *)lab2 = 'N'; } if (info > 0) { io___9.ciunit = *nout; s_wsfe(&io___9); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L150; } /* Print the table header on unit NOUT. */ io___10.ciunit = *nout; s_wsfe(&io___10); do_fio(&c__1, vname, vname_len); e_wsfe(); if (*nlda == 1) { io___11.ciunit = *nout; s_wsfe(&io___11); do_fio(&c__1, (char *)&ldaval[1], (ftnlen)sizeof(integer)); e_wsfe(); } else { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___13.ciunit = *nout; s_wsfe(&io___13); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L30: */ } } io___14.ciunit = *nout; s_wsle(&io___14); e_wsle(); /* Time DGEMV */ if (timsub[0]) { i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nn; for (in = 1; in <= i__2; ++in) { n = nval[in]; nrhs = n; ldb = lda; dtimmg_(&c__1, &n, &n, &a[1], &lda, &c__0, &c__0); dtimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0); dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L40: ib = 1; i__3 = nrhs; for (i__ = 1; i__ <= i__3; ++i__) { dgemv_("No transpose", &n, &n, &c_b44, &a[1], &lda, &b[ib] , &c__1, &c_b44, &c__[ib], &c__1); ib += ldb; /* L50: */ } s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0); goto L40; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L60: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0); goto L60; } time = (time - untime) / (doublereal) ic; ops = nrhs * dopbl2_("DGEMV ", &n, &n, &c__0, &c__0); reslts_ref(1, in, ilda) = dmflop_(&ops, &time, &c__0); /* L70: */ } /* L80: */ } dprtbl_(lab1, lab2, &c__1, &nval[1], nn, &nval[1], nlda, &reslts[ reslts_offset], ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1); } else if (timsub[1]) { /* Time DGBMV */ i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nn; for (in = 1; in <= i__2; ++in) { n = nval[in]; i__3 = *nk; for (ik = 1; ik <= i__3; ++ik) { /* Computing MIN Computing MAX */ i__6 = 0, i__7 = kval[ik]; i__4 = n - 1, i__5 = max(i__6,i__7); k = min(i__4,i__5); kl = k; ku = k; ldb = n; dtimmg_(&c__2, &n, &n, &a[1], &lda, &kl, &ku); /* Computing MIN */ i__4 = k, i__5 = *lb / ldb; nrhs = min(i__4,i__5); dtimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0); dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L90: ib = 1; i__4 = nrhs; for (i__ = 1; i__ <= i__4; ++i__) { dgbmv_("No transpose", &n, &n, &kl, &ku, &c_b44, &a[ ku + 1], &lda, &b[ib], &c__1, &c_b44, &c__[ib] , &c__1); ib += ldb; /* L100: */ } s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0) ; goto L90; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L110: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0) ; goto L110; } time = (time - untime) / (doublereal) ic; ops = nrhs * dopbl2_("DGBMV ", &n, &n, &kl, &ku); reslts_ref(in, ik, ilda) = dmflop_(&ops, &time, &c__0); /* L120: */ } /* L130: */ } /* L140: */ } dprtbl_(lab1, lab2, nn, &nval[1], nk, &kval[1], nlda, &reslts[ reslts_offset], ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1); } L150: return 0; /* End of DTIMMV */ } /* dtimmv_ */
/* Main program */ int MAIN__(void) { /* Initialized data */ static doublereal threq = 2.; static char intstr[10] = "0123456789"; /* Format strings */ static char fmt_9994[] = "(\002 Tests of the COMPLEX*16 LAPACK routines" " \002,/\002 LAPACK VERSION \002,i1,\002.\002,i1,\002.\002,i1,/" "/\002 The following parameter values will be used:\002)"; static char fmt_9996[] = "(\002 Invalid input value: \002,a4,\002=\002,i" "6,\002; must be >=\002,i6)"; static char fmt_9995[] = "(\002 Invalid input value: \002,a4,\002=\002,i" "6,\002; must be <=\002,i6)"; static char fmt_9993[] = "(4x,a4,\002: \002,10i6,/11x,10i6)"; static char fmt_9992[] = "(/\002 Routines pass computational tests if te" "st ratio is \002,\002less than\002,f8.2,/)"; static char fmt_9999[] = "(/\002 Execution not attempted due to input er" "rors\002)"; static char fmt_9991[] = "(\002 Relative machine \002,a,\002 is taken to" " be\002,d16.6)"; static char fmt_9990[] = "(/1x,a3,\002: Unrecognized path name\002)"; static char fmt_9989[] = "(/1x,a3,\002 routines were not tested\002)"; static char fmt_9988[] = "(/1x,a3,\002 driver routines were not teste" "d\002)"; static char fmt_9998[] = "(/\002 End of tests\002)"; static char fmt_9997[] = "(\002 Total time used = \002,f12.2,\002 seco" "nds\002,/)"; /* System generated locals */ integer i__1, i__2; doublereal d__1; cilist ci__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), e_rsle(void), s_wsfe(cilist *), do_fio(integer * , char *, ftnlen), e_wsfe(void), do_lio(integer *, integer *, char *, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer f_clos(cllist *); /* Local variables */ doublecomplex a[153384] /* was [21912][7] */, b[8448] /* was [2112][ 4] */; integer i__, j, k; doublereal s[264]; char c1[1], c2[2]; doublereal s1, s2; integer ic, la, nb, nm, nn, vers_patch__, vers_major__, vers_minor__, lda, nnb; doublereal eps; integer nns, nnb2; char path[3]; integer mval[12], nval[12], nrhs; doublecomplex work[20856] /* was [132][158] */; integer lafac; logical fatal; char aline[72]; extern logical lsame_(char *, char *); integer nbval[12], nmats, nsval[12], nxval[12], iwork[3300]; doublereal rwork[19832]; integer nbval2[12]; extern /* Subroutine */ int zchkq3_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *); extern doublereal dlamch_(char *), dsecnd_(void); extern /* Subroutine */ int alareq_(char *, integer *, logical *, integer *, integer *, integer *), zchkgb_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchkge_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, integer *, integer *), zchkhe_( logical *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, integer *, integer *); extern logical lsamen_(integer *, char *, char *); extern /* Subroutine */ int zchkpb_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, integer *), ilaver_(integer *, integer *, integer *), zchkeq_(doublereal *, integer *), zchktb_(logical *, integer * , integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *), zchkhp_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchkgt_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, integer *, integer *), zchklq_( logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *); doublereal thresh; extern /* Subroutine */ int zchkpo_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, integer *), zchkpp_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *); logical tstchk; extern /* Subroutine */ int zchkql_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchkpt_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *); logical dotype[30]; extern /* Subroutine */ int zchkqp_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchkqr_( logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchkrq_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchksp_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zchktp_(logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, integer *), zchktr_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, integer *), zchksy_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zdrvgb_(logical *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, integer *, integer *), zchktz_( logical *, integer *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, doublecomplex *, doublereal *, doublereal *, doublecomplex *, doublecomplex *, doublereal *, integer *), zdrvge_(logical *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, integer *, integer *), zdrvhe_(logical *, integer * , integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *, integer *), zdrvgt_(logical *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, integer *, integer *), zdrvhp_( logical *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, integer *, integer *); integer ntypes; logical tsterr; extern /* Subroutine */ int zdrvls_(logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, logical *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, doublereal *, doublecomplex *, doublereal *, integer *, integer *); logical tstdrv; extern /* Subroutine */ int zdrvpb_(logical *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, integer *), zdrvpo_(logical *, integer *, integer * , integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, integer *), zdrvpp_(logical *, integer *, integer * , integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, integer *), zdrvpt_(logical *, integer *, integer * , integer *, doublereal *, logical *, doublecomplex *, doublereal *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, integer *), zdrvsp_(logical *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublereal *, integer *, integer *), zdrvsy_( logical *, integer *, integer *, integer *, doublereal *, logical *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublereal *, integer *, integer *); /* Fortran I/O blocks */ static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___10 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___14 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___18 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___19 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___20 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___21 = { 0, 5, 0, 0, 0 }; static cilist io___23 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___24 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___25 = { 0, 5, 0, 0, 0 }; static cilist io___27 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___28 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___29 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___30 = { 0, 5, 0, 0, 0 }; static cilist io___32 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___33 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___37 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___38 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 5, 0, 0, 0 }; static cilist io___41 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___42 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___46 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___51 = { 0, 5, 0, 0, 0 }; static cilist io___53 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___54 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___55 = { 0, 5, 0, 0, 0 }; static cilist io___57 = { 0, 6, 0, fmt_9992, 0 }; static cilist io___58 = { 0, 5, 0, 0, 0 }; static cilist io___60 = { 0, 5, 0, 0, 0 }; static cilist io___62 = { 0, 5, 0, 0, 0 }; static cilist io___64 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___66 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___67 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___68 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___69 = { 0, 6, 0, 0, 0 }; static cilist io___78 = { 0, 6, 0, fmt_9990, 0 }; static cilist io___79 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___87 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___89 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___92 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___93 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___94 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___95 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___96 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___97 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___98 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___99 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___100 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___101 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___102 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___103 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___104 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___105 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___106 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___107 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___108 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___109 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___110 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___111 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___112 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___113 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___114 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___115 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___116 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___117 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___118 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___119 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___120 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___121 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___122 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___123 = { 0, 6, 0, fmt_9990, 0 }; static cilist io___125 = { 0, 6, 0, fmt_9998, 0 }; static cilist io___126 = { 0, 6, 0, fmt_9997, 0 }; /* -- LAPACK test routine (version 3.1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* Purpose */ /* ======= */ /* ZCHKAA is the main test program for the COMPLEX*16 linear equation */ /* routines. */ /* The program must be driven by a short data file. The first 14 records */ /* specify problem dimensions and program options using list-directed */ /* input. The remaining lines specify the LAPACK test paths and the */ /* number of matrix types to use in testing. An annotated example of a */ /* data file can be obtained by deleting the first 3 characters from the */ /* following 38 lines: */ /* Data file for testing COMPLEX*16 LAPACK linear equation routines */ /* 7 Number of values of M */ /* 0 1 2 3 5 10 16 Values of M (row dimension) */ /* 7 Number of values of N */ /* 0 1 2 3 5 10 16 Values of N (column dimension) */ /* 1 Number of values of NRHS */ /* 2 Values of NRHS (number of right hand sides) */ /* 5 Number of values of NB */ /* 1 3 3 3 20 Values of NB (the blocksize) */ /* 1 0 5 9 1 Values of NX (crossover point) */ /* 30.0 Threshold value of test ratio */ /* T Put T to test the LAPACK routines */ /* T Put T to test the driver routines */ /* T Put T to test the error exits */ /* ZGE 11 List types on next line if 0 < NTYPES < 11 */ /* ZGB 8 List types on next line if 0 < NTYPES < 8 */ /* ZGT 12 List types on next line if 0 < NTYPES < 12 */ /* ZPO 9 List types on next line if 0 < NTYPES < 9 */ /* ZPP 9 List types on next line if 0 < NTYPES < 9 */ /* ZPB 8 List types on next line if 0 < NTYPES < 8 */ /* ZPT 12 List types on next line if 0 < NTYPES < 12 */ /* ZHE 10 List types on next line if 0 < NTYPES < 10 */ /* ZHP 10 List types on next line if 0 < NTYPES < 10 */ /* ZSY 11 List types on next line if 0 < NTYPES < 11 */ /* ZSP 11 List types on next line if 0 < NTYPES < 11 */ /* ZTR 18 List types on next line if 0 < NTYPES < 18 */ /* ZTP 18 List types on next line if 0 < NTYPES < 18 */ /* ZTB 17 List types on next line if 0 < NTYPES < 17 */ /* ZQR 8 List types on next line if 0 < NTYPES < 8 */ /* ZRQ 8 List types on next line if 0 < NTYPES < 8 */ /* ZLQ 8 List types on next line if 0 < NTYPES < 8 */ /* ZQL 8 List types on next line if 0 < NTYPES < 8 */ /* ZQP 6 List types on next line if 0 < NTYPES < 6 */ /* ZTZ 3 List types on next line if 0 < NTYPES < 3 */ /* ZLS 6 List types on next line if 0 < NTYPES < 6 */ /* ZEQ */ /* Internal Parameters */ /* =================== */ /* NMAX INTEGER */ /* The maximum allowable value for N. */ /* MAXIN INTEGER */ /* The number of different values that can be used for each of */ /* M, N, or NB */ /* MAXRHS INTEGER */ /* The maximum number of right hand sides */ /* NIN INTEGER */ /* The unit number for input */ /* NOUT INTEGER */ /* The unit number for output */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Scalars in Common .. */ /* .. */ /* .. Arrays in Common .. */ /* .. */ /* .. Common blocks .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ s1 = dsecnd_(); lda = 132; fatal = FALSE_; /* Read a dummy line. */ s_rsle(&io___6); e_rsle(); /* Report values of parameters. */ ilaver_(&vers_major__, &vers_minor__, &vers_patch__); s_wsfe(&io___10); do_fio(&c__1, (char *)&vers_major__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&vers_minor__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&vers_patch__, (ftnlen)sizeof(integer)); e_wsfe(); /* Read the values of M */ s_rsle(&io___11); do_lio(&c__3, &c__1, (char *)&nm, (ftnlen)sizeof(integer)); e_rsle(); if (nm < 1) { s_wsfe(&io___13); do_fio(&c__1, " NM ", (ftnlen)4); do_fio(&c__1, (char *)&nm, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nm = 0; fatal = TRUE_; } else if (nm > 12) { s_wsfe(&io___14); do_fio(&c__1, " NM ", (ftnlen)4); do_fio(&c__1, (char *)&nm, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nm = 0; fatal = TRUE_; } s_rsle(&io___15); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { if (mval[i__ - 1] < 0) { s_wsfe(&io___18); do_fio(&c__1, " M ", (ftnlen)4); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (mval[i__ - 1] > 132) { s_wsfe(&io___19); do_fio(&c__1, " M ", (ftnlen)4); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__132, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L10: */ } if (nm > 0) { s_wsfe(&io___20); do_fio(&c__1, "M ", (ftnlen)4); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the values of N */ s_rsle(&io___21); do_lio(&c__3, &c__1, (char *)&nn, (ftnlen)sizeof(integer)); e_rsle(); if (nn < 1) { s_wsfe(&io___23); do_fio(&c__1, " NN ", (ftnlen)4); do_fio(&c__1, (char *)&nn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nn = 0; fatal = TRUE_; } else if (nn > 12) { s_wsfe(&io___24); do_fio(&c__1, " NN ", (ftnlen)4); do_fio(&c__1, (char *)&nn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nn = 0; fatal = TRUE_; } s_rsle(&io___25); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { if (nval[i__ - 1] < 0) { s_wsfe(&io___27); do_fio(&c__1, " N ", (ftnlen)4); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (nval[i__ - 1] > 132) { s_wsfe(&io___28); do_fio(&c__1, " N ", (ftnlen)4); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__132, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L20: */ } if (nn > 0) { s_wsfe(&io___29); do_fio(&c__1, "N ", (ftnlen)4); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the values of NRHS */ s_rsle(&io___30); do_lio(&c__3, &c__1, (char *)&nns, (ftnlen)sizeof(integer)); e_rsle(); if (nns < 1) { s_wsfe(&io___32); do_fio(&c__1, " NNS", (ftnlen)4); do_fio(&c__1, (char *)&nns, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nns = 0; fatal = TRUE_; } else if (nns > 12) { s_wsfe(&io___33); do_fio(&c__1, " NNS", (ftnlen)4); do_fio(&c__1, (char *)&nns, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nns = 0; fatal = TRUE_; } s_rsle(&io___34); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { if (nsval[i__ - 1] < 0) { s_wsfe(&io___36); do_fio(&c__1, "NRHS", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (nsval[i__ - 1] > 16) { s_wsfe(&io___37); do_fio(&c__1, "NRHS", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__16, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L30: */ } if (nns > 0) { s_wsfe(&io___38); do_fio(&c__1, "NRHS", (ftnlen)4); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the values of NB */ s_rsle(&io___39); do_lio(&c__3, &c__1, (char *)&nnb, (ftnlen)sizeof(integer)); e_rsle(); if (nnb < 1) { s_wsfe(&io___41); do_fio(&c__1, "NNB ", (ftnlen)4); do_fio(&c__1, (char *)&nnb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nnb = 0; fatal = TRUE_; } else if (nnb > 12) { s_wsfe(&io___42); do_fio(&c__1, "NNB ", (ftnlen)4); do_fio(&c__1, (char *)&nnb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nnb = 0; fatal = TRUE_; } s_rsle(&io___43); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { if (nbval[i__ - 1] < 0) { s_wsfe(&io___45); do_fio(&c__1, " NB ", (ftnlen)4); do_fio(&c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L40: */ } if (nnb > 0) { s_wsfe(&io___46); do_fio(&c__1, "NB ", (ftnlen)4); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Set NBVAL2 to be the set of unique values of NB */ nnb2 = 0; i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { nb = nbval[i__ - 1]; i__2 = nnb2; for (j = 1; j <= i__2; ++j) { if (nb == nbval2[j - 1]) { goto L60; } /* L50: */ } ++nnb2; nbval2[nnb2 - 1] = nb; L60: ; } /* Read the values of NX */ s_rsle(&io___51); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nxval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { if (nxval[i__ - 1] < 0) { s_wsfe(&io___53); do_fio(&c__1, " NX ", (ftnlen)4); do_fio(&c__1, (char *)&nxval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L70: */ } if (nnb > 0) { s_wsfe(&io___54); do_fio(&c__1, "NX ", (ftnlen)4); i__1 = nnb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nxval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the threshold value for the test ratios. */ s_rsle(&io___55); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Read the flag that indicates whether to test the LAPACK routines. */ s_rsle(&io___58); do_lio(&c__8, &c__1, (char *)&tstchk, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether to test the driver routines. */ s_rsle(&io___60); do_lio(&c__8, &c__1, (char *)&tstdrv, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether to test the error exits. */ s_rsle(&io___62); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); if (fatal) { s_wsfe(&io___64); e_wsfe(); s_stop("", (ftnlen)0); } /* Calculate and print the machine dependent constants. */ eps = dlamch_("Underflow threshold"); s_wsfe(&io___66); do_fio(&c__1, "underflow", (ftnlen)9); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); eps = dlamch_("Overflow threshold"); s_wsfe(&io___67); do_fio(&c__1, "overflow ", (ftnlen)9); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); eps = dlamch_("Epsilon"); s_wsfe(&io___68); do_fio(&c__1, "precision", (ftnlen)9); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); s_wsle(&io___69); e_wsle(); nrhs = nsval[0]; L80: /* Read a test path and the number of matrix types to use. */ ci__1.cierr = 0; ci__1.ciend = 1; ci__1.ciunit = 5; ci__1.cifmt = "(A72)"; i__1 = s_rsfe(&ci__1); if (i__1 != 0) { goto L140; } i__1 = do_fio(&c__1, aline, (ftnlen)72); if (i__1 != 0) { goto L140; } i__1 = e_rsfe(); if (i__1 != 0) { goto L140; } s_copy(path, aline, (ftnlen)3, (ftnlen)3); nmats = 30; i__ = 3; L90: ++i__; if (i__ > 72) { goto L130; } if (*(unsigned char *)&aline[i__ - 1] == ' ') { goto L90; } nmats = 0; L100: *(unsigned char *)c1 = *(unsigned char *)&aline[i__ - 1]; for (k = 1; k <= 10; ++k) { if (*(unsigned char *)c1 == *(unsigned char *)&intstr[k - 1]) { ic = k - 1; goto L120; } /* L110: */ } goto L130; L120: nmats = nmats * 10 + ic; ++i__; if (i__ > 72) { goto L130; } goto L100; L130: *(unsigned char *)c1 = *(unsigned char *)path; s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2); /* Check first character for correct precision. */ if (! lsame_(c1, "Zomplex precision")) { s_wsfe(&io___78); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } else if (nmats <= 0) { /* Check for a positive number of tests requested. */ s_wsfe(&io___79); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } else if (lsamen_(&c__2, c2, "GE")) { /* GE: general matrices */ ntypes = 11; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkge_(dotype, &nm, mval, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, &tsterr, &lda, a, &a[21912], &a[43824], b, &b[ 2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___87); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvge_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], &b[6336], s, work, rwork, iwork, &c__6); } else { s_wsfe(&io___89); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "GB")) { /* GB: general banded matrices */ la = 43692; lafac = 65472; ntypes = 8; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkgb_(dotype, &nm, mval, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, &tsterr, a, &la, &a[43824], &lafac, b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___92); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvgb_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, a, &la, &a[ 43824], &lafac, &a[109560], b, &b[2112], &b[4224], &b[ 6336], s, work, rwork, iwork, &c__6); } else { s_wsfe(&io___93); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "GT")) { /* GT: general tridiagonal matrices */ ntypes = 12; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkgt_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, a, &a[ 21912], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___94); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvgt_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, a, &a[21912], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___95); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "PO")) { /* PO: positive definite matrices */ ntypes = 9; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkpo_(dotype, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, & tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[ 4224], work, rwork, &c__6); } else { s_wsfe(&io___96); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvpo_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], &b[6336], s, work, rwork, &c__6); } else { s_wsfe(&io___97); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "PP")) { /* PP: positive definite packed matrices */ ntypes = 9; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkpp_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[4224], work, rwork, &c__6); } else { s_wsfe(&io___98); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvpp_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], &b[6336], s, work, rwork, &c__6); } else { s_wsfe(&io___99); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "PB")) { /* PB: positive definite banded matrices */ ntypes = 8; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkpb_(dotype, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, & tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[ 4224], work, rwork, &c__6); } else { s_wsfe(&io___100); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvpb_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], &b[6336], s, work, rwork, &c__6); } else { s_wsfe(&io___101); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "PT")) { /* PT: positive definite tridiagonal matrices */ ntypes = 12; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkpt_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, a, s, & a[21912], b, &b[2112], &b[4224], work, rwork, &c__6); } else { s_wsfe(&io___102); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvpt_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, a, s, &a[ 21912], b, &b[2112], &b[4224], work, rwork, &c__6); } else { s_wsfe(&io___103); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "HE")) { /* HE: Hermitian indefinite matrices */ ntypes = 10; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkhe_(dotype, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, & tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[ 4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___104); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvhe_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___105); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "HP")) { /* HP: Hermitian indefinite packed matrices */ ntypes = 10; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkhp_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___106); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvhp_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___107); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "SY")) { /* SY: symmetric indefinite matrices */ ntypes = 11; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchksy_(dotype, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, & tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[ 4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___108); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvsy_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___109); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "SP")) { /* SP: symmetric indefinite packed matrices */ ntypes = 11; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchksp_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, &lda, a, &a[21912], &a[43824], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___110); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } if (tstdrv) { zdrvsp_(dotype, &nn, nval, &nrhs, &thresh, &tsterr, &lda, a, &a[ 21912], &a[43824], b, &b[2112], &b[4224], work, rwork, iwork, &c__6); } else { s_wsfe(&io___111); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "TR")) { /* TR: triangular matrices */ ntypes = 18; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchktr_(dotype, &nn, nval, &nnb2, nbval2, &nns, nsval, &thresh, & tsterr, &lda, a, &a[21912], b, &b[2112], &b[4224], work, rwork, &c__6); } else { s_wsfe(&io___112); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "TP")) { /* TP: triangular packed matrices */ ntypes = 18; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchktp_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, &lda, a, &a[21912], b, &b[2112], &b[4224], work, rwork, &c__6); } else { s_wsfe(&io___113); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "TB")) { /* TB: triangular banded matrices */ ntypes = 17; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchktb_(dotype, &nn, nval, &nns, nsval, &thresh, &tsterr, &lda, a, &a[21912], b, &b[2112], &b[4224], work, rwork, &c__6); } else { s_wsfe(&io___114); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "QR")) { /* QR: QR factorization */ ntypes = 8; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkqr_(dotype, &nm, mval, &nn, nval, &nnb, nbval, nxval, &nrhs, & thresh, &tsterr, &c__132, a, &a[21912], &a[43824], &a[ 65736], &a[87648], b, &b[2112], &b[4224], &b[6336], work, rwork, iwork, &c__6); } else { s_wsfe(&io___115); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "LQ")) { /* LQ: LQ factorization */ ntypes = 8; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchklq_(dotype, &nm, mval, &nn, nval, &nnb, nbval, nxval, &nrhs, & thresh, &tsterr, &c__132, a, &a[21912], &a[43824], &a[ 65736], &a[87648], b, &b[2112], &b[4224], &b[6336], work, rwork, iwork, &c__6); } else { s_wsfe(&io___116); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "QL")) { /* QL: QL factorization */ ntypes = 8; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkql_(dotype, &nm, mval, &nn, nval, &nnb, nbval, nxval, &nrhs, & thresh, &tsterr, &c__132, a, &a[21912], &a[43824], &a[ 65736], &a[87648], b, &b[2112], &b[4224], &b[6336], work, rwork, iwork, &c__6); } else { s_wsfe(&io___117); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "RQ")) { /* RQ: RQ factorization */ ntypes = 8; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkrq_(dotype, &nm, mval, &nn, nval, &nnb, nbval, nxval, &nrhs, & thresh, &tsterr, &c__132, a, &a[21912], &a[43824], &a[ 65736], &a[87648], b, &b[2112], &b[4224], &b[6336], work, rwork, iwork, &c__6); } else { s_wsfe(&io___118); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "EQ")) { /* EQ: Equilibration routines for general and positive definite */ /* matrices (THREQ should be between 2 and 10) */ if (tstchk) { zchkeq_(&threq, &c__6); } else { s_wsfe(&io___119); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "TZ")) { /* TZ: Trapezoidal matrix */ ntypes = 3; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchktz_(dotype, &nm, mval, &nn, nval, &thresh, &tsterr, a, &a[ 21912], s, &s[132], b, work, rwork, &c__6); } else { s_wsfe(&io___120); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "QP")) { /* QP: QR factorization with pivoting */ ntypes = 6; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstchk) { zchkqp_(dotype, &nm, mval, &nn, nval, &thresh, &tsterr, a, &a[ 21912], s, &s[132], b, work, rwork, iwork, &c__6); zchkq3_(dotype, &nm, mval, &nn, nval, &nnb, nbval, nxval, &thresh, a, &a[21912], s, &s[132], b, work, rwork, iwork, &c__6); } else { s_wsfe(&io___121); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else if (lsamen_(&c__2, c2, "LS")) { /* LS: Least squares drivers */ ntypes = 6; alareq_(path, &nmats, dotype, &ntypes, &c__5, &c__6); if (tstdrv) { zdrvls_(dotype, &nm, mval, &nn, nval, &nns, nsval, &nnb, nbval, nxval, &thresh, &tsterr, a, &a[21912], &a[43824], &a[ 65736], &a[87648], s, &s[132], work, rwork, iwork, &c__6); } else { s_wsfe(&io___122); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else { s_wsfe(&io___123); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } /* Go back to get another input line. */ goto L80; /* Branch to this line when the last record is read. */ L140: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); s2 = dsecnd_(); s_wsfe(&io___125); e_wsfe(); s_wsfe(&io___126); d__1 = s2 - s1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); /* End of ZCHKAA */ return 0; } /* MAIN__ */
/* Subroutine */ int ztimtd_(char *line, integer *nm, integer *mval, integer * nn, integer *nval, integer *nnb, integer *nbval, integer *nxval, integer *nlda, integer *ldaval, doublereal *timmin, doublecomplex *a, doublecomplex *b, doublereal *d__, doublecomplex *tau, doublecomplex * work, doublereal *reslts, integer *ldr1, integer *ldr2, integer *ldr3, integer *nout, ftnlen line_len) { /* Initialized data */ static char subnam[6*3] = "ZHETRD" "ZUNGTR" "ZUNMTR"; static char sides[1*2] = "L" "R"; static char transs[1*2] = "N" "C"; static char uplos[1*2] = "U" "L"; static integer iseed[4] = { 0,0,0,1 }; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "*** \002)"; static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; static char fmt_9996[] = "(/5x,a6,\002 with UPLO = '\002,a1,\002'\002,/)"; static char fmt_9995[] = "(/5x,a6,\002 with SIDE = '\002,a1,\002', UPLO " "= '\002,a1,\002', TRANS = '\002,a1,\002', \002,a1,\002 =\002,i6," "/)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer ilda; static char side[1]; static integer info; static char path[3]; static doublereal time; static integer isub; static char uplo[1]; static integer i__, m, n; static char cname[6]; static integer iside; extern doublereal dopla_(char *, integer *, integer *, integer *, integer *, integer *); static integer itoff, itran; extern /* Subroutine */ int icopy_(integer *, integer *, integer *, integer *, integer *); static char trans[1]; static integer iuplo, i3, i4, m1, n1; static doublereal s1, s2; extern /* Subroutine */ int dprtb3_(char *, char *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ic, nb, im, in; extern doublereal dsecnd_(void); static integer lw, nx, reseed[4]; extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dprtbl_( char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen), xlaenv_(integer *, integer *), zhetrd_(char *, integer *, doublecomplex *, integer *, doublereal *, doublereal *, doublecomplex *, doublecomplex *, integer *, integer *); static doublereal untime; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static logical timsub[3]; extern /* Subroutine */ int ztimmg_(integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *), zlatms_( integer *, integer *, char *, integer *, char *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, char *, doublecomplex *, integer *, doublecomplex *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *), zunmtr_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); static integer lda, icl, inb; static doublereal ops; static char lab1[1], lab2[1]; /* Fortran I/O blocks */ static cilist io___10 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___11 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___42 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___44 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9995, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\ reslts_dim2 + (a_2))*reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= ZTIMTD times the LAPACK routines ZHETRD, ZUNGTR, and CUNMTR. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NM (input) INTEGER The number of values of M contained in the vector MVAL. MVAL (input) INTEGER array, dimension (NM) The values of the matrix size M. NN (input) INTEGER The number of values of N contained in the vector NVAL. NVAL (input) INTEGER array, dimension (NN) The values of the matrix column dimension N. NNB (input) INTEGER The number of values of NB and NX contained in the vectors NBVAL and NXVAL. The blocking parameters are used in pairs (NB,NX). NBVAL (input) INTEGER array, dimension (NNB) The values of the blocksize NB. NXVAL (input) INTEGER array, dimension (NNB) The values of the crossover point NX. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) where LDAMAX and NMAX are the maximum values of LDA and N. B (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) D (workspace) DOUBLE PRECISION array, dimension (2*NMAX-1) TAU (workspace) COMPLEX*16 array, dimension (NMAX) WORK (workspace) COMPLEX*16 array, dimension (NMAX*NBMAX) where NBMAX is the maximum value of NB. RESLTS (workspace) DOUBLE PRECISION array, dimension (LDR1,LDR2,LDR3,4*NN+3) The timing results for each subroutine over the relevant values of M, (NB,NX), LDA, and N. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= max(1,NNB). LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NM). LDR3 (input) INTEGER The third dimension of RESLTS. LDR3 >= max(1,2*NLDA). NOUT (input) INTEGER The unit number for output. Internal Parameters =================== MODE INTEGER The matrix type. MODE = 3 is a geometric distribution of eigenvalues. See ZLATMS for further details. COND DOUBLE PRECISION The condition number of the matrix. The singular values are set to values from DMAX to DMAX/COND. DMAX DOUBLE PRECISION The magnitude of the largest singular value. ===================================================================== Parameter adjustments */ --mval; --nval; --nbval; --nxval; --ldaval; --a; --b; --d__; --tau; --work; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_dim3 = *ldr3; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1) ); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Zomplex precision", (ftnlen)1, (ftnlen)17); s_copy(path + 1, "TD", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__3, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (info != 0) { goto L220; } /* Check that M <= LDA for the input values. */ s_copy(cname, line, (ftnlen)6, (ftnlen)6); atimck_(&c__2, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___10.ciunit = *nout; s_wsfe(&io___10); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L220; } /* Check that K <= LDA for ZUNMTR */ if (timsub[2]) { atimck_(&c__3, cname, nn, &nval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___11.ciunit = *nout; s_wsfe(&io___11); do_fio(&c__1, subnam_ref(0, 3), (ftnlen)6); e_wsfe(); timsub[2] = FALSE_; } } /* Do first for UPLO = 'U', then for UPLO = 'L' */ for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; /* Do for each value of M: */ i__1 = *nm; for (im = 1; im <= i__1; ++im) { m = mval[im]; icopy_(&c__4, iseed, &c__1, reseed, &c__1); /* Do for each value of LDA: */ i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { lda = ldaval[ilda]; i3 = (iuplo - 1) * *nlda + ilda; /* Do for each pair of values (NB, NX) in NBVAL and NXVAL. */ i__3 = *nnb; for (inb = 1; inb <= i__3; ++inb) { nb = nbval[inb]; xlaenv_(&c__1, &nb); nx = nxval[inb]; xlaenv_(&c__3, &nx); /* Computing MAX */ i__4 = 1, i__5 = m * max(1,nb); lw = max(i__4,i__5); /* Generate a test matrix of order M. */ icopy_(&c__4, reseed, &c__1, iseed, &c__1); zlatms_(&m, &m, "Uniform", iseed, "Symmetric", &d__[1], & c__3, &c_b27, &c_b28, &m, &m, "No packing", &b[1], &lda, &work[1], &info); if (timsub[0]) { /* ZHETRD: Reduction to tridiagonal form */ zlacpy_(uplo, &m, &m, &b[1], &lda, &a[1], &lda); ic = 0; s1 = dsecnd_(); L10: zhetrd_(uplo, &m, &a[1], &lda, &d__[1], &d__[m + 1], & tau[1], &work[1], &lw, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { zlacpy_(uplo, &m, &m, &b[1], &lda, &a[1], &lda); goto L10; } /* Subtract the time used in ZLACPY. */ icl = 1; s1 = dsecnd_(); L20: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda); goto L20; } time = (time - untime) / (doublereal) ic; ops = dopla_("ZHETRD", &m, &m, &c_n1, &c_n1, &nb); reslts_ref(inb, im, i3, 1) = dmflop_(&ops, &time, & info); } else { /* If ZHETRD was not timed, generate a matrix and factor it using ZHETRD anyway so that the factored form of the matrix can be used in timing the other routines. */ zlacpy_(uplo, &m, &m, &b[1], &lda, &a[1], &lda); zhetrd_(uplo, &m, &a[1], &lda, &d__[1], &d__[m + 1], & tau[1], &work[1], &lw, &info); } if (timsub[1]) { /* ZUNGTR: Generate the orthogonal matrix Q from the reduction to Hessenberg form A = Q*H*Q' */ zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda); ic = 0; s1 = dsecnd_(); L30: zungtr_(uplo, &m, &b[1], &lda, &tau[1], &work[1], &lw, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda); goto L30; } /* Subtract the time used in ZLACPY. */ icl = 1; s1 = dsecnd_(); L40: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { zlacpy_(uplo, &m, &m, &a[1], &lda, &b[1], &lda); goto L40; } time = (time - untime) / (doublereal) ic; /* Op count for ZUNGTR: same as ZUNGQR( N-1, N-1, N-1, ... ) */ i__4 = m - 1; i__5 = m - 1; i__6 = m - 1; ops = dopla_("ZUNGQR", &i__4, &i__5, &i__6, &c_n1, & nb); reslts_ref(inb, im, i3, 2) = dmflop_(&ops, &time, & info); } if (timsub[2]) { /* ZUNMTR: Multiply by Q stored as a product of elementary transformations */ i4 = 2; for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[ iside - 1]; i__4 = *nn; for (in = 1; in <= i__4; ++in) { n = nval[in]; /* Computing MAX */ i__5 = 1, i__6 = max(1,nb) * n; lw = max(i__5,i__6); if (iside == 1) { m1 = m; n1 = n; } else { m1 = n; n1 = m; } itoff = 0; for (itran = 1; itran <= 2; ++itran) { *(unsigned char *)trans = *(unsigned char *)&transs[itran - 1]; ztimmg_(&c__0, &m1, &n1, &b[1], &lda, & c__0, &c__0); ic = 0; s1 = dsecnd_(); L50: zunmtr_(side, uplo, trans, &m1, &n1, &a[1] , &lda, &tau[1], &b[1], &lda, & work[1], &lw, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__0, &m1, &n1, &b[1], &lda, &c__0, &c__0); goto L50; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L60: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__0, &m1, &n1, &b[1], &lda, &c__0, &c__0); goto L60; } time = (time - untime) / (doublereal) ic; /* Op count for ZUNMTR, SIDE='L': same as ZUNMQR( 'L', TRANS, M-1, N, M-1, ...) Op count for ZUNMTR, SIDE='R': same as ZUNMQR( 'R', TRANS, M, N-1, N-1, ...) */ if (iside == 1) { i__5 = m1 - 1; i__6 = m1 - 1; ops = dopla_("ZUNMQR", &i__5, &n1, & i__6, &c_n1, &nb); } else { i__5 = n1 - 1; i__6 = n1 - 1; ops = dopla_("ZUNMQR", &m1, &i__5, & i__6, &c__1, &nb); } reslts_ref(inb, im, i3, i4 + itoff + in) = dmflop_(&ops, &time, &info); itoff = *nn; /* L70: */ } /* L80: */ } i4 += *nn << 1; /* L90: */ } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } /* Print tables of results for ZHETRD and ZUNGTR */ for (isub = 1; isub <= 2; ++isub) { if (! timsub[isub - 1]) { goto L160; } io___42.ciunit = *nout; s_wsfe(&io___42); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); if (*nlda > 1) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___44.ciunit = *nout; s_wsfe(&io___44); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L140: */ } } i3 = 1; for (iuplo = 1; iuplo <= 2; ++iuplo) { io___45.ciunit = *nout; s_wsfe(&io___45); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); do_fio(&c__1, uplos + (iuplo - 1), (ftnlen)1); e_wsfe(); dprtb3_("( NB, NX)", "N", nnb, &nbval[1], &nxval[1], nm, &mval[ 1], nlda, &reslts_ref(1, 1, i3, isub), ldr1, ldr2, nout, ( ftnlen)11, (ftnlen)1); i3 += *nlda; /* L150: */ } L160: ; } /* Print tables of results for ZUNMTR */ isub = 3; if (timsub[isub - 1]) { i4 = 2; for (iside = 1; iside <= 2; ++iside) { if (iside == 1) { *(unsigned char *)lab1 = 'M'; *(unsigned char *)lab2 = 'N'; if (*nlda > 1) { io___48.ciunit = *nout; s_wsfe(&io___48); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___49.ciunit = *nout; s_wsfe(&io___49); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof( integer)); e_wsfe(); /* L170: */ } } } else { *(unsigned char *)lab1 = 'N'; *(unsigned char *)lab2 = 'M'; } for (itran = 1; itran <= 2; ++itran) { i__1 = *nn; for (in = 1; in <= i__1; ++in) { i3 = 1; for (iuplo = 1; iuplo <= 2; ++iuplo) { io___50.ciunit = *nout; s_wsfe(&io___50); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); do_fio(&c__1, sides + (iside - 1), (ftnlen)1); do_fio(&c__1, uplos + (iuplo - 1), (ftnlen)1); do_fio(&c__1, transs + (itran - 1), (ftnlen)1); do_fio(&c__1, lab2, (ftnlen)1); do_fio(&c__1, (char *)&nval[in], (ftnlen)sizeof( integer)); e_wsfe(); dprtbl_("NB", lab1, nnb, &nbval[1], nm, &mval[1], nlda, &reslts_ref(1, 1, i3, i4 + in), ldr1, ldr2, nout, (ftnlen)2, (ftnlen)1); i3 += *nlda; /* L180: */ } /* L190: */ } i4 += *nn; /* L200: */ } /* L210: */ } } L220: /* Print a table of results for each timed routine. */ return 0; /* End of ZTIMTD */ } /* ztimtd_ */
/* Subroutine */ int ztimb3_(char *line, integer *nm, integer *mval, integer * nn, integer *nval, integer *nk, integer *kval, integer *nlda, integer *ldaval, doublereal *timmin, doublecomplex *a, doublecomplex *b, doublecomplex *c__, doublereal *reslts, integer *ldr1, integer *ldr2, integer *nout, ftnlen line_len) { /* Initialized data */ static char names[6*9] = "ZGEMM " "ZHEMM " "ZSYMM " "ZHERK " "ZHER2K" "ZSYRK " "ZSYR2K" "ZTRMM " "ZTRSM "; static char trans[1*3] = "N" "T" "C"; static char sides[1*2] = "L" "R"; static char uplos[1*2] = "U" "L"; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9997[] = "(5x,\002with LDA = \002,i5)"; static char fmt_9996[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; static char fmt_9995[] = "(/1x,\002ZGEMM with TRANSA = '\002,a1,\002', " "TRANSB = '\002,a1,\002'\002)"; static char fmt_9994[] = "(/1x,\002K = \002,i4,/)"; static char fmt_9993[] = "(/1x,a6,\002 with SIDE = '\002,a1,\002', UPLO " "= '\002,a1,\002'\002,/)"; static char fmt_9992[] = "(/1x,a6,\002 with UPLO = '\002,a1,\002', TRANS" " = '\002,a1,\002'\002,/)"; static char fmt_9991[] = "(/1x,a6,\002 with SIDE = '\002,a1,\002', UPLO " "= '\002,a1,\002',\002,\002 TRANS = '\002,a1,\002'\002,/)"; static char fmt_9990[] = "(/////)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_offset, i__1, i__2, i__3, i__4; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(char *, char *, ftnlen, ftnlen); /* Local variables */ static integer ilda; static char side[1]; static integer imat, info; static char path[3]; static doublereal time; static integer isub; static char uplo[1]; static integer i__, k, m, n; static char cname[6]; static integer iside; extern logical lsame_(char *, char *); extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *); static integer iuplo; static doublereal s1, s2; extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), zsymm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), zsyrk_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer * , doublecomplex *, doublecomplex *, integer *); extern doublereal dopbl3_(char *, integer *, integer *, integer *) ; extern /* Subroutine */ int zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *); static integer ic, ik, im, in; extern doublereal dsecnd_(void); extern /* Subroutine */ int zsyr2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dprtbl_( char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen); static char transa[1], transb[1]; static doublereal untime; static logical timsub[9]; extern /* Subroutine */ int ztimmg_(integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *); static integer lda, icl, ita, itb; static doublereal ops; /* Fortran I/O blocks */ static cilist io___9 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___11 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___12 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___14 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___35 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___42 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___44 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___46 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___47 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; #define names_ref(a_0,a_1) &names[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3) reslts[((a_3)*reslts_dim2 + (a_2))*\ reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= ZTIMB3 times the Level 3 BLAS routines. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NM (input) INTEGER The number of values of M contained in the vector MVAL. MVAL (input) INTEGER array, dimension (NM) The values of the matrix row dimension M. NN (input) INTEGER The number of values of N contained in the vector NVAL. NVAL (input) INTEGER array, dimension (NN) The values of the matrix column dimension N. NK (input) INTEGER The number of values of K contained in the vector KVAL. KVAL (input) INTEGER array, dimension (NK) The values of K. K is used as the intermediate matrix dimension for ZGEMM (the product of an M x K matrix and a K x N matrix) and as the dimension of the rank-K update in ZHERK and ZSYRK. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) where LDAMAX and NMAX are the maximum values permitted for LDA and N. B (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) C (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) RESLTS (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,NLDA) The timing results for each subroutine over the relevant values of M, N, K, and LDA. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= max(1,NM,NK). LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NN). NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --mval; --nval; --kval; --ldaval; --a; --b; --c__; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * 1); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Zomplex precision", (ftnlen)1, (ftnlen)17); s_copy(path + 1, "B3", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__9, names, timsub, nout, &info, (ftnlen)3, line_len, (ftnlen)6); if (info != 0) { goto L690; } /* Check that M <= LDA. */ s_copy(cname, line, (ftnlen)6, (ftnlen)6); atimck_(&c__1, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___9.ciunit = *nout; s_wsfe(&io___9); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L690; } /* Time each routine. */ for (isub = 1; isub <= 9; ++isub) { if (! timsub[isub - 1]) { goto L680; } /* Print header. */ s_copy(cname, names_ref(0, isub), (ftnlen)6, (ftnlen)6); io___11.ciunit = *nout; s_wsfe(&io___11); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); if (*nlda == 1) { io___12.ciunit = *nout; s_wsfe(&io___12); do_fio(&c__1, (char *)&ldaval[1], (ftnlen)sizeof(integer)); e_wsfe(); } else { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___14.ciunit = *nout; s_wsfe(&io___14); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L10: */ } } /* Time ZGEMM */ if (s_cmp(cname, "ZGEMM ", (ftnlen)6, (ftnlen)6) == 0) { for (ita = 1; ita <= 3; ++ita) { *(unsigned char *)transa = *(unsigned char *)&trans[ita - 1]; for (itb = 1; itb <= 3; ++itb) { *(unsigned char *)transb = *(unsigned char *)&trans[itb - 1]; i__1 = *nk; for (ik = 1; ik <= i__1; ++ik) { k = kval[ik]; i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { lda = ldaval[ilda]; i__3 = *nm; for (im = 1; im <= i__3; ++im) { m = mval[im]; i__4 = *nn; for (in = 1; in <= i__4; ++in) { n = nval[in]; if (*(unsigned char *)transa == 'N') { ztimmg_(&c__1, &m, &k, &a[1], &lda, & c__0, &c__0); } else { ztimmg_(&c__1, &k, &m, &a[1], &lda, & c__0, &c__0); } if (*(unsigned char *)transb == 'N') { ztimmg_(&c__0, &k, &n, &b[1], &lda, & c__0, &c__0); } else { ztimmg_(&c__0, &n, &k, &b[1], &lda, & c__0, &c__0); } ztimmg_(&c__1, &m, &n, &c__[1], &lda, & c__0, &c__0); ic = 0; s1 = dsecnd_(); L20: zgemm_(transa, transb, &m, &n, &k, &c_b1, &a[1], &lda, &b[1], &lda, &c_b1, & c__[1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__1, &m, &n, &c__[1], &lda, &c__0, &c__0); goto L20; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L30: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__1, &m, &n, &c__[1], &lda, &c__0, &c__0); goto L30; } time = (time - untime) / (doublereal) ic; ops = dopbl3_(cname, &m, &n, &k); reslts_ref(im, in, ilda) = dmflop_(&ops, & time, &c__0); /* L40: */ } /* L50: */ } /* L60: */ } if (ik == 1) { io___34.ciunit = *nout; s_wsfe(&io___34); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); e_wsfe(); } io___35.ciunit = *nout; s_wsfe(&io___35); do_fio(&c__1, (char *)&kval[ik], (ftnlen)sizeof( integer)); e_wsfe(); dprtbl_("M", "N", nm, &mval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); /* L70: */ } /* L80: */ } /* L90: */ } /* Time ZHEMM */ } else if (s_cmp(cname, "ZHEMM ", (ftnlen)6, (ftnlen)6) == 0) { for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[iside - 1]; for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 6; } else { imat = -6; } i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nm; for (im = 1; im <= i__2; ++im) { m = mval[im]; i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; if (iside == 1) { ztimmg_(&imat, &m, &m, &a[1], &lda, &c__0, &c__0); ztimmg_(&c__0, &m, &n, &b[1], &lda, &c__0, &c__0); } else { ztimmg_(&c__0, &m, &n, &b[1], &lda, &c__0, &c__0); ztimmg_(&imat, &n, &n, &a[1], &lda, &c__0, &c__0); } ztimmg_(&c__1, &m, &n, &c__[1], &lda, &c__0, & c__0); ic = 0; s1 = dsecnd_(); L100: zhemm_(side, uplo, &m, &n, &c_b1, &a[1], &lda, &b[1], &lda, &c_b1, &c__[1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__1, &m, &n, &c__[1], &lda, & c__0, &c__0); goto L100; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L110: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__1, &m, &n, &c__[1], &lda, & c__0, &c__0); goto L110; } time = (time - untime) / (doublereal) ic; i__4 = iside - 1; ops = dopbl3_(cname, &m, &n, &i__4) ; reslts_ref(im, in, ilda) = dmflop_(&ops, & time, &c__0); /* L120: */ } /* L130: */ } /* L140: */ } io___41.ciunit = *nout; s_wsfe(&io___41); do_fio(&c__1, "ZHEMM ", (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); e_wsfe(); dprtbl_("M", "N", nm, &mval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, (ftnlen) 1, (ftnlen)1); /* L150: */ } /* L160: */ } /* Time ZSYMM */ } else if (s_cmp(cname, "ZSYMM ", (ftnlen)6, (ftnlen)6) == 0) { for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[iside - 1]; for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 8; } else { imat = -8; } i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nm; for (im = 1; im <= i__2; ++im) { m = mval[im]; i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; if (iside == 1) { ztimmg_(&imat, &m, &m, &a[1], &lda, &c__0, &c__0); ztimmg_(&c__0, &m, &n, &b[1], &lda, &c__0, &c__0); } else { ztimmg_(&c__0, &m, &n, &b[1], &lda, &c__0, &c__0); ztimmg_(&imat, &n, &n, &a[1], &lda, &c__0, &c__0); } ztimmg_(&c__1, &m, &n, &c__[1], &lda, &c__0, & c__0); ic = 0; s1 = dsecnd_(); L170: zsymm_(side, uplo, &m, &n, &c_b1, &a[1], &lda, &b[1], &lda, &c_b1, &c__[1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__1, &m, &n, &c__[1], &lda, & c__0, &c__0); goto L170; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L180: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__1, &m, &n, &c__[1], &lda, & c__0, &c__0); goto L180; } time = (time - untime) / (doublereal) ic; i__4 = iside - 1; ops = dopbl3_(cname, &m, &n, &i__4) ; reslts_ref(im, in, ilda) = dmflop_(&ops, & time, &c__0); /* L190: */ } /* L200: */ } /* L210: */ } io___42.ciunit = *nout; s_wsfe(&io___42); do_fio(&c__1, "ZSYMM ", (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); e_wsfe(); dprtbl_("M", "N", nm, &mval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, (ftnlen) 1, (ftnlen)1); /* L220: */ } /* L230: */ } /* Time ZHERK */ } else if (s_cmp(cname, "ZHERK ", (ftnlen)6, (ftnlen)6) == 0) { for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 6; } else { imat = -6; } for (ita = 1; ita <= 3; ++ita) { *(unsigned char *)transa = *(unsigned char *)&trans[ita - 1]; if (*(unsigned char *)transa != 'T') { i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nk; for (ik = 1; ik <= i__2; ++ik) { k = kval[ik]; if (*(unsigned char *)transa == 'N') { ztimmg_(&c__1, &n, &k, &a[1], &lda, &c__0, &c__0); } else { ztimmg_(&c__1, &k, &n, &a[1], &lda, &c__0, &c__0); } i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; ztimmg_(&imat, &n, &n, &c__[1], &lda, & c__0, &c__0); ic = 0; s1 = dsecnd_(); L240: zherk_(uplo, transa, &n, &k, &c_b156, &a[ 1], &lda, &c_b156, &c__[1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L240; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L250: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L250; } time = (time - untime) / (doublereal) ic; ops = dopbl3_(cname, &n, &n, &k); reslts_ref(ik, in, ilda) = dmflop_(&ops, & time, &c__0); /* L260: */ } /* L270: */ } /* L280: */ } io___43.ciunit = *nout; s_wsfe(&io___43); do_fio(&c__1, cname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); e_wsfe(); dprtbl_("K", "N", nk, &kval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); } /* L290: */ } /* L300: */ } /* Time ZHER2K */ } else if (s_cmp(cname, "ZHER2K", (ftnlen)6, (ftnlen)6) == 0) { for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 6; } else { imat = -6; } for (itb = 1; itb <= 3; ++itb) { *(unsigned char *)transb = *(unsigned char *)&trans[itb - 1]; if (*(unsigned char *)transb != 'T') { i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nk; for (ik = 1; ik <= i__2; ++ik) { k = kval[ik]; if (*(unsigned char *)transb == 'N') { ztimmg_(&c__1, &n, &k, &a[1], &lda, &c__0, &c__0); ztimmg_(&c__0, &n, &k, &b[1], &lda, &c__0, &c__0); } else { ztimmg_(&c__1, &k, &n, &a[1], &lda, &c__0, &c__0); ztimmg_(&c__0, &k, &n, &b[1], &lda, &c__0, &c__0); } i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; ztimmg_(&imat, &n, &n, &c__[1], &lda, & c__0, &c__0); ic = 0; s1 = dsecnd_(); L310: zher2k_(uplo, transb, &n, &k, &c_b1, &a[1] , &lda, &b[1], &lda, &c_b156, & c__[1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L310; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L320: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L320; } time = (time - untime) / (doublereal) ic; ops = dopbl3_(cname, &n, &n, &k); reslts_ref(ik, in, ilda) = dmflop_(&ops, & time, &c__0); /* L330: */ } /* L340: */ } /* L350: */ } io___44.ciunit = *nout; s_wsfe(&io___44); do_fio(&c__1, cname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); e_wsfe(); dprtbl_("K", "N", nk, &kval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); } /* L360: */ } /* L370: */ } /* Time ZSYRK */ } else if (s_cmp(cname, "ZSYRK ", (ftnlen)6, (ftnlen)6) == 0) { for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 8; } else { imat = -8; } for (ita = 1; ita <= 3; ++ita) { *(unsigned char *)transa = *(unsigned char *)&trans[ita - 1]; if (*(unsigned char *)transa != 'C') { i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nk; for (ik = 1; ik <= i__2; ++ik) { k = kval[ik]; if (*(unsigned char *)transa == 'N') { ztimmg_(&c__1, &n, &k, &a[1], &lda, &c__0, &c__0); } else { ztimmg_(&c__1, &k, &n, &a[1], &lda, &c__0, &c__0); } i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; ztimmg_(&imat, &n, &n, &c__[1], &lda, & c__0, &c__0); ic = 0; s1 = dsecnd_(); L380: zsyrk_(uplo, transa, &n, &k, &c_b1, &a[1], &lda, &c_b1, &c__[1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L380; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L390: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L390; } time = (time - untime) / (doublereal) ic; ops = dopbl3_(cname, &n, &n, &k); reslts_ref(ik, in, ilda) = dmflop_(&ops, & time, &c__0); /* L400: */ } /* L410: */ } /* L420: */ } io___45.ciunit = *nout; s_wsfe(&io___45); do_fio(&c__1, cname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); e_wsfe(); dprtbl_("K", "N", nk, &kval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); } /* L430: */ } /* L440: */ } /* Time ZSYR2K */ } else if (s_cmp(cname, "ZSYR2K", (ftnlen)6, (ftnlen)6) == 0) { for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 8; } else { imat = -8; } for (itb = 1; itb <= 3; ++itb) { *(unsigned char *)transb = *(unsigned char *)&trans[itb - 1]; if (*(unsigned char *)transb != 'C') { i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nk; for (ik = 1; ik <= i__2; ++ik) { k = kval[ik]; if (*(unsigned char *)transb == 'N') { ztimmg_(&c__1, &n, &k, &a[1], &lda, &c__0, &c__0); ztimmg_(&c__0, &n, &k, &b[1], &lda, &c__0, &c__0); } else { ztimmg_(&c__1, &k, &n, &a[1], &lda, &c__0, &c__0); ztimmg_(&c__0, &k, &n, &b[1], &lda, &c__0, &c__0); } i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; ztimmg_(&imat, &n, &n, &c__[1], &lda, & c__0, &c__0); ic = 0; s1 = dsecnd_(); L450: zsyr2k_(uplo, transb, &n, &k, &c_b1, &a[1] , &lda, &b[1], &lda, &c_b1, &c__[ 1], &lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L450; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L460: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&imat, &n, &n, &c__[1], &lda, &c__0, &c__0); goto L460; } time = (time - untime) / (doublereal) ic; ops = dopbl3_(cname, &n, &n, &k); reslts_ref(ik, in, ilda) = dmflop_(&ops, & time, &c__0); /* L470: */ } /* L480: */ } /* L490: */ } io___46.ciunit = *nout; s_wsfe(&io___46); do_fio(&c__1, cname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); e_wsfe(); dprtbl_("K", "N", nk, &kval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); } /* L500: */ } /* L510: */ } /* Time ZTRMM */ } else if (s_cmp(cname, "ZTRMM ", (ftnlen)6, (ftnlen)6) == 0) { for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[iside - 1]; for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 11; } else { imat = -11; } for (ita = 1; ita <= 3; ++ita) { *(unsigned char *)transa = *(unsigned char *)&trans[ ita - 1]; i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nm; for (im = 1; im <= i__2; ++im) { m = mval[im]; i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; if (iside == 1) { ztimmg_(&imat, &m, &m, &a[1], &lda, & c__0, &c__0); } else { ztimmg_(&imat, &n, &n, &a[1], &lda, & c__0, &c__0); } ztimmg_(&c__0, &m, &n, &b[1], &lda, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L520: ztrmm_(side, uplo, transa, "Non-unit", &m, &n, &c_b1, &a[1], &lda, &b[1], & lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__0, &m, &n, &b[1], &lda, & c__0, &c__0); goto L520; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L530: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__0, &m, &n, &b[1], &lda, & c__0, &c__0); goto L530; } time = (time - untime) / (doublereal) ic; i__4 = iside - 1; ops = dopbl3_(cname, &m, &n, &i__4); reslts_ref(im, in, ilda) = dmflop_(&ops, & time, &c__0); /* L540: */ } /* L550: */ } /* L560: */ } io___47.ciunit = *nout; s_wsfe(&io___47); do_fio(&c__1, cname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); e_wsfe(); dprtbl_("M", "N", nm, &mval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); /* L570: */ } /* L580: */ } /* L590: */ } /* Time ZTRSM */ } else if (s_cmp(cname, "ZTRSM ", (ftnlen)6, (ftnlen)6) == 0) { for (iside = 1; iside <= 2; ++iside) { *(unsigned char *)side = *(unsigned char *)&sides[iside - 1]; for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { imat = 11; } else { imat = -11; } for (ita = 1; ita <= 3; ++ita) { *(unsigned char *)transa = *(unsigned char *)&trans[ ita - 1]; i__1 = *nlda; for (ilda = 1; ilda <= i__1; ++ilda) { lda = ldaval[ilda]; i__2 = *nm; for (im = 1; im <= i__2; ++im) { m = mval[im]; i__3 = *nn; for (in = 1; in <= i__3; ++in) { n = nval[in]; if (iside == 1) { ztimmg_(&imat, &m, &m, &a[1], &lda, & c__0, &c__0); } else { ztimmg_(&imat, &n, &n, &a[1], &lda, & c__0, &c__0); } ztimmg_(&c__0, &m, &n, &b[1], &lda, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L600: ztrsm_(side, uplo, transa, "Non-unit", &m, &n, &c_b1, &a[1], &lda, &b[1], & lda); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__0, &m, &n, &b[1], &lda, & c__0, &c__0); goto L600; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L610: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__0, &m, &n, &b[1], &lda, & c__0, &c__0); goto L610; } time = (time - untime) / (doublereal) ic; i__4 = iside - 1; ops = dopbl3_(cname, &m, &n, &i__4); reslts_ref(im, in, ilda) = dmflop_(&ops, & time, &c__0); /* L620: */ } /* L630: */ } /* L640: */ } io___48.ciunit = *nout; s_wsfe(&io___48); do_fio(&c__1, cname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); e_wsfe(); dprtbl_("M", "N", nm, &mval[1], nn, &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, ( ftnlen)1, (ftnlen)1); /* L650: */ } /* L660: */ } /* L670: */ } } io___49.ciunit = *nout; s_wsfe(&io___49); e_wsfe(); L680: ; } L690: return 0; /* End of ZTIMB3 */ } /* ztimb3_ */
int main(int argc, char* argv[]) { unsigned int m=8, n=8, lda=8, ldb=8, nerrs, num, nmat, nmats, nmatd, ntest; unsigned int layout, asize, VLEND=4, VLENS=8, bsize; unsigned int ncorr; int i, j; char side, uplo, trans, diag; unsigned int typesize8 = 8; unsigned int typesize4 = 4; float *sa, *sb, *sc, *sd; double *da, *db, *dc, *dd, *tmpbuf; double dalpha = 1.0; float salpha; double dtmp; const unsigned char *cptr; unsigned long op_count; const libxsmm_trsm_descriptor* desc8 = NULL; const libxsmm_trsm_descriptor* desc4 = NULL; libxsmm_descriptor_blob blob; union { libxsmm_xtrsmfunction dp; libxsmm_xtrsmfunction sp; const void* pv; } mykernel = { 0 }; #ifdef USE_KERNEL_GENERATION_DIRECTLY void (*opcode_routine)(); #endif #ifdef USE_KERNEL_GENERATION_DIRECTLY #include <unistd.h> #include <signal.h> #include <malloc.h> #include <sys/mman.h> #include "../../src/generator_packed_trsm_avx_avx512.h" unsigned char *routine_output; libxsmm_generated_code io_generated_code; int pagesize = sysconf(_SC_PAGE_SIZE); if (pagesize == -1) fprintf(stderr,"sysconf pagesize\n"); routine_output = (unsigned char *) mmap(NULL, BUFSIZE2, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0,0); if (mprotect(routine_output, BUFSIZE2, PROT_EXEC | PROT_READ | PROT_WRITE ) == -1) fprintf(stderr,"mprotect\n"); printf("Routine ready\n"); io_generated_code.generated_code = &routine_output[0]; io_generated_code.buffer_size = BUFSIZE2; io_generated_code.code_size = 0; io_generated_code.code_type = 2; io_generated_code.last_error = 0; #endif if ( argc <= 3 ) { printf("\nUSAGE: %s m n lda ldb nmat side uplo trans diag layout ntest alpha\n",argv[0]); printf("Compact TRSM a mxn matrix of leading dimension ldb\n"); printf("This will test the jit of 1 VLEN work of nmat at a time\n"); printf("Defaults: m=n=lda=ldb=nmat=8, alpha=1.0, side=uplo='L',trans=diag='N',layout=102,ntest=1\n"); } if ( argc > 1 ) m = atoi(argv[1]); else m = 8; if ( argc > 2 ) n = atoi(argv[2]); else n = 8; if ( argc > 3 ) lda= atoi(argv[3]); else lda = 8; if ( argc > 4 ) ldb = atoi(argv[4]); else ldb = 8; if ( argc > 5 ) nmat = atoi(argv[5]); else nmat = 8; if ( argc > 6 ) side = argv[6][0]; else side = 'L'; if ( argc > 7 ) uplo = argv[7][0]; else uplo = 'L'; if ( argc > 8 ) trans = argv[8][0]; else trans = 'N'; if ( argc > 9 ) diag = argv[9][0]; else diag = 'N'; if ( argc > 10 ) layout = atoi(argv[10]); else layout=102; if ( argc > 11 ) ntest = atoi(argv[11]); else ntest = 1; if ( argc > 12 ) dalpha = atof(argv[12]); else dalpha = 1.0; salpha = (float)dalpha; m = LIBXSMM_MAX(m,1); n = LIBXSMM_MAX(n,1); /* A is either mxm or nxn depending on side */ if ( (side == 'L') || (side=='l') ) asize = m; else asize = n; lda = LIBXSMM_MAX(lda,asize); if ( layout == 102 ) { /* Column major: B is mxn, and stored in B format */ ldb = LIBXSMM_MAX(ldb,m); bsize = ldb*n; } else { /* Row major: B is mxn, and stored in B^T format */ ldb = LIBXSMM_MAX(ldb,n); bsize = ldb*m; } nmats = LIBXSMM_MAX(VLENS,nmat - (nmat%VLENS)); nmatd = LIBXSMM_MAX(VLEND,nmat - (nmat%VLEND)); nmat = LIBXSMM_MAX(nmats,nmatd); op_count = n * m * asize; printf("This is a real*%u tester for JIT compact TRSM kernels! (%c%c%c%c m=%u n=%u lda=%u ldb=%u layout=%u nmat=%u)\n",typesize8,side,uplo,trans,diag,m,n,lda,ldb,layout,nmat); #ifdef USE_XSMM_GENERATED printf("This code tests the LIBXSMM generated kernels\n"); #endif #ifdef USE_PREDEFINED_ASSEMBLY printf("This code tests some predefined assembly kenrel\n"); #endif #ifdef USE_KERNEL_GENERATION_DIRECTLY printf("This code tests kernel generation directly\n"); #endif #ifdef TIME_MKL printf("This code tests MKL compact batch directly\n"); #endif desc8 = libxsmm_trsm_descriptor_init(&blob, typesize8, m, n, lda, ldb, &dalpha, trans, diag, side, uplo, layout); desc4 = libxsmm_trsm_descriptor_init(&blob, typesize4, m, n, lda, ldb, &salpha, trans, diag, side, uplo, layout); #ifdef USE_XSMM_GENERATED printf("calling libxsmm_dispatch_trsm: typesize8=%u\n",typesize8); mykernel.dp = libxsmm_dispatch_trsm(desc8); printf("done calling libxsmm_dispatch_trsm: typesize8=%u\n",typesize8); if ( mykernel.dp == NULL ) printf("R8 Kernel after the create call is null\n"); mykernel.sp = libxsmm_dispatch_trsm(desc4); if ( mykernel.sp == NULL ) printf("R4 kernel after the create call is null\n"); #endif #ifdef USE_KERNEL_GENERATION_DIRECTLY libxsmm_generator_trsm_kernel ( &io_generated_code, &desc8, "hsw" ); #endif #ifndef NO_ACCURACY_CHECK printf("mallocing matrices\n"); #endif sa = (float *) malloc ( lda*asize*nmats*sizeof(float) ); da = (double *) malloc ( lda*asize*nmatd*sizeof(double) ); sb = (float *) malloc ( bsize*nmats*sizeof(float) ); db = (double *) malloc ( bsize*nmatd*sizeof(double) ); sc = (float *) malloc ( bsize*nmats*sizeof(float) ); dc = (double *) malloc ( bsize*nmatd*sizeof(double) ); sd = (float *) malloc ( bsize*nmats*sizeof(float) ); dd = (double *) malloc ( bsize*nmatd*sizeof(double) ); tmpbuf = (double *) malloc ( asize*VLEND*sizeof(double) ); #ifndef NO_ACCURACY_CHECK printf("filling matrices\n"); #endif sfill_matrix ( sa, lda, asize, asize*nmats ); #ifdef TRIANGLE_IS_IDENTITY printf("Warning: setting triangular matrix to identity. Not good for accuracy testing\n"); dfill_identity ( da, lda, asize, asize, VLEND, nmatd/VLEND ); #else dfill_matrix ( da, lda, asize, asize*nmatd ); #endif sfill_matrix ( sb, bsize, bsize, nmats ); dfill_matrix ( db, bsize, bsize, nmatd ); #ifndef NO_ACCURACY_CHECK for ( i = 0 ; i < (int)(bsize*nmats) ; i++ ) sc[i]=sb[i]; for ( i = 0 ; i < (int)(bsize*nmatd) ; i++ ) dc[i]=db[i]; for ( i = 0 ; i < (int)(bsize*nmats) ; i++ ) sd[i]=sb[i]; for ( i = 0 ; i < (int)(bsize*nmatd) ; i++ ) dd[i]=db[i]; printf("Pointing at the kernel now\n"); #endif #ifdef USE_XSMM_GENERATED cptr = (const unsigned char*) mykernel.pv; #endif #ifdef USE_PREDEFINED_ASSEMBLY cptr = (const unsigned char*) trsm_xct_; #endif #ifdef USE_KERNEL_GENERATION_DIRECTLY cptr = (const unsigned char*) &routine_output[0]; opcode_routine = (void *) &cptr[0]; #endif #ifndef TIME_MKL #define DUMP_ASSEMBLY_FILE #endif #ifdef DUMP_ASSEMBLY_FILE printf("Dumping assembly file\n"); FILE *fp = fopen("foo.s","w"); char buffer[80]; fputs("\t.text\n",fp); fputs("\t.align 256\n",fp); fputs("\t.globl trsm_xct_\n",fp); fputs("trsm_xct_:\n",fp); for (i = 0 ; i < 4000; i+=4 ) { sprintf(buffer,".byte 0x%02x, 0x%02x, 0x%02x, 0x%02x\n",cptr[i],cptr[i+1],cptr[i+2],cptr[i+3]); fputs(buffer,fp); } fputs("\tretq\n",fp); fputs("\t.type trsm_xct_,@function\n",fp); fputs("\t.size trsm_xct_,.-trsm_xct_\n",fp); fclose(fp); #endif #if defined(USE_MKL_FOR_REFERENCE) || defined(TIME_MKL) #include "mkl.h" MKL_LAYOUT CLAYOUT = (layout == 101) ? MKL_ROW_MAJOR : MKL_COL_MAJOR; MKL_SIDE SIDE = (side == 'R' || side == 'r') ? MKL_RIGHT : MKL_LEFT; MKL_UPLO UPLO = (uplo == 'U' || uplo == 'u') ? MKL_UPPER : MKL_LOWER; MKL_TRANSPOSE TRANSA = (trans == 'N' || trans == 'n') ? MKL_NOTRANS : MKL_TRANS; MKL_DIAG DIAG = (diag == 'N' || diag == 'n') ? MKL_NONUNIT : MKL_UNIT; MKL_COMPACT_PACK CMP_FORMAT = mkl_get_format_compact(); #if 0 MKL_COMPACT_PACK CMP_FORMAT = MKL_COMPACT_AVX; #endif #endif #ifndef NO_ACCURACY_CHECK printf("Before routine, initial B(1,1)=%g B[256]=%g\n",db[0],db[256]); #endif #ifdef USE_PREDEFINED_ASSEMBLY double one = 1.0; #endif double timer; #ifdef MKL_TIMER double tmptimer; tmptimer = dsecnd_(); #else unsigned long long l_start, l_end; #endif timer = 0.0; for ( j = 0 ; j < (int)ntest ; j++ ) { #ifndef TRIANGLE_IS_IDENTITY for ( i = 0 ; i < (int)(bsize*nmatd) ; i++ ) db[i]=dd[i]; #endif for ( i = 0 , num = 0; i < (int)nmatd ; i+= (int)VLEND, num++ ) { double *Ap = &da[num*lda*asize*VLEND]; double *Bp = &db[num*bsize*VLEND]; #ifdef MKL_TIMER tmptimer = dsecnd_(); #else l_start = libxsmm_timer_tick(); #endif #ifdef USE_XSMM_GENERATED mykernel.dp ( Ap, Bp, tmpbuf ); #endif #ifdef USE_PREDEFINED_ASSEMBLY trsm_xct_ ( Ap, Bp, &one ); #endif #ifdef USE_KERNEL_GENERATION_DIRECTLY (*opcode_routine)( Ap, Bp ); #endif #ifdef TIME_MKL mkl_dtrsm_compact ( CLAYOUT, SIDE, UPLO, TRANSA, DIAG, m, n, dalpha, da, lda, db, ldb, CMP_FORMAT, nmatd ); i+=nmatd; /* Because MKL will do everything */ #endif #ifdef MKL_TIMER timer += dsecnd_() - tmptimer; #else l_end = libxsmm_timer_tick(); timer += libxsmm_timer_duration(l_start,l_end); #endif } } timer /= ((double)ntest); #ifndef NO_ACCURACY_CHECK printf("Average time to get through %u matrices: %g\n",nmatd,timer); printf("Gflops: %g\n",(double)(op_count*nmatd)/(timer*1.0e9)); printf("after routine, new B(1,1)=%g B[256]=%g\n",db[0],db[256]); #endif #ifdef TEST_SINGLE printf("Before r4 routine, initial B(1,1)=%g B[256]=%g\n",sb[0],sb[256]); for ( i = 0 , num = 0; i < nmats ; i+= VLENS, num++ ) { float *Ap = &sa[num*lda*asize*VLENS]; float *Bp = &sb[num*bsize*VLENS]; #ifdef USE_XSMM_GENERATED mykernel.sp ( Ap, Bp, NULL ); #endif } printf("after r4 routine, new B(1,1)=%g B]256]=%g\n",db[0],db[256]); #endif #ifndef NO_ACCURACY_CHECK /* Call some reference code now on a copy of the B matrix (C) */ double timer2 = 0.0; for ( j = 0 ; j < (int)ntest ; j++ ) { #ifndef TRIANGLE_IS_IDENTITY for ( i = 0 ; i < (int)(bsize*nmatd) ; i++ ) dc[i]=dd[i]; #endif #ifdef MKL_TIMER tmptimer = dsecnd_(); #else l_start = libxsmm_timer_tick(); #endif #ifdef USE_MKL_FOR_REFERENCE mkl_dtrsm_compact ( CLAYOUT, SIDE, UPLO, TRANSA, DIAG, m, n, dalpha, da, lda, dc, ldb, CMP_FORMAT, nmatd ); #elif !defined(LIBXSMM_NOFORTRAN) if ( (layout == 101) && (nmatd!=VLEND) ) { unsigned int lay = 102, m1 = n, n1 = m; char side1='L', uplo1='L'; if ( side == 'L' || side == 'l' ) side1 = 'R'; if ( uplo == 'L' || uplo == 'l' ) uplo1 = 'U'; compact_dtrsm_ ( &lay, &side1, &uplo1, &trans, &diag, &m1, &n1, &dalpha, da, &lda, dc, &ldb, &nmatd, &VLEND ); } else { compact_dtrsm_ ( &layout, &side, &uplo, &trans, &diag, &m, &n, &dalpha, da, &lda, dc, &ldb, &nmatd, &VLEND ); } #endif #ifdef MKL_TIMER timer2 += dsecnd_() - tmptimer; #else l_end = libxsmm_timer_tick(); timer2 += libxsmm_timer_duration(l_start,l_end); #endif } timer2 /= ((double)ntest); printf("Reference time=%g Reference Gflops=%g\n",timer2,(op_count*nmatd)/(timer2*1.0e9)); /* Compute the residual between B and C */ dtmp = residual_d ( dc, bsize, bsize, nmatd, db, bsize, &nerrs, &ncorr ); printf("R8 %c%c%c%c m=%u n=%u lda=%u ldb=%u error: %g number of errors: %u corrects: %u",side,uplo,trans,diag,m,n,lda,ldb,dtmp,nerrs,ncorr); if ( nerrs > 0 ) printf(" ->FAILED at %ux%u real*8 %u case",m,n,layout); printf("\n"); #ifdef TEST_SINGLE /* Call some reference code now on a copy of the B matrix (C) */ compact_strsm_ ( &layout, &side, &uplo, &trans, &diag, &m, &n, &salpha, sa, &lda, sc, &ldb, &nmats, &VLENS ); /* Compute the residual between B and C */ dtmp = residual_s ( sc, bsize, bsize, nmats, sb, bsize, &nerrs, &ncorr ); printf("R4 %c%c%c%c m=%u n=%u lda=%u ldb=%u error: %g number of errors: %u corrects: %u\n",side,uplo,trans,diag,m,n,lda,ldb,dtmp,nerrs,ncorr); if ( nerrs > 0 ) printf(" ->FAILED at %ux%u real*4 case",m,n); printf("\n"); #endif #else for ( j = 0, nerrs = 0 ; j < bsize*nmatd ; j++ ) { if ( isnan(db[j]) || isinf(db[j]) ) { if ( ++nerrs < 10 ) { printf("WARNING: db[%d]=%g\n",j,db[j]); } } } printf("%g,real*8 %c%c%c%c m=%u n=%u lda=%u ldb=%u Denormals=%u Time=%g Gflops=%g",(op_count*nmatd)/(timer*1.0e9),side,uplo,trans,diag,m,n,lda,ldb,nerrs,timer,(op_count*nmatd)/(timer*1.0e9)); if ( nerrs > 0 ) printf(" -> FAILED at %ux%u real*8 case",m,n); printf("\n"); #endif free(dd); free(sd); free(dc); free(sc); free(db); free(sb); free(da); free(sa); return 0; }
/* Subroutine */ int ztimpo_(char *line, integer *nn, integer *nval, integer * nns, integer *nsval, integer *nnb, integer *nbval, integer *nlda, integer *ldaval, doublereal *timmin, doublecomplex *a, doublecomplex * b, integer *iwork, doublereal *reslts, integer *ldr1, integer *ldr2, integer *ldr3, integer *nout, ftnlen line_len) { /* Initialized data */ static char uplos[1*2] = "U" "L"; static char subnam[6*3] = "ZPOTRF" "ZPOTRS" "ZPOTRI"; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; static char fmt_9996[] = "(5x,a6,\002 with UPLO = '\002,a1,\002'\002,/)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, i__3; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void); /* Local variables */ static integer ilda, info; static char path[3]; static doublereal time; static integer isub, nrhs; static char uplo[1]; static integer i__, n; static char cname[6]; extern doublereal dopla_(char *, integer *, integer *, integer *, integer *, integer *); extern logical lsame_(char *, char *); static integer iuplo, i3; static doublereal s1, s2; static integer ic, nb, in; extern doublereal dsecnd_(void); extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dprtbl_( char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen), xlaenv_(integer *, integer *); static doublereal untime; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static logical timsub[3]; extern /* Subroutine */ int ztimmg_(integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *), zpotrf_(char *, integer *, doublecomplex *, integer *, integer *), zpotri_(char *, integer *, doublecomplex *, integer *, integer *), zpotrs_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); static integer lda, ldb, icl, inb, mat; static doublereal ops; /* Fortran I/O blocks */ static cilist io___7 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___29 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___30 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___31 = { 0, 0, 0, 0, 0 }; static cilist io___32 = { 0, 0, 0, fmt_9996, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\ reslts_dim2 + (a_2))*reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= ZTIMPO times ZPOTRF, -TRS, and -TRI. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NN (input) INTEGER The number of values of N contained in the vector NVAL. NVAL (input) INTEGER array, dimension (NN) The values of the matrix size N. NNS (input) INTEGER The number of values of NRHS contained in the vector NSVAL. NSVAL (input) INTEGER array, dimension (NNS) The values of the number of right hand sides NRHS. NNB (input) INTEGER The number of values of NB contained in the vector NBVAL. NBVAL (input) INTEGER array, dimension (NNB) The values of the blocksize NB. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) where LDAMAX and NMAX are the maximum values permitted for LDA and N. B (workspace) COMPLEX*16 array, dimension (LDAMAX*NMAX) IWORK (workspace) INTEGER array, dimension (NMAX) RESLTS (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,LDR3,NSUBS) The timing results for each subroutine over the relevant values of N, NB, and LDA. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= max(4,NNB). LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NN). LDR3 (input) INTEGER The third dimension of RESLTS. LDR3 >= max(1,2*NLDA). NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --nval; --nsval; --nbval; --ldaval; --a; --b; --iwork; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_dim3 = *ldr3; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1) ); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Zomplex precision", (ftnlen)1, (ftnlen)17); s_copy(path + 1, "PO", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__3, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (info != 0) { goto L150; } /* Check that N <= LDA for the input values. */ s_copy(cname, line, (ftnlen)6, (ftnlen)6); atimck_(&c__2, cname, nn, &nval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___7.ciunit = *nout; s_wsfe(&io___7); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L150; } /* Do first for UPLO = 'U', then for UPLO = 'L' */ for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { mat = 3; } else { mat = -3; } /* Do for each value of N in NVAL. */ i__1 = *nn; for (in = 1; in <= i__1; ++in) { n = nval[in]; /* Do for each value of LDA: */ i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { lda = ldaval[ilda]; i3 = (iuplo - 1) * *nlda + ilda; /* Do for each value of NB in NBVAL. Only the blocked routines are timed in this loop since the other routines are independent of NB. */ i__3 = *nnb; for (inb = 1; inb <= i__3; ++inb) { nb = nbval[inb]; xlaenv_(&c__1, &nb); /* Time ZPOTRF */ if (timsub[0]) { ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L10: zpotrf_(uplo, &n, &a[1], &lda, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); goto L10; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L20: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); goto L20; } time = (time - untime) / (doublereal) ic; ops = dopla_("ZPOTRF", &n, &n, &c__0, &c__0, &nb); reslts_ref(inb, in, i3, 1) = dmflop_(&ops, &time, & info); } else { ic = 0; ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); } /* Generate another matrix and factor it using ZPOTRF so that the factored form can be used in timing the other routines. */ if (ic != 1) { zpotrf_(uplo, &n, &a[1], &lda, &info); } /* Time ZPOTRI */ if (timsub[2]) { zlacpy_(uplo, &n, &n, &a[1], &lda, &b[1], &lda); ic = 0; s1 = dsecnd_(); L30: zpotri_(uplo, &n, &b[1], &lda, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { zlacpy_(uplo, &n, &n, &a[1], &lda, &b[1], &lda); goto L30; } /* Subtract the time used in ZLACPY. */ icl = 1; s1 = dsecnd_(); L40: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { zlacpy_(uplo, &n, &n, &a[1], &lda, &b[1], &lda); goto L40; } time = (time - untime) / (doublereal) ic; ops = dopla_("ZPOTRI", &n, &n, &c__0, &c__0, &nb); reslts_ref(inb, in, i3, 3) = dmflop_(&ops, &time, & info); } /* L50: */ } /* Time ZPOTRS */ if (timsub[1]) { i__3 = *nns; for (i__ = 1; i__ <= i__3; ++i__) { nrhs = nsval[i__]; ldb = lda; ztimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L60: zpotrs_(uplo, &n, &nrhs, &a[1], &lda, &b[1], &ldb, & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, & c__0); goto L60; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L70: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, & c__0); goto L70; } time = (time - untime) / (doublereal) ic; ops = dopla_("ZPOTRS", &n, &nrhs, &c__0, &c__0, &c__0); reslts_ref(i__, in, i3, 2) = dmflop_(&ops, &time, & info); /* L80: */ } } /* L90: */ } /* L100: */ } /* L110: */ } /* Print tables of results for each timed routine. */ for (isub = 1; isub <= 3; ++isub) { if (! timsub[isub - 1]) { goto L140; } io___29.ciunit = *nout; s_wsfe(&io___29); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); if (*nlda > 1) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___30.ciunit = *nout; s_wsfe(&io___30); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L120: */ } } io___31.ciunit = *nout; s_wsle(&io___31); e_wsle(); for (iuplo = 1; iuplo <= 2; ++iuplo) { io___32.ciunit = *nout; s_wsfe(&io___32); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); do_fio(&c__1, uplos + (iuplo - 1), (ftnlen)1); e_wsfe(); i3 = (iuplo - 1) * *nlda + 1; if (isub == 1) { dprtbl_("NB", "N", nnb, &nbval[1], nn, &nval[1], nlda, & reslts_ref(1, 1, i3, 1), ldr1, ldr2, nout, (ftnlen)2, (ftnlen)1); } else if (isub == 2) { dprtbl_("NRHS", "N", nns, &nsval[1], nn, &nval[1], nlda, & reslts_ref(1, 1, i3, 2), ldr1, ldr2, nout, (ftnlen)4, (ftnlen)1); } else if (isub == 3) { dprtbl_("NB", "N", nnb, &nbval[1], nn, &nval[1], nlda, & reslts_ref(1, 1, i3, 3), ldr1, ldr2, nout, (ftnlen)2, (ftnlen)1); } /* L130: */ } L140: ; } L150: return 0; /* End of ZTIMPO */ } /* ztimpo_ */
/* Main program */ int MAIN__(void) { /* Format strings */ static char fmt_9994[] = "(/\002 Tests of the DOUBLE PRECISION LAPACK RF" "P routines \002,/\002 LAPACK VERSION \002,i1,\002.\002,i1,\002" ".\002,i1,//\002 The following parameter values will be used:\002)" ; static char fmt_9996[] = "(\002 !! Invalid input value: \002,a4,\002=" "\002,i6,\002; must be >=\002,i6)"; static char fmt_9995[] = "(\002 !! Invalid input value: \002,a4,\002=" "\002,i6,\002; must be <=\002,i6)"; static char fmt_9993[] = "(4x,a4,\002: \002,10i6,/11x,10i6)"; static char fmt_9992[] = "(/\002 Routines pass computational tests if te" "st ratio is \002,\002less than\002,f8.2,/)"; static char fmt_9999[] = "(/\002 Execution not attempted due to input er" "rors\002)"; static char fmt_9991[] = "(\002 Relative machine \002,a,\002 is taken to" " be\002,d16.6)"; static char fmt_9998[] = "(/\002 End of tests\002)"; static char fmt_9997[] = "(\002 Total time used = \002,f12.2,\002 seco" "nds\002,/)"; /* System generated locals */ integer i__1; doublereal d__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), e_rsle(void), s_wsfe(cilist *), do_fio(integer * , char *, ftnlen), e_wsfe(void), do_lio(integer *, integer *, char *, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer s_wsle(cilist *), e_wsle(void), f_clos(cllist *); /* Local variables */ doublereal workafac[2500] /* was [50][50] */, workasav[2500] /* was [50][50] */, workbsav[800] /* was [50][16] */, workainv[ 2500] /* was [50][50] */, workxact[800] /* was [50][ 16] */; integer i__; doublereal s1, s2; integer nn, vers_patch__, vers_major__, vers_minor__; doublereal workarfinv[1275], eps; integer nns, nnt, nval[12]; doublereal d_temp_dpot02__[800] /* was [50][16] */, d_temp_dpot03__[ 2500] /* was [50][50] */, d_work_dpot01__[50], d_work_dpot02__[50], d_work_dpot03__[50]; logical fatal; integer nsval[12], ntval[9]; doublereal worka[2500] /* was [50][50] */, workb[800] /* was [50][ 16] */, workx[800] /* was [50][16] */, d_work_dlatms__[150], d_work_dlansy__[50]; extern doublereal dlamch_(char *), dsecnd_(void); extern /* Subroutine */ int ilaver_(integer *, integer *, integer *); doublereal thresh, workap[1275]; logical tsterr; extern /* Subroutine */ int ddrvrf1_(integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *) , ddrvrf2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *), ddrvrf3_(integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), ddrvrf4_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), derrrfp_( integer *), ddrvrfp_(integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal workarf[1275]; /* Fortran I/O blocks */ static cilist io___3 = { 0, 5, 0, 0, 0 }; static cilist io___7 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___10 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___11 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___12 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___16 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___17 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___18 = { 0, 5, 0, 0, 0 }; static cilist io___20 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___21 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___24 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___25 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___26 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___27 = { 0, 5, 0, 0, 0 }; static cilist io___29 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___30 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___34 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___35 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___36 = { 0, 5, 0, 0, 0 }; static cilist io___38 = { 0, 6, 0, fmt_9992, 0 }; static cilist io___39 = { 0, 5, 0, 0, 0 }; static cilist io___41 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___42 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___45 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___46 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___47 = { 0, 6, 0, 0, 0 }; static cilist io___67 = { 0, 6, 0, fmt_9998, 0 }; static cilist io___68 = { 0, 6, 0, fmt_9997, 0 }; /* -- LAPACK test routine (version 3.2.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2008 */ /* Purpose */ /* ======= */ /* DCHKRFP is the main test program for the DOUBLE PRECISION linear */ /* equation routines with RFP storage format */ /* Internal Parameters */ /* =================== */ /* MAXIN INTEGER */ /* The number of different values that can be used for each of */ /* M, N, or NB */ /* MAXRHS INTEGER */ /* The maximum number of right hand sides */ /* NTYPES INTEGER */ /* NMAX INTEGER */ /* The maximum allowable value for N. */ /* NIN INTEGER */ /* The unit number for input */ /* NOUT INTEGER */ /* The unit number for output */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ s1 = dsecnd_(); fatal = FALSE_; /* Read a dummy line. */ s_rsle(&io___3); e_rsle(); /* Report LAPACK version tag (e.g. LAPACK-3.2.0) */ ilaver_(&vers_major__, &vers_minor__, &vers_patch__); s_wsfe(&io___7); do_fio(&c__1, (char *)&vers_major__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&vers_minor__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&vers_patch__, (ftnlen)sizeof(integer)); e_wsfe(); /* Read the values of N */ s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&nn, (ftnlen)sizeof(integer)); e_rsle(); if (nn < 1) { s_wsfe(&io___10); do_fio(&c__1, " NN ", (ftnlen)4); do_fio(&c__1, (char *)&nn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nn = 0; fatal = TRUE_; } else if (nn > 12) { s_wsfe(&io___11); do_fio(&c__1, " NN ", (ftnlen)4); do_fio(&c__1, (char *)&nn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nn = 0; fatal = TRUE_; } s_rsle(&io___12); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { if (nval[i__ - 1] < 0) { s_wsfe(&io___15); do_fio(&c__1, " M ", (ftnlen)4); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (nval[i__ - 1] > 50) { s_wsfe(&io___16); do_fio(&c__1, " M ", (ftnlen)4); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__50, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L10: */ } if (nn > 0) { s_wsfe(&io___17); do_fio(&c__1, "N ", (ftnlen)4); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the values of NRHS */ s_rsle(&io___18); do_lio(&c__3, &c__1, (char *)&nns, (ftnlen)sizeof(integer)); e_rsle(); if (nns < 1) { s_wsfe(&io___20); do_fio(&c__1, " NNS", (ftnlen)4); do_fio(&c__1, (char *)&nns, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nns = 0; fatal = TRUE_; } else if (nns > 12) { s_wsfe(&io___21); do_fio(&c__1, " NNS", (ftnlen)4); do_fio(&c__1, (char *)&nns, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nns = 0; fatal = TRUE_; } s_rsle(&io___22); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { if (nsval[i__ - 1] < 0) { s_wsfe(&io___24); do_fio(&c__1, "NRHS", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (nsval[i__ - 1] > 16) { s_wsfe(&io___25); do_fio(&c__1, "NRHS", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__16, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L30: */ } if (nns > 0) { s_wsfe(&io___26); do_fio(&c__1, "NRHS", (ftnlen)4); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the matrix types */ s_rsle(&io___27); do_lio(&c__3, &c__1, (char *)&nnt, (ftnlen)sizeof(integer)); e_rsle(); if (nnt < 1) { s_wsfe(&io___29); do_fio(&c__1, " NMA", (ftnlen)4); do_fio(&c__1, (char *)&nnt, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nnt = 0; fatal = TRUE_; } else if (nnt > 9) { s_wsfe(&io___30); do_fio(&c__1, " NMA", (ftnlen)4); do_fio(&c__1, (char *)&nnt, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); nnt = 0; fatal = TRUE_; } s_rsle(&io___31); i__1 = nnt; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&ntval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nnt; for (i__ = 1; i__ <= i__1; ++i__) { if (ntval[i__ - 1] < 0) { s_wsfe(&io___33); do_fio(&c__1, "TYPE", (ftnlen)4); do_fio(&c__1, (char *)&ntval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (ntval[i__ - 1] > 9) { s_wsfe(&io___34); do_fio(&c__1, "TYPE", (ftnlen)4); do_fio(&c__1, (char *)&ntval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L320: */ } if (nnt > 0) { s_wsfe(&io___35); do_fio(&c__1, "TYPE", (ftnlen)4); i__1 = nnt; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&ntval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the threshold value for the test ratios. */ s_rsle(&io___36); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); s_wsfe(&io___38); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Read the flag that indicates whether to test the error exits. */ s_rsle(&io___39); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); if (fatal) { s_wsfe(&io___41); e_wsfe(); s_stop("", (ftnlen)0); } if (fatal) { s_wsfe(&io___42); e_wsfe(); s_stop("", (ftnlen)0); } /* Calculate and print the machine dependent constants. */ eps = dlamch_("Underflow threshold"); s_wsfe(&io___44); do_fio(&c__1, "underflow", (ftnlen)9); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); eps = dlamch_("Overflow threshold"); s_wsfe(&io___45); do_fio(&c__1, "overflow ", (ftnlen)9); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); eps = dlamch_("Epsilon"); s_wsfe(&io___46); do_fio(&c__1, "precision", (ftnlen)9); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); s_wsle(&io___47); e_wsle(); /* Test the error exit of: */ if (tsterr) { derrrfp_(&c__6); } /* Test the routines: dpftrf, dpftri, dpftrs (as in DDRVPO). */ /* This also tests the routines: dtfsm, dtftri, dtfttr, dtrttf. */ ddrvrfp_(&c__6, &nn, nval, &nns, nsval, &nnt, ntval, &thresh, worka, workasav, workafac, workainv, workb, workbsav, workxact, workx, workarf, workarfinv, d_work_dlatms__, d_work_dpot01__, d_temp_dpot02__, d_temp_dpot03__, d_work_dlansy__, d_work_dpot02__, d_work_dpot03__); /* Test the routine: dlansf */ ddrvrf1_(&c__6, &nn, nval, &thresh, worka, &c__50, workarf, d_work_dlansy__); /* Test the convertion routines: */ /* dtfttp, dtpttf, dtfttr, dtrttf, dtrttp and dtpttr. */ ddrvrf2_(&c__6, &nn, nval, worka, &c__50, workarf, workap, workasav); /* Test the routine: dtfsm */ ddrvrf3_(&c__6, &nn, nval, &thresh, worka, &c__50, workarf, workainv, workafac, d_work_dlansy__, d_work_dpot03__, d_work_dpot01__); /* Test the routine: dsfrk */ ddrvrf4_(&c__6, &nn, nval, &thresh, worka, workafac, &c__50, workarf, workainv, &c__50, d_work_dlansy__); cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); s2 = dsecnd_(); s_wsfe(&io___67); e_wsfe(); s_wsfe(&io___68); d__1 = s2 - s1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); /* End of DCHKRFP */ return 0; } /* MAIN__ */
/* Subroutine */ int ztimtp_(char *line, integer *nn, integer *nval, integer * nns, integer *nsval, integer *la, doublereal *timmin, doublecomplex * a, doublecomplex *b, doublereal *reslts, integer *ldr1, integer *ldr2, integer *ldr3, integer *nout, ftnlen line_len) { /* Initialized data */ static char subnam[6*2] = "ZTPTRI" "ZTPTRS"; static char uplos[1*2] = "U" "L"; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002,/)"; static char fmt_9997[] = "(5x,a6,\002 with UPLO = '\002,a1,\002'\002,/)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer info; static char path[3]; static doublereal time; static integer isub, nrhs; static char uplo[1]; static integer i__, n; static char cname[6]; static integer laval[1]; extern doublereal dopla_(char *, integer *, integer *, integer *, integer *, integer *); extern logical lsame_(char *, char *); static integer iuplo; static doublereal s1, s2; static integer ic, in; extern doublereal dsecnd_(void); extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dprtbl_( char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen); static doublereal untime; static logical timsub[2]; static integer idummy[1]; extern /* Subroutine */ int ztimmg_(integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *), ztptri_(char *, char *, integer *, doublecomplex *, integer *), ztptrs_(char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, integer *); static integer lda, ldb, icl, mat; static doublereal ops; /* Fortran I/O blocks */ static cilist io___8 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___26 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___27 = { 0, 0, 0, fmt_9997, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\ reslts_dim2 + (a_2))*reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= ZTIMTP times ZTPTRI and -TRS. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NN (input) INTEGER The number of values of N contained in the vector NVAL. NVAL (input) INTEGER array, dimension (NN) The values of the matrix size N. NNS (input) INTEGER The number of values of NRHS contained in the vector NSVAL. NSVAL (input) INTEGER array, dimension (NNS) The values of the number of right hand sides NRHS. LA (input) INTEGER The size of the arrays A and B. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) COMPLEX*16 array, dimension (LA) B (workspace) COMPLEX*16 array, dimension (NMAX*NMAX) where NMAX is the maximum value of N in NVAL. RESLTS (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,LDR3,NSUBS) The timing results for each subroutine over the relevant values of N. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= 1. LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NN). LDR3 (input) INTEGER The third dimension of RESLTS. LDR3 >= 2. NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --nval; --nsval; --a; --b; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_dim3 = *ldr3; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1) ); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Zomplex precision", (ftnlen)1, (ftnlen)17); s_copy(path + 1, "TP", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__2, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (info != 0) { goto L100; } /* Check that N*(N+1)/2 <= LA for the input values. */ s_copy(cname, line, (ftnlen)6, (ftnlen)6); laval[0] = *la; atimck_(&c__4, cname, nn, &nval[1], &c__1, laval, nout, &info, (ftnlen)6); if (info > 0) { io___8.ciunit = *nout; s_wsfe(&io___8); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L100; } /* Do first for UPLO = 'U', then for UPLO = 'L' */ for (iuplo = 1; iuplo <= 2; ++iuplo) { *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; if (lsame_(uplo, "U")) { mat = 12; } else { mat = -12; } /* Do for each value of N: */ i__1 = *nn; for (in = 1; in <= i__1; ++in) { n = nval[in]; lda = n * (n + 1) / 2; ldb = n; if (n % 2 == 0) { ++ldb; } /* Time ZTPTRI */ if (timsub[0]) { ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L10: ztptri_(uplo, "Non-unit", &n, &a[1], &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); goto L10; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L20: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); goto L20; } time = (time - untime) / (doublereal) ic; ops = dopla_("ZTPTRI", &n, &n, &c__0, &c__0, &c__0) ; reslts_ref(1, in, iuplo, 1) = dmflop_(&ops, &time, &info); } else { /* Generate a triangular matrix A. */ ztimmg_(&mat, &n, &n, &a[1], &lda, &c__0, &c__0); } /* Time ZTPTRS */ if (timsub[1]) { i__2 = *nns; for (i__ = 1; i__ <= i__2; ++i__) { nrhs = nsval[i__]; ztimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L30: ztptrs_(uplo, "No transpose", "Non-unit", &n, &nrhs, &a[1] , &b[1], &ldb, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { ztimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0); goto L30; } /* Subtract the time used in ZTIMMG. */ icl = 1; s1 = dsecnd_(); L40: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { ztimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0); goto L40; } time = (time - untime) / (doublereal) ic; ops = dopla_("ZTPTRS", &n, &nrhs, &c__0, &c__0, &c__0); reslts_ref(i__, in, iuplo, 2) = dmflop_(&ops, &time, & info); /* L50: */ } } /* L60: */ } /* L70: */ } /* Print a table of results. */ for (isub = 1; isub <= 2; ++isub) { if (! timsub[isub - 1]) { goto L90; } io___26.ciunit = *nout; s_wsfe(&io___26); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); for (iuplo = 1; iuplo <= 2; ++iuplo) { io___27.ciunit = *nout; s_wsfe(&io___27); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); do_fio(&c__1, uplos + (iuplo - 1), (ftnlen)1); e_wsfe(); if (isub == 1) { dprtbl_(" ", "N", &c__1, idummy, nn, &nval[1], &c__1, & reslts_ref(1, 1, iuplo, 1), ldr1, ldr2, nout, (ftnlen) 1, (ftnlen)1); } else if (isub == 2) { dprtbl_("NRHS", "N", nns, &nsval[1], nn, &nval[1], &c__1, & reslts_ref(1, 1, iuplo, 2), ldr1, ldr2, nout, (ftnlen) 4, (ftnlen)1); } /* L80: */ } L90: ; } L100: return 0; /* End of ZTIMTP */ } /* ztimtp_ */
/* Main program */ MAIN__(void) { /* Initialized data */ static integer iseed[4] = { 0,0,0,1 }; static integer mxtype[4] = { 8,4,5,4 }; /* Format strings */ static char fmt_9993[] = "(\002 Timing the Nonsymmetric Eigenvalue Probl" "em routines\002,/\002 DGEHRD, DHSEQR, DTREVC, and DHSEIN\002)"; static char fmt_9992[] = "(\002 Timing the Symmetric Eigenvalue Problem " "routines\002,/\002 DSYTRD, DSTEQR, and DSTERF\002)"; static char fmt_9991[] = "(\002 Timing the Singular Value Decomposition " "routines\002,/\002 DGEBRD, DBDSQR, DORGBR, DBDSDC and DGESD" "D\002)"; static char fmt_9990[] = "(\002 Timing the Generalized Eigenvalue Proble" "m routines\002,/\002 DGGHRD, DHGEQZ, and DTGEVC \002)"; static char fmt_9996[] = "(1x,a3,\002: Unrecognized path name\002)"; static char fmt_9985[] = "(/\002 LAPACK VERSION 3.0, released June 30, 1" "999 \002)"; static char fmt_9989[] = "(/\002 The following parameter values will be " "used:\002)"; static char fmt_9995[] = "(\002 *** Invalid input value: \002,a6,\002" "=\002,i6,\002; must be >=\002,i6)"; static char fmt_9994[] = "(\002 *** Invalid input value: \002,a6,\002" "=\002,i6,\002; must be <=\002,i6)"; static char fmt_9988[] = "(\002 Values of \002,a5,\002: \002,10i6,/1" "9x,10i6)"; static char fmt_9987[] = "(/\002 Minimum time a subroutine will be timed" " = \002,f8.2,\002 seconds\002,/)"; static char fmt_9999[] = "(/\002 Execution not attempted due to input er" "rors\002)"; static char fmt_9986[] = "(\002 *** Error code from \002,a6,\002 = \002," "i4)"; static char fmt_9998[] = "(//\002 End of timing run\002)"; static char fmt_9997[] = "(\002 Total time used = \002,f12.2,\002 seco" "nds\002,/)"; /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ integer s_rsfe(cilist *), do_fio(integer *, char *, ftnlen), e_rsfe(void), s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsle(cilist *), e_wsle(void); /* Local variables */ static char line[80]; static integer info; static char path[3]; static integer mval[12], nval[12]; static doublereal work[649241], a[1008000] /* was [168000][6] */, d__[ 1600] /* was [400][4] */; static integer i__; static logical fatal; extern /* Subroutine */ int dtim21_(char *, integer *, integer *, integer *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, logical *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), dtim22_(char *, integer *, integer *, integer *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, logical *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen); static integer nbval[10]; extern /* Subroutine */ int dtim51_(char *, integer *, integer *, integer *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, logical *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen), dtim26_(char *, integer *, integer *, integer *, integer *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, logical *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, ftnlen); static char vname[6]; static integer nsval[10]; static char c3[3]; static integer iwork[10]; static doublereal s1, s2; static integer iwork2[20406], nn; extern doublereal dsecnd_(void); static integer ldaval[10], nbkval[10], nbmval[10]; extern logical lsamen_(integer *, char *, char *); static integer mxbval[10]; static doublereal timmin; static integer nparms; static logical dotype[10], logwrk[400]; static doublereal opcnts[30000] /* was [10][10][12][25] */, result[ 30000] /* was [10][10][12][25] */; static integer maxtyp, ntypes; static logical gep, nep, sep, svd; /* Fortran I/O blocks */ static cilist io___9 = { 0, 5, 1, "(A3)", 0 }; static cilist io___11 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___12 = { 0, 6, 0, fmt_9992, 0 }; static cilist io___13 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___14 = { 0, 6, 0, fmt_9990, 0 }; static cilist io___15 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___16 = { 0, 6, 0, fmt_9985, 0 }; static cilist io___17 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___18 = { 0, 5, 0, 0, 0 }; static cilist io___20 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___21 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___26 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___27 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___28 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___33 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___41 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___42 = { 0, 5, 0, 0, 0 }; static cilist io___44 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___45 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___49 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___50 = { 0, 5, 0, 0, 0 }; static cilist io___52 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___53 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 5, 0, 0, 0 }; static cilist io___56 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___57 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___58 = { 0, 5, 0, 0, 0 }; static cilist io___60 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___61 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___62 = { 0, 6, 0, fmt_9988, 0 }; static cilist io___63 = { 0, 5, 0, 0, 0 }; static cilist io___65 = { 0, 6, 0, fmt_9987, 0 }; static cilist io___66 = { 0, 5, 0, 0, 0 }; static cilist io___68 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___70 = { 0, 5, 0, 0, 0 }; static cilist io___73 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___74 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___75 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___76 = { 0, 5, 1, "(A80)", 0 }; static cilist io___87 = { 0, 6, 0, fmt_9986, 0 }; static cilist io___88 = { 0, 6, 0, fmt_9986, 0 }; static cilist io___89 = { 0, 6, 0, fmt_9986, 0 }; static cilist io___90 = { 0, 6, 0, fmt_9986, 0 }; static cilist io___91 = { 0, 6, 0, 0, 0 }; static cilist io___92 = { 0, 6, 0, 0, 0 }; static cilist io___93 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___94 = { 0, 6, 0, fmt_9998, 0 }; static cilist io___96 = { 0, 6, 0, fmt_9997, 0 }; #define a_ref(a_1,a_2) a[(a_2)*168000 + a_1 - 168001] #define d___ref(a_1,a_2) d__[(a_2)*400 + a_1 - 401] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DTIMEE is the main timing program for the DOUBLE PRECISION matrix eigenvalue routines in LAPACK. There are four sets of routines that can be timed: NEP (Nonsymmetric Eigenvalue Problem): Includes DGEHRD, DHSEQR, DTREVC, and DHSEIN SEP (Symmetric Eigenvalue Problem): Includes DSYTRD, DORGTR, DORMTR, DSTEQR, DSTERF, DPTEQR, DSTEBZ, DSTEIN, and DSTEDC SVD (Singular Value Decomposition): Includes DGEBRD, DBDSQR, DORGBR, DBDSDC and DGESDD GEP (Generalized nonsymmetric Eigenvalue Problem): Includes DGGHRD, DHGEQZ, and DTGEVC Each test path has a different input file. The first line of the input file should contain the characters NEP, SEP, SVD, or GEP in columns 1-3. The number of remaining lines depends on what is found on the first line. ----------------------------------------------------------------------- NEP input file: line 2: NN, INTEGER Number of values of N. line 3: NVAL, INTEGER array, dimension (NN) The values for the matrix dimension N. line 4: NPARM, INTEGER Number of values of the parameters NB, NS, MAXB, and LDA. line 5: NBVAL, INTEGER array, dimension (NPARM) The values for the blocksize NB. line 6: NSVAL, INTEGER array, dimension (NPARM) The values for the number of shifts. line 7: MXBVAL, INTEGER array, dimension (NPARM) The values for MAXB, used in determining whether multishift will be used. line 8: LDAVAL, INTEGER array, dimension (NPARM) The values for the leading dimension LDA. line 9: TIMMIN, DOUBLE PRECISION The minimum time (in seconds) that a subroutine will be timed. If TIMMIN is zero, each routine should be timed only once. line 10: NTYPES, INTEGER The number of matrix types to be used in the timing run. If NTYPES >= MAXTYP, all the types are used. If 0 < NTYPES < MAXTYP, then line 11 specifies NTYPES integer values, which are the numbers of the matrix types to be used. The remaining lines specify a path name and the specific routines to be timed. For the nonsymmetric eigenvalue problem, the path name is 'DHS'. A line to request all the routines in this path has the form DHS T T T T T T T T T T T T where the first 3 characters specify the path name, and up to MAXTYP nonblank characters may appear in columns 4-80. If the k-th such character is 'T' or 't', the k-th routine will be timed. If at least one but fewer than 12 nonblank characters are specified, the remaining routines will not be timed. If columns 4-80 are blank, all the routines will be timed, so the input line DHS is equivalent to the line above. ----------------------------------------------------------------------- SEP input file: line 2: NN, INTEGER Number of values of N. line 3: NVAL, INTEGER array, dimension (NN) The values for the matrix dimension N. line 4: NPARM, INTEGER Number of values of the parameters NB and LDA. line 5: NBVAL, INTEGER array, dimension (NPARM) The values for the blocksize NB. line 6: LDAVAL, INTEGER array, dimension (NPARM) The values for the leading dimension LDA. line 7: TIMMIN, DOUBLE PRECISION The minimum time (in seconds) that a subroutine will be timed. If TIMMIN is zero, each routine should be timed only once. line 8: NTYPES, INTEGER The number of matrix types to be used in the timing run. If NTYPES >= MAXTYP, all the types are used. If 0 < NTYPES < MAXTYP, then line 9 specifies NTYPES integer values, which are the numbers of the matrix types to be used. The remaining lines specify a path name and the specific routines to be timed as for the NEP input file. For the symmetric eigenvalue problem, the path name is 'DST' and up to 8 routines may be timed. ----------------------------------------------------------------------- SVD input file: line 2: NN, INTEGER Number of values of M and N. line 3: MVAL, INTEGER array, dimension (NN) The values for the matrix dimension M. line 4: NVAL, INTEGER array, dimension (NN) The values for the matrix dimension N. line 5: NPARM, INTEGER Number of values of the parameters NB and LDA. line 6: NBVAL, INTEGER array, dimension (NPARM) The values for the blocksize NB. line 7: LDAVAL, INTEGER array, dimension (NPARM) The values for the leading dimension LDA. line 8: TIMMIN, DOUBLE PRECISION The minimum time (in seconds) that a subroutine will be timed. If TIMMIN is zero, each routine should be timed only once. line 9: NTYPES, INTEGER The number of matrix types to be used in the timing run. If NTYPES >= MAXTYP, all the types are used. If 0 < NTYPES < MAXTYP, then line 10 specifies NTYPES integer values, which are the numbers of the matrix types to be used. The remaining lines specify a path name and the specific routines to be timed as for the NEP input file. For the singular value decomposition the path name is 'DBD' and up to 16 routines may be timed. ----------------------------------------------------------------------- GEP input file: line 2: NN, INTEGER Number of values of N. line 3: NVAL, INTEGER array, dimension (NN) The values for the matrix dimension N. line 4: NPARM, INTEGER Number of values of the parameters NB, NS, MAXB, and LDA. line 5: NBVAL, INTEGER array, dimension (NPARM) The values for the blocksize NB. line 6: NSVAL, INTEGER array, dimension (NPARM) The values for the number of shifts. line 7: NEIVAL, INTEGER array, dimension (NPARM) The values for NEISP, used in determining whether multishift will be used. line 8: NBMVAL, INTEGER array, dimension (NPARM) The values for MINNB, used in determining minimum blocksize. line 9: NBKVAL, INTEGER array, dimension (NPARM) The values for MINBLK, also used in determining minimum blocksize. line 10: LDAVAL, INTEGER array, dimension (NPARM) The values for the leading dimension LDA. line 11: TIMMIN, DOUBLE PRECISION The minimum time (in seconds) that a subroutine will be timed. If TIMMIN is zero, each routine should be timed only once. line 12: NTYPES, INTEGER The number of matrix types to be used in the timing run. If NTYPES >= MAXTYP, all the types are used. If 0 < NTYPES < MAXTYP, then line 13 specifies NTYPES integer values, which are the numbers of the matrix types to be used. The remaining lines specify a path name and the specific routines to be timed. For the nonsymmetric eigenvalue problem, the path name is 'DHG'. A line to request all the routines in this path has the form DHG T T T T T T T T T T T T T T T T T T where the first 3 characters specify the path name, and up to MAXTYP nonblank characters may appear in columns 4-80. If the k-th such character is 'T' or 't', the k-th routine will be timed. If at least one but fewer than 18 nonblank characters are specified, the remaining routines will not be timed. If columns 4-80 are blank, all the routines will be timed, so the input line DHG is equivalent to the line above. ======================================================================= The workspace requirements in terms of square matrices for the different test paths are as follows: NEP: 3 N**2 + N*(3*NB+2) SEP: 2 N**2 + N*(2*N) + N SVD: 4 N**2 + MAX( 6*N, MAXIN*MAXPRM*MAXT ) GEP: 6 N**2 + 3*N MAXN is currently set to 400, LG2MXN = ceiling of log-base-2 of MAXN = 9, and LDAMAX = 420. The real work space needed is LWORK = MAX( MAXN*(4*MAXN+2), 2*LDAMAX+1+3*MAXN+2*MAXN*LG2MXN+3*MAXN**2 ), and the integer workspace needed is LIWRK2 = 6 + 6*MAXN + 5*MAXN*LG2MXN. For SVD, we assume NRHS may be as big as N. The parameter NEED is set to 4 to allow for 4 NxN matrices for SVD. */ s1 = dsecnd_(); fatal = FALSE_; nep = FALSE_; sep = FALSE_; svd = FALSE_; gep = FALSE_; /* Read the 3-character test path */ i__1 = s_rsfe(&io___9); if (i__1 != 0) { goto L160; } i__1 = do_fio(&c__1, path, (ftnlen)3); if (i__1 != 0) { goto L160; } i__1 = e_rsfe(); if (i__1 != 0) { goto L160; } nep = lsamen_(&c__3, path, "NEP") || lsamen_(&c__3, path, "DHS"); sep = lsamen_(&c__3, path, "SEP") || lsamen_(&c__3, path, "DST"); svd = lsamen_(&c__3, path, "SVD") || lsamen_(&c__3, path, "DBD"); gep = lsamen_(&c__3, path, "GEP") || lsamen_(&c__3, path, "DHG"); /* Report values of parameters as they are read. */ if (nep) { s_wsfe(&io___11); e_wsfe(); } else if (sep) { s_wsfe(&io___12); e_wsfe(); } else if (svd) { s_wsfe(&io___13); e_wsfe(); } else if (gep) { s_wsfe(&io___14); e_wsfe(); } else { s_wsfe(&io___15); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); s_stop("", (ftnlen)0); } s_wsfe(&io___16); e_wsfe(); s_wsfe(&io___17); e_wsfe(); /* Read the number of values of M and N. */ s_rsle(&io___18); do_lio(&c__3, &c__1, (char *)&nn, (ftnlen)sizeof(integer)); e_rsle(); if (nn < 1) { s_wsfe(&io___20); do_fio(&c__1, "NN ", (ftnlen)4); do_fio(&c__1, (char *)&nn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nn = 0; fatal = TRUE_; } else if (nn > 12) { s_wsfe(&io___21); do_fio(&c__1, "NN ", (ftnlen)4); do_fio(&c__1, (char *)&nn, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nn = 0; fatal = TRUE_; } /* Read the values of M */ s_rsle(&io___22); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); if (svd) { s_copy(vname, " M", (ftnlen)6, (ftnlen)3); } else { s_copy(vname, " N", (ftnlen)6, (ftnlen)3); } i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { if (mval[i__ - 1] < 0) { s_wsfe(&io___26); do_fio(&c__1, vname, (ftnlen)6); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (mval[i__ - 1] > 400) { s_wsfe(&io___27); do_fio(&c__1, vname, (ftnlen)6); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__400, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L10: */ } /* Read the values of N */ if (svd) { s_wsfe(&io___28); do_fio(&c__1, "M ", (ftnlen)4); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); s_rsle(&io___29); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { if (nval[i__ - 1] < 0) { s_wsfe(&io___31); do_fio(&c__1, "N ", (ftnlen)4); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (nval[i__ - 1] > 400) { s_wsfe(&io___32); do_fio(&c__1, "N ", (ftnlen)4); do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&c__400, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L20: */ } } else { i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { nval[i__ - 1] = mval[i__ - 1]; /* L30: */ } } s_wsfe(&io___33); do_fio(&c__1, "N ", (ftnlen)4); i__1 = nn; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Read the number of parameter values. */ s_rsle(&io___34); do_lio(&c__3, &c__1, (char *)&nparms, (ftnlen)sizeof(integer)); e_rsle(); if (nparms < 1) { s_wsfe(&io___36); do_fio(&c__1, "NPARMS", (ftnlen)6); do_fio(&c__1, (char *)&nparms, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nparms = 0; fatal = TRUE_; } else if (nparms > 12) { s_wsfe(&io___37); do_fio(&c__1, "NPARMS", (ftnlen)6); do_fio(&c__1, (char *)&nparms, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nparms = 0; fatal = TRUE_; } /* Read the values of NB */ s_rsle(&io___38); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { if (nbval[i__ - 1] < 0) { s_wsfe(&io___40); do_fio(&c__1, "NB ", (ftnlen)4); do_fio(&c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L40: */ } s_wsfe(&io___41); do_fio(&c__1, "NB ", (ftnlen)4); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nbval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); if (nep || gep) { /* Read the values of NSHIFT */ s_rsle(&io___42); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { if (nsval[i__ - 1] < 0) { s_wsfe(&io___44); do_fio(&c__1, "NS ", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer) ); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L50: */ } s_wsfe(&io___45); do_fio(&c__1, "NS ", (ftnlen)4); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Read the values of MAXB */ s_rsle(&io___46); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&mxbval[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { if (mxbval[i__ - 1] < 0) { s_wsfe(&io___48); do_fio(&c__1, "MAXB", (ftnlen)4); do_fio(&c__1, (char *)&mxbval[i__ - 1], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L60: */ } s_wsfe(&io___49); do_fio(&c__1, "MAXB", (ftnlen)4); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&mxbval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } else { i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { nsval[i__ - 1] = 1; mxbval[i__ - 1] = 1; /* L70: */ } } if (gep) { /* Read the values of NBMIN */ s_rsle(&io___50); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nbmval[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { if (nbmval[i__ - 1] < 0) { s_wsfe(&io___52); do_fio(&c__1, "NBMIN", (ftnlen)5); do_fio(&c__1, (char *)&nbmval[i__ - 1], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L80: */ } s_wsfe(&io___53); do_fio(&c__1, "NBMIN", (ftnlen)5); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nbmval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Read the values of MINBLK */ s_rsle(&io___54); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nbkval[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { if (nbkval[i__ - 1] < 0) { s_wsfe(&io___56); do_fio(&c__1, "MINBLK", (ftnlen)6); do_fio(&c__1, (char *)&nbkval[i__ - 1], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L90: */ } s_wsfe(&io___57); do_fio(&c__1, "MINBLK", (ftnlen)6); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nbkval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } else { i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { nbmval[i__ - 1] = 401; nbkval[i__ - 1] = 401; /* L100: */ } } /* Read the values of LDA */ s_rsle(&io___58); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer) ); } e_rsle(); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { if (ldaval[i__ - 1] < 0) { s_wsfe(&io___60); do_fio(&c__1, "LDA ", (ftnlen)4); do_fio(&c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (ldaval[i__ - 1] > 420) { s_wsfe(&io___61); do_fio(&c__1, "LDA ", (ftnlen)4); do_fio(&c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__420, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L110: */ } s_wsfe(&io___62); do_fio(&c__1, "LDA ", (ftnlen)4); i__1 = nparms; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&ldaval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); /* Read the minimum time a subroutine will be timed. */ s_rsle(&io___63); do_lio(&c__5, &c__1, (char *)&timmin, (ftnlen)sizeof(doublereal)); e_rsle(); s_wsfe(&io___65); do_fio(&c__1, (char *)&timmin, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Read the number of matrix types to use in timing. */ s_rsle(&io___66); do_lio(&c__3, &c__1, (char *)&ntypes, (ftnlen)sizeof(integer)); e_rsle(); if (ntypes < 0) { s_wsfe(&io___68); do_fio(&c__1, "NTYPES", (ftnlen)6); do_fio(&c__1, (char *)&ntypes, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; ntypes = 0; } /* Read the matrix types. */ if (nep) { maxtyp = mxtype[0]; } else if (sep) { maxtyp = mxtype[1]; } else if (svd) { maxtyp = mxtype[2]; } else { maxtyp = mxtype[3]; } if (ntypes < maxtyp) { s_rsle(&io___70); i__1 = ntypes; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&iwork[i__ - 1], (ftnlen)sizeof( integer)); } e_rsle(); i__1 = maxtyp; for (i__ = 1; i__ <= i__1; ++i__) { dotype[i__ - 1] = FALSE_; /* L120: */ } i__1 = ntypes; for (i__ = 1; i__ <= i__1; ++i__) { if (iwork[i__ - 1] < 0) { s_wsfe(&io___73); do_fio(&c__1, "TYPE", (ftnlen)4); do_fio(&c__1, (char *)&iwork[i__ - 1], (ftnlen)sizeof(integer) ); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (iwork[i__ - 1] > maxtyp) { s_wsfe(&io___74); do_fio(&c__1, "TYPE", (ftnlen)4); do_fio(&c__1, (char *)&iwork[i__ - 1], (ftnlen)sizeof(integer) ); do_fio(&c__1, (char *)&maxtyp, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else { dotype[iwork[i__ - 1] - 1] = TRUE_; } /* L130: */ } } else { ntypes = maxtyp; for (i__ = 1; i__ <= 10; ++i__) { dotype[i__ - 1] = TRUE_; /* L140: */ } } if (fatal) { s_wsfe(&io___75); e_wsfe(); s_stop("", (ftnlen)0); } /* Read the input lines indicating the test path and the routines to be timed. The first three characters indicate the test path. */ L150: i__1 = s_rsfe(&io___76); if (i__1 != 0) { goto L160; } i__1 = do_fio(&c__1, line, (ftnlen)80); if (i__1 != 0) { goto L160; } i__1 = e_rsfe(); if (i__1 != 0) { goto L160; } s_copy(c3, line, (ftnlen)3, (ftnlen)3); /* ------------------------------------- NEP: Nonsymmetric Eigenvalue Problem ------------------------------------- */ if (lsamen_(&c__3, c3, "DHS") || lsamen_(&c__3, c3, "NEP")) { dtim21_(line, &nn, nval, &maxtyp, dotype, &nparms, nbval, nsval, mxbval, ldaval, &timmin, &c__6, iseed, &a_ref(1, 1), &a_ref(1, 2), &a_ref(1, 3), &d___ref(1, 1), work, &c_b226, logwrk, iwork2, result, &c__10, &c__10, &c__12, opcnts, &c__10, & c__10, &c__12, &info, (ftnlen)80); if (info != 0) { s_wsfe(&io___87); do_fio(&c__1, "DTIM21", (ftnlen)6); do_fio(&c__1, (char *)&info, (ftnlen)sizeof(integer)); e_wsfe(); } /* ---------------------------------- SEP: Symmetric Eigenvalue Problem ---------------------------------- */ } else if (lsamen_(&c__3, c3, "DST") || lsamen_(& c__3, c3, "SEP")) { dtim22_(line, &nn, nval, &maxtyp, dotype, &nparms, nbval, ldaval, & timmin, &c__6, iseed, &a_ref(1, 1), &d___ref(1, 1), &d___ref( 1, 2), &d___ref(1, 3), &a_ref(1, 2), &a_ref(1, 3), work, & c_b226, logwrk, iwork2, result, &c__10, &c__10, &c__12, opcnts, &c__10, &c__10, &c__12, &info, (ftnlen)80); if (info != 0) { s_wsfe(&io___88); do_fio(&c__1, "DTIM22", (ftnlen)6); do_fio(&c__1, (char *)&info, (ftnlen)sizeof(integer)); e_wsfe(); } /* ---------------------------------- SVD: Singular Value Decomposition ---------------------------------- */ } else if (lsamen_(&c__3, c3, "DBD") || lsamen_(& c__3, c3, "SVD")) { dtim26_(line, &nn, nval, mval, &maxtyp, dotype, &nparms, nbval, ldaval, &timmin, &c__6, iseed, &a_ref(1, 1), &a_ref(1, 2), & a_ref(1, 3), &a_ref(1, 4), &d___ref(1, 1), &d___ref(1, 2), & d___ref(1, 3), &d___ref(1, 4), work, &c_b226, iwork2, logwrk, result, &c__10, &c__10, &c__12, opcnts, &c__10, &c__10, & c__12, &info, (ftnlen)80); if (info != 0) { s_wsfe(&io___89); do_fio(&c__1, "DTIM26", (ftnlen)6); do_fio(&c__1, (char *)&info, (ftnlen)sizeof(integer)); e_wsfe(); } /* ------------------------------------------------- GEP: Generalized Nonsymmetric Eigenvalue Problem ------------------------------------------------- */ } else if (lsamen_(&c__3, c3, "DHG") || lsamen_(& c__3, c3, "GEP")) { dtim51_(line, &nn, nval, &maxtyp, dotype, &nparms, nbval, nsval, mxbval, nbmval, nbkval, ldaval, &timmin, &c__6, iseed, &a_ref( 1, 1), &a_ref(1, 2), &a_ref(1, 3), &a_ref(1, 4), &a_ref(1, 5), &a_ref(1, 6), &d___ref(1, 1), work, &c_b226, logwrk, result, &c__10, &c__10, &c__12, opcnts, &c__10, &c__10, &c__12, &info, (ftnlen)80); if (info != 0) { s_wsfe(&io___90); do_fio(&c__1, "DTIM51", (ftnlen)6); do_fio(&c__1, (char *)&info, (ftnlen)sizeof(integer)); e_wsfe(); } } else { s_wsle(&io___91); e_wsle(); s_wsle(&io___92); e_wsle(); s_wsfe(&io___93); do_fio(&c__1, c3, (ftnlen)3); e_wsfe(); } goto L150; L160: s_wsfe(&io___94); e_wsfe(); s2 = dsecnd_(); s_wsfe(&io___96); d__1 = s2 - s1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); /* End of DTIMEE */ return 0; } /* MAIN__ */
/* Main program */ int MAIN__(void) { /* Initialized data */ static char intstr[10] = "0123456789"; /* Format strings */ static char fmt_9994[] = "(\002 Tests of the DOUBLE PRECISION LAPACK DSG" "ESV/DSPOSV\002,\002 routines \002,/\002 LAPACK VERSION \002,i1" ",\002.\002,i1,\002.\002,i1,//\002 The following parameter values" " will be used:\002)"; static char fmt_9996[] = "(\002 Invalid input value: \002,a4,\002=\002,i" "6,\002; must be >=\002,i6)"; static char fmt_9995[] = "(\002 Invalid input value: \002,a4,\002=\002,i" "6,\002; must be <=\002,i6)"; static char fmt_9993[] = "(4x,a4,\002: \002,10i6,/11x,10i6)"; static char fmt_9992[] = "(/\002 Routines pass computational tests if te" "st ratio is \002,\002less than\002,f8.2,/)"; static char fmt_9999[] = "(/\002 Execution not attempted due to input er" "rors\002)"; static char fmt_9991[] = "(\002 Relative machine \002,a,\002 is taken to" " be\002,d16.6)"; static char fmt_9990[] = "(/1x,a6,\002 routines were not tested\002)"; static char fmt_9989[] = "(/1x,a6,\002 driver routines were not teste" "d\002)"; static char fmt_9998[] = "(/\002 End of tests\002)"; static char fmt_9997[] = "(\002 Total time used = \002,f12.2,\002 seco" "nds\002,/)"; /* System generated locals */ integer i__1; doublereal d__1; cilist ci__1; cllist cl__1; /* Local variables */ doublereal a[34848] /* was [17424][2] */, b[4224] /* was [2112][2] */; integer i__, k; char c1[1], c2[2]; doublereal s1, s2; integer ic, nm, vers_patch__, vers_major__, vers_minor__, lda; doublereal eps; integer nns; char path[3]; integer mval[12], nrhs; real seps; doublereal work[4224]; logical fatal; char aline[72]; integer nmats, nsval[12], iwork[132]; doublereal rwork[132]; real swork[19536]; doublereal thresh; logical dotype[30]; integer ntypes; logical tsterr, tstdrv; /* Fortran I/O blocks */ static cilist io___5 = { 0, 5, 0, 0, 0 }; static cilist io___9 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___10 = { 0, 5, 0, 0, 0 }; static cilist io___12 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___13 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___14 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___18 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___19 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___20 = { 0, 5, 0, 0, 0 }; static cilist io___22 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___23 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___24 = { 0, 5, 0, 0, 0 }; static cilist io___26 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___27 = { 0, 6, 0, fmt_9995, 0 }; static cilist io___28 = { 0, 6, 0, fmt_9993, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 6, 0, fmt_9992, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___38 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___39 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___40 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___41 = { 0, 6, 0, 0, 0 }; static cilist io___43 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___44 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___45 = { 0, 6, 0, fmt_9991, 0 }; static cilist io___46 = { 0, 6, 0, 0, 0 }; static cilist io___55 = { 0, 6, 0, fmt_9990, 0 }; static cilist io___56 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 6, 0, fmt_9989, 0 }; static cilist io___68 = { 0, 6, 0, fmt_9998, 0 }; static cilist io___69 = { 0, 6, 0, fmt_9997, 0 }; /* -- LAPACK test routine (version 3.1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* Purpose */ /* ======= */ /* DCHKAB is the test program for the DOUBLE PRECISION LAPACK */ /* DSGESV/DSPOSV routine */ /* The program must be driven by a short data file. The first 5 records */ /* specify problem dimensions and program options using list-directed */ /* input. The remaining lines specify the LAPACK test paths and the */ /* number of matrix types to use in testing. An annotated example of a */ /* data file can be obtained by deleting the first 3 characters from the */ /* following 10 lines: */ /* Data file for testing DOUBLE PRECISION LAPACK DSGESV */ /* 7 Number of values of M */ /* 0 1 2 3 5 10 16 Values of M (row dimension) */ /* 1 Number of values of NRHS */ /* 2 Values of NRHS (number of right hand sides) */ /* 20.0 Threshold value of test ratio */ /* T Put T to test the LAPACK routines */ /* T Put T to test the error exits */ /* DGE 11 List types on next line if 0 < NTYPES < 11 */ /* DPO 9 List types on next line if 0 < NTYPES < 9 */ /* Internal Parameters */ /* =================== */ /* NMAX INTEGER */ /* The maximum allowable value for N */ /* MAXIN INTEGER */ /* The number of different values that can be used for each of */ /* M, N, NRHS, NB, and NX */ /* MAXRHS INTEGER */ /* The maximum number of right hand sides */ /* NIN INTEGER */ /* The unit number for input */ /* NOUT INTEGER */ /* The unit number for output */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Scalars in Common .. */ /* .. */ /* .. Common blocks .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ s1 = dsecnd_(); lda = 132; fatal = FALSE_; /* Read a dummy line. */ s_rsle(&io___5); e_rsle(); /* Report values of parameters. */ ilaver_(&vers_major__, &vers_minor__, &vers_patch__); s_wsfe(&io___9); do_fio(&c__1, (char *)&vers_major__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&vers_minor__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&vers_patch__, (ftnlen)sizeof(integer)); e_wsfe(); /* Read the values of M */ s_rsle(&io___10); do_lio(&c__3, &c__1, (char *)&nm, (ftnlen)sizeof(integer)); e_rsle(); if (nm < 1) { s_wsfe(&io___12); do_fio(&c__1, " NM ", (ftnlen)4); do_fio(&c__1, (char *)&nm, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nm = 0; fatal = TRUE_; } else if (nm > 12) { s_wsfe(&io___13); do_fio(&c__1, " NM ", (ftnlen)4); do_fio(&c__1, (char *)&nm, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nm = 0; fatal = TRUE_; } s_rsle(&io___14); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { if (mval[i__ - 1] < 0) { s_wsfe(&io___17); do_fio(&c__1, " M ", (ftnlen)4); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (mval[i__ - 1] > 132) { s_wsfe(&io___18); do_fio(&c__1, " M ", (ftnlen)4); do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__132, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L10: */ } if (nm > 0) { s_wsfe(&io___19); do_fio(&c__1, "M ", (ftnlen)4); i__1 = nm; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&mval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the values of NRHS */ s_rsle(&io___20); do_lio(&c__3, &c__1, (char *)&nns, (ftnlen)sizeof(integer)); e_rsle(); if (nns < 1) { s_wsfe(&io___22); do_fio(&c__1, " NNS", (ftnlen)4); do_fio(&c__1, (char *)&nns, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); e_wsfe(); nns = 0; fatal = TRUE_; } else if (nns > 12) { s_wsfe(&io___23); do_fio(&c__1, " NNS", (ftnlen)4); do_fio(&c__1, (char *)&nns, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__12, (ftnlen)sizeof(integer)); e_wsfe(); nns = 0; fatal = TRUE_; } s_rsle(&io___24); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)) ; } e_rsle(); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { if (nsval[i__ - 1] < 0) { s_wsfe(&io___26); do_fio(&c__1, "NRHS", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__0, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } else if (nsval[i__ - 1] > 16) { s_wsfe(&io___27); do_fio(&c__1, "NRHS", (ftnlen)4); do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__16, (ftnlen)sizeof(integer)); e_wsfe(); fatal = TRUE_; } /* L30: */ } if (nns > 0) { s_wsfe(&io___28); do_fio(&c__1, "NRHS", (ftnlen)4); i__1 = nns; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&nsval[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); } /* Read the threshold value for the test ratios. */ s_rsle(&io___29); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); s_wsfe(&io___31); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Read the flag that indicates whether to test the driver routine. */ s_rsle(&io___32); do_lio(&c__8, &c__1, (char *)&tstdrv, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether to test the error exits. */ s_rsle(&io___34); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); if (fatal) { s_wsfe(&io___36); e_wsfe(); s_stop("", (ftnlen)0); } /* Calculate and print the machine dependent constants. */ seps = slamch_("Underflow threshold"); s_wsfe(&io___38); do_fio(&c__1, "(single precision) underflow", (ftnlen)28); do_fio(&c__1, (char *)&seps, (ftnlen)sizeof(real)); e_wsfe(); seps = slamch_("Overflow threshold"); s_wsfe(&io___39); do_fio(&c__1, "(single precision) overflow ", (ftnlen)28); do_fio(&c__1, (char *)&seps, (ftnlen)sizeof(real)); e_wsfe(); seps = slamch_("Epsilon"); s_wsfe(&io___40); do_fio(&c__1, "(single precision) precision", (ftnlen)28); do_fio(&c__1, (char *)&seps, (ftnlen)sizeof(real)); e_wsfe(); s_wsle(&io___41); e_wsle(); eps = dlamch_("Underflow threshold"); s_wsfe(&io___43); do_fio(&c__1, "(double precision) underflow", (ftnlen)28); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); eps = dlamch_("Overflow threshold"); s_wsfe(&io___44); do_fio(&c__1, "(double precision) overflow ", (ftnlen)28); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); eps = dlamch_("Epsilon"); s_wsfe(&io___45); do_fio(&c__1, "(double precision) precision", (ftnlen)28); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); s_wsle(&io___46); e_wsle(); L80: /* Read a test path and the number of matrix types to use. */ ci__1.cierr = 0; ci__1.ciend = 1; ci__1.ciunit = 5; ci__1.cifmt = "(A72)"; i__1 = s_rsfe(&ci__1); if (i__1 != 0) { goto L140; } i__1 = do_fio(&c__1, aline, (ftnlen)72); if (i__1 != 0) { goto L140; } i__1 = e_rsfe(); if (i__1 != 0) { goto L140; } s_copy(path, aline, (ftnlen)3, (ftnlen)3); nmats = 30; i__ = 3; L90: ++i__; if (i__ > 72) { nmats = 30; goto L130; } if (*(unsigned char *)&aline[i__ - 1] == ' ') { goto L90; } nmats = 0; L100: *(unsigned char *)c1 = *(unsigned char *)&aline[i__ - 1]; for (k = 1; k <= 10; ++k) { if (*(unsigned char *)c1 == *(unsigned char *)&intstr[k - 1]) { ic = k - 1; goto L120; } /* L110: */ } goto L130; L120: nmats = nmats * 10 + ic; ++i__; if (i__ > 72) { goto L130; } goto L100; L130: *(unsigned char *)c1 = *(unsigned char *)path; s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2); nrhs = nsval[0]; /* Check first character for correct precision. */ if (! lsame_(c1, "Double precision")) { s_wsfe(&io___55); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } else if (nmats <= 0) { /* Check for a positive number of tests requested. */ s_wsfe(&io___56); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); goto L140; } else if (lsamen_(&c__2, c2, "GE")) { /* GE: general matrices */ ntypes = 11; alareq_("DGE", &nmats, dotype, &ntypes, &c__5, &c__6); /* Test the error exits */ if (tsterr) { derrab_(&c__6); } if (tstdrv) { ddrvab_(dotype, &nm, mval, &nns, nsval, &thresh, &lda, a, &a[ 17424], b, &b[2112], work, rwork, swork, iwork, &c__6); } else { s_wsfe(&io___65); do_fio(&c__1, "DSGESV", (ftnlen)6); e_wsfe(); } } else if (lsamen_(&c__2, c2, "PO")) { /* PO: positive definite matrices */ ntypes = 9; alareq_("DPO", &nmats, dotype, &ntypes, &c__5, &c__6); if (tsterr) { derrac_(&c__6); } if (tstdrv) { ddrvac_(dotype, &nm, mval, &nns, nsval, &thresh, &lda, a, &a[ 17424], b, &b[2112], work, rwork, swork, &c__6); } else { s_wsfe(&io___66); do_fio(&c__1, path, (ftnlen)3); e_wsfe(); } } else { } /* Go back to get another input line. */ goto L80; /* Branch to this line when the last record is read. */ L140: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); s2 = dsecnd_(); s_wsfe(&io___68); e_wsfe(); s_wsfe(&io___69); d__1 = s2 - s1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); /* L9988: */ /* End of DCHKAB */ return 0; } /* MAIN__ */
/* Main program */ MAIN__(void) { /* Format strings */ static char fmt_9999[] = "(\002 Time for 1,000,000 DAXPY ops = \002,g10" ".3,\002 seconds\002)"; static char fmt_9998[] = "(\002 DAXPY performance rate = \002,g10" ".3,\002 mflops \002)"; static char fmt_9994[] = "(\002 *** Error: Time for operations was zer" "o\002)"; static char fmt_9997[] = "(\002 Including DSECND, time = \002,g10" ".3,\002 seconds\002)"; static char fmt_9996[] = "(\002 Average time for DSECND = \002,g10" ".3,\002 milliseconds\002)"; static char fmt_9995[] = "(\002 Equivalent floating point ops = \002,g10" ".3,\002 ops\002)"; /* System generated locals */ doublereal d__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer i, j; static doublereal alpha, x[100], y[100]; extern /* Subroutine */ int mysub_(integer *, doublereal *, doublereal *); static doublereal t1, t2; extern doublereal dsecnd_(void); static doublereal tnosec, avg; /* Fortran I/O blocks */ static cilist io___8 = { 0, 6, 0, fmt_9999, 0 }; static cilist io___9 = { 0, 6, 0, fmt_9998, 0 }; static cilist io___10 = { 0, 6, 0, fmt_9994, 0 }; static cilist io___12 = { 0, 6, 0, fmt_9997, 0 }; static cilist io___14 = { 0, 6, 0, fmt_9996, 0 }; static cilist io___15 = { 0, 6, 0, fmt_9995, 0 }; /* -- LAPACK test routine (version 3.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* September 30, 1994 */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Initialize X and Y */ for (i = 1; i <= 100; ++i) { x[i - 1] = 1. / (doublereal) i; y[i - 1] = (doublereal) (100 - i) / 100.; /* L10: */ } alpha = .315; /* Time 1,000,000 DAXPY operations */ t1 = dsecnd_(); for (j = 1; j <= 5000; ++j) { for (i = 1; i <= 100; ++i) { y[i - 1] += alpha * x[i - 1]; /* L20: */ } alpha = -alpha; /* L30: */ } t2 = dsecnd_(); s_wsfe(&io___8); d__1 = t2 - t1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); if (t2 - t1 > 0.) { s_wsfe(&io___9); d__1 = 1. / (t2 - t1); do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); } else { s_wsfe(&io___10); e_wsfe(); } tnosec = t2 - t1; /* Time 1,000,000 DAXPY operations with DSECND in the outer loop */ t1 = dsecnd_(); for (j = 1; j <= 5000; ++j) { for (i = 1; i <= 100; ++i) { y[i - 1] += alpha * x[i - 1]; /* L40: */ } alpha = -alpha; t2 = dsecnd_(); /* L50: */ } /* Compute the time in milliseconds used by an average call */ /* to DSECND. */ s_wsfe(&io___12); d__1 = t2 - t1; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); avg = (t2 - t1 - tnosec) * 1e3 / 5e3; s_wsfe(&io___14); do_fio(&c__1, (char *)&avg, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Compute the equivalent number of floating point operations used */ /* by an average call to DSECND. */ if (tnosec > 0.) { s_wsfe(&io___15); d__1 = avg * 1e3 / tnosec; do_fio(&c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsfe(); } mysub_(&c__100, x, y); return 0; } /* MAIN__ */
/* Subroutine */ int dgelsx_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * info) { /* Initialized data */ static integer gelsx = 1; static integer geqpf = 2; static integer latzm = 6; static integer orm2r = 4; static integer trsm = 5; static integer tzrqf = 3; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; doublereal d__1; /* Local variables */ static doublereal anrm, bnrm, smin, smax; static integer i__, j, k, iascl, ibscl; extern doublereal dopla_(char *, integer *, integer *, integer *, integer *, integer *); static integer ismin, ismax; static doublereal c1, c2; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dlaic1_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal s1, s2, t1, t2; extern doublereal dopbl3_(char *, integer *, integer *, integer *) ; extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dlabad_( doublereal *, doublereal *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); static integer mn; extern doublereal dsecnd_(void); extern /* Subroutine */ int dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dgeqpf_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static doublereal bignum; extern /* Subroutine */ int dlatzm_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *); static doublereal sminpr, smaxpr, smlnum; extern /* Subroutine */ int dtzrqf_(integer *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal tim1, tim2; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* -- LAPACK driver routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Common blocks to return operation counts and timings Purpose ======= DGELSX computes the minimum-norm solution to a real linear least squares problem: minimize || A * X - B || using a complete orthogonal factorization of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The routine first computes a QR factorization with column pivoting: A * P = Q * [ R11 R12 ] [ 0 R22 ] with R11 defined as the largest leading submatrix whose estimated condition number is less than 1/RCOND. The order of R11, RANK, is the effective rank of A. Then, R22 is considered to be negligible, and R12 is annihilated by orthogonal transformations from the right, arriving at the complete orthogonal factorization: A * P = Q * [ T11 0 ] * Z [ 0 0 ] The minimum-norm solution is then X = P * Z' [ inv(T11)*Q1'*B ] [ 0 ] where Q1 consists of the first RANK columns of Q. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of matrices B and X. NRHS >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A has been overwritten by details of its complete orthogonal factorization. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the M-by-NRHS right hand side matrix B. On exit, the N-by-NRHS solution matrix X. If m >= n and RANK = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of squares of elements N+1:M in that column. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M,N). JPVT (input/output) INTEGER array, dimension (N) On entry, if JPVT(i) .ne. 0, the i-th column of A is an initial column, otherwise it is a free column. Before the QR factorization of A, all initial columns are permuted to the leading positions; only the remaining free columns are moved as a result of column pivoting during the factorization. On exit, if JPVT(i) = k, then the i-th column of A*P was the k-th column of A. RCOND (input) DOUBLE PRECISION RCOND is used to determine the effective rank of A, which is defined as the order of the largest leading triangular submatrix R11 in the QR factorization with pivoting of A, whose estimated condition number < 1/RCOND. RANK (output) INTEGER The effective rank of A, i.e., the order of the submatrix R11. This is the same as the order of the submatrix T11 in the complete orthogonal factorization of A. WORK (workspace) DOUBLE PRECISION array, dimension (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --jpvt; --work; /* Function Body */ mn = min(*m,*n); ismin = mn + 1; ismax = (mn << 1) + 1; /* Test the input arguments. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*ldb < max(i__1,*n)) { *info = -7; } } if (*info != 0) { i__1 = -(*info); xerbla_("DGELSX", &i__1); return 0; } /* Quick return if possible Computing MIN */ i__1 = min(*m,*n); if (min(i__1,*nrhs) == 0) { *rank = 0; return 0; } /* Get machine parameters */ lstime_1.opcnt[gelsx - 1] += 2.; smlnum = dlamch_("S") / dlamch_("P"); bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); /* Scale A, B if max elements outside range [SMLNUM,BIGNUM] */ anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ lstime_1.opcnt[gelsx - 1] += (doublereal) (*m * *n); dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ lstime_1.opcnt[gelsx - 1] += (doublereal) (*m * *n); dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); dlaset_("F", &i__1, nrhs, &c_b13, &c_b13, &b[b_offset], ldb); *rank = 0; goto L100; } bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ lstime_1.opcnt[gelsx - 1] += (doublereal) (*m * *nrhs); dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ lstime_1.opcnt[gelsx - 1] += (doublereal) (*m * *nrhs); dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* Compute QR factorization with column pivoting of A: A * P = Q * R */ lstime_1.opcnt[geqpf - 1] += dopla_("DGEQPF", m, n, &c__0, &c__0, &c__0); tim1 = dsecnd_(); dgeqpf_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], info); tim2 = dsecnd_(); lstime_1.timng[geqpf - 1] += tim2 - tim1; /* workspace 3*N. Details of Householder rotations stored in WORK(1:MN). Determine RANK using incremental condition estimation */ work[ismin] = 1.; work[ismax] = 1.; smax = (d__1 = a_ref(1, 1), abs(d__1)); smin = smax; if ((d__1 = a_ref(1, 1), abs(d__1)) == 0.) { *rank = 0; i__1 = max(*m,*n); dlaset_("F", &i__1, nrhs, &c_b13, &c_b13, &b[b_offset], ldb); goto L100; } else { *rank = 1; } L10: if (*rank < mn) { i__ = *rank + 1; latime_1.ops = 0.; dlaic1_(&c__2, rank, &work[ismin], &smin, &a_ref(1, i__), &a_ref(i__, i__), &sminpr, &s1, &c1); dlaic1_(&c__1, rank, &work[ismax], &smax, &a_ref(1, i__), &a_ref(i__, i__), &smaxpr, &s2, &c2); lstime_1.opcnt[gelsx - 1] = lstime_1.opcnt[gelsx - 1] + latime_1.ops + 1.; if (smaxpr * *rcond <= sminpr) { lstime_1.opcnt[gelsx - 1] += (doublereal) (*rank << 1); i__1 = *rank; for (i__ = 1; i__ <= i__1; ++i__) { work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; /* L20: */ } work[ismin + *rank] = c1; work[ismax + *rank] = c2; smin = sminpr; smax = smaxpr; ++(*rank); goto L10; } } /* Logically partition R = [ R11 R12 ] [ 0 R22 ] where R11 = R(1:RANK,1:RANK) [R11,R12] = [ T11, 0 ] * Y */ if (*rank < *n) { lstime_1.opcnt[tzrqf - 1] += dopla_("DTZRQF", rank, n, &c__0, &c__0, & c__0); tim1 = dsecnd_(); dtzrqf_(rank, n, &a[a_offset], lda, &work[mn + 1], info); tim2 = dsecnd_(); lstime_1.timng[tzrqf - 1] += tim2 - tim1; } /* Details of Householder rotations stored in WORK(MN+1:2*MN) B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ lstime_1.opcnt[orm2r - 1] += dopla_("DORMQR", m, nrhs, &mn, &c__0, &c__0); tim1 = dsecnd_(); dorm2r_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & b[b_offset], ldb, &work[(mn << 1) + 1], info); tim2 = dsecnd_(); lstime_1.timng[orm2r - 1] += tim2 - tim1; /* workspace NRHS B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ lstime_1.opcnt[trsm - 1] += dopbl3_("DTRSM ", rank, nrhs, &c__0); tim1 = dsecnd_(); dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b49, & a[a_offset], lda, &b[b_offset], ldb); tim2 = dsecnd_(); lstime_1.timng[trsm - 1] += tim2 - tim1; i__1 = *n; for (i__ = *rank + 1; i__ <= i__1; ++i__) { i__2 = *nrhs; for (j = 1; j <= i__2; ++j) { b_ref(i__, j) = 0.; /* L30: */ } /* L40: */ } /* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */ if (*rank < *n) { lstime_1.opcnt[latzm - 1] += (doublereal) (((*n - *rank) * *nrhs + * nrhs + (*n - *rank) * *nrhs << 1) * *rank); tim1 = dsecnd_(); i__1 = *rank; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n - *rank + 1; dlatzm_("Left", &i__2, nrhs, &a_ref(i__, *rank + 1), lda, &work[ mn + i__], &b_ref(i__, 1), &b_ref(*rank + 1, 1), ldb, & work[(mn << 1) + 1]); /* L50: */ } tim2 = dsecnd_(); lstime_1.timng[latzm - 1] += tim2 - tim1; } /* workspace NRHS B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[(mn << 1) + i__] = 1.; /* L60: */ } i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[(mn << 1) + i__] == 1.) { if (jpvt[i__] != i__) { k = i__; t1 = b_ref(k, j); t2 = b_ref(jpvt[k], j); L70: b_ref(jpvt[k], j) = t1; work[(mn << 1) + k] = 0.; t1 = t2; k = jpvt[k]; t2 = b_ref(jpvt[k], j); if (jpvt[k] != i__) { goto L70; } b_ref(i__, j) = t1; work[(mn << 1) + k] = 0.; } } /* L80: */ } /* L90: */ } /* Undo scaling */ if (iascl == 1) { lstime_1.opcnt[gelsx - 1] += (doublereal) (*n * *nrhs + *rank * *rank) ; dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], lda, info); } else if (iascl == 2) { lstime_1.opcnt[gelsx - 1] += (doublereal) (*n * *nrhs + *rank * *rank) ; dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], lda, info); } if (ibscl == 1) { lstime_1.opcnt[gelsx - 1] += (doublereal) (*n * *nrhs); dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { lstime_1.opcnt[gelsx - 1] += (doublereal) (*n * *nrhs); dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L100: return 0; /* End of DGELSX */ } /* dgelsx_ */
/* Subroutine */ int dtimqp_(char *line, integer *nm, integer *mval, integer * nval, integer *nlda, integer *ldaval, doublereal *timmin, doublereal * a, doublereal *copya, doublereal *tau, doublereal *work, integer * iwork, doublereal *reslts, integer *ldr1, integer *ldr2, integer * nout, ftnlen line_len) { /* Initialized data */ static char subnam[6*1] = "DGEQPF"; static integer modes[2] = { 2,3 }; static integer iseed[4] = { 0,0,0,1 }; /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9998[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9997[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_offset, i__1, i__2, i__3; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void); /* Local variables */ static integer ilda; static doublereal cond; static integer mode; static doublereal dmax__; static integer info; static char path[3]; static doublereal time; static integer i__, m, n; static char cname[6]; static integer imode; extern doublereal dopla_(char *, integer *, integer *, integer *, integer *, integer *); static integer minmn; extern /* Subroutine */ int icopy_(integer *, integer *, integer *, integer *, integer *); static doublereal s1, s2; extern /* Subroutine */ int dprtb5_(char *, char *, char *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen, ftnlen); static integer ic, im; extern doublereal dlamch_(char *), dsecnd_(void); extern /* Subroutine */ int dgeqpf_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *), atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen), dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, ftnlen), dlatms_( integer *, integer *, char *, integer *, char *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, char *, doublereal *, integer *, doublereal *, integer *); static doublereal untime; static logical timsub[1]; static integer lda, icl; static doublereal ops; /* Fortran I/O blocks */ static cilist io___8 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___27 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 0, 0, 0, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3) reslts[((a_3)*reslts_dim2 + (a_2))*\ reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DTIMQP times the LAPACK routines to perform the QR factorization with column pivoting of a DOUBLE PRECISION general matrix. Two matrix types may be used for timing. The number of types is set in the parameter NMODE and the matrix types are set in the vector MODES, using the following key: 2. BREAK1 D(1:N-1)=1 and D(N)=1.0/COND in DLATMS 3. GEOM D(I)=COND**(-(I-1)/(N-1)) in DLATMS These numbers are chosen to correspond with the matrix types in the test code. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NM (input) INTEGER The number of values of M and N contained in the vectors MVAL and NVAL. The matrix sizes are used in pairs (M,N). MVAL (input) INTEGER array, dimension (NM) The values of the matrix row dimension M. NVAL (input) INTEGER array, dimension (NM) The values of the matrix column dimension N. NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) where LDAMAX and NMAX are the maximum values of LDA and N. COPYA (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) TAU (workspace) DOUBLE PRECISION array, dimension (min(M,N)) WORK (workspace) DOUBLE PRECISION array, dimension (3*NMAX) IWORK (workspace) INTEGER array, dimension (2*NMAX) RESLTS (workspace) DOUBLE PRECISION array, dimension (LDR1,LDR2,NLDA) The timing results for each subroutine over the relevant values of MODE, (M,N), and LDA. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= max(1,NM). LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NM). NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --mval; --nval; --ldaval; --a; --copya; --tau; --work; --iwork; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * 1); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16); s_copy(path + 1, "QP", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__1, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (! timsub[0] || info != 0) { goto L80; } /* Check that M <= LDA for the input values. */ s_copy(cname, line, (ftnlen)6, (ftnlen)6); atimck_(&c__1, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___8.ciunit = *nout; s_wsfe(&io___8); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); goto L80; } /* Set the condition number and scaling factor for the matrices to be generated. */ dmax__ = 1.; cond = 1. / dlamch_("Precision"); /* Do for each pair of values (M,N): */ i__1 = *nm; for (im = 1; im <= i__1; ++im) { m = mval[im]; n = nval[im]; minmn = min(m,n); /* Do for each value of LDA: */ i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { lda = ldaval[ilda]; for (imode = 1; imode <= 2; ++imode) { mode = modes[imode - 1]; /* Generate a test matrix of size m by n using the singular value distribution indicated by MODE. */ i__3 = n; for (i__ = 1; i__ <= i__3; ++i__) { iwork[n + i__] = 0; /* L10: */ } dlatms_(&m, &n, "Uniform", iseed, "Nonsymm", &tau[1], &mode, & cond, &dmax__, &m, &n, "No packing", ©a[1], &lda, &work[1], &info); /* DGEQPF: QR factorization with column pivoting */ dlacpy_("All", &m, &n, ©a[1], &lda, &a[1], &lda); icopy_(&n, &iwork[n + 1], &c__1, &iwork[1], &c__1); ic = 0; s1 = dsecnd_(); L20: dgeqpf_(&m, &n, &a[1], &lda, &iwork[1], &tau[1], &work[1], & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dlacpy_("All", &m, &n, ©a[1], &lda, &a[1], &lda); icopy_(&n, &iwork[n + 1], &c__1, &iwork[1], &c__1); goto L20; } /* Subtract the time used in DLACPY and ICOPY. */ icl = 1; s1 = dsecnd_(); L30: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dlacpy_("All", &m, &n, ©a[1], &lda, &a[1], &lda); icopy_(&n, &iwork[n + 1], &c__1, &iwork[1], &c__1); goto L30; } time = (time - untime) / (doublereal) ic; ops = dopla_("DGEQPF", &m, &n, &c__0, &c__0, &c__1) ; reslts_ref(imode, im, ilda) = dmflop_(&ops, &time, &info); /* L40: */ } /* L50: */ } /* L60: */ } /* Print tables of results */ io___27.ciunit = *nout; s_wsfe(&io___27); do_fio(&c__1, subnam_ref(0, 1), (ftnlen)6); e_wsfe(); if (*nlda > 1) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___28.ciunit = *nout; s_wsfe(&io___28); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L70: */ } } io___29.ciunit = *nout; s_wsle(&io___29); e_wsle(); dprtb5_("Type", "M", "N", &c__2, modes, nm, &mval[1], &nval[1], nlda, & reslts[reslts_offset], ldr1, ldr2, nout, (ftnlen)4, (ftnlen)1, ( ftnlen)1); L80: return 0; /* End of DTIMQP */ } /* dtimqp_ */
int main(int argc, char* argv[]) { unsigned int m=8, n=8, k=8, lda=8, ldb=8, ldc=8, nerrs, num, nmat; unsigned int layout, asize, bsize, ntest, ncorr; #ifdef AVX512_TESTING unsigned int VLEND=8, VLENS=16; int arch=LIBXSMM_X86_AVX512_CORE; #else unsigned int VLEND=4, VLENS=8; int arch=LIBXSMM_X86_AVX2; #endif unsigned int nmats, nmatd; unsigned int i, j, l, iunroll, junroll, loopi, loopj; char side='L', uplo='U', transa='N', transb='N', diag='N'; unsigned int typesize8 = 8; unsigned int typesize4 = 4; float *sa, *sb, *sc, *sd, *sc1; double *da, *db, *dc, *dd, *dc1; double dalpha = 1.0; float salpha = (float)dalpha; double dbeta = 1.0; float sbeta = (float)dbeta; double dtmp; const unsigned char *cptr = NULL; unsigned long op_count; const libxsmm_pgemm_descriptor* desc8 = NULL; const libxsmm_pgemm_descriptor* desc4 = NULL; #ifdef USE_XSMM_GENERATED libxsmm_descriptor_blob blob; libxsmm_pgemm_xfunction mykernel = NULL; #endif #if defined(USE_KERNEL_GENERATION_DIRECTLY) && defined(__linux__) void (*opcode_routine)(); unsigned char *routine_output; libxsmm_generated_code io_generated_code; int pagesize = sysconf(_SC_PAGE_SIZE); if (pagesize == -1) fprintf(stderr,"sysconf pagesize\n"); routine_output = (unsigned char *) mmap(NULL, BUFSIZE2, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0,0); if (mprotect(routine_output, BUFSIZE2, PROT_EXEC | PROT_READ | PROT_WRITE ) == -1) fprintf(stderr,"mprotect\n"); printf("Routine ready\n"); io_generated_code.generated_code = &routine_output[0]; io_generated_code.buffer_size = BUFSIZE2; io_generated_code.code_size = 0; io_generated_code.code_type = 2; io_generated_code.last_error = 0; #endif printf("\nUSAGE: %s m n k lda ldb ldc nmat layout ntest transa transb iunroll junroll loopj loopi\n",argv[0]); if ( argc <= 3 ) { #ifdef TEST_SINGLE printf("Compact SGEMM a C_mxn<-C_mxn+A_mxk*B_kxn matrix of leading dims lda/b/c\n"); printf("This will test the jit of 1 VLEN=%d ",VLENS); if ( VLENS==8 ) printf("(AVX2)"); else printf("(AVX512)"); #else printf("Compact DGEMM a C_mxn<-C_mxn+A_mxk*B_kxn matrix of leading dims lda/b/c\n"); printf("This will test the jit of 1 VLEN=%d ",VLEND); if ( VLEND==4 ) printf("(AVX2)"); else printf("(AVX512)"); #endif printf(" work of nmat at a time\n"); printf("Configurable: M-loop controlled by iunroll & loopi. N-loop by junroll & loopj\n"); printf("Defaults: m=n=k=lda=ldb=ldc=nmat=8, layout=102 (col major), transa=/b='N', ntest=1\n"); } if ( argc > 1 ) m = atoi(argv[1]); else m = 8; if ( argc > 2 ) n = atoi(argv[2]); else n = 8; if ( argc > 3 ) k = atoi(argv[3]); else k = 8; if ( argc > 4 ) lda= atoi(argv[4]); else lda = 8; if ( argc > 5 ) ldb= atoi(argv[5]); else ldb = 8; if ( argc > 6 ) ldc= atoi(argv[6]); else ldc = 8; if ( argc > 7 ) nmat = atoi(argv[7]); else nmat = 8; if ( argc > 8 ) layout = atoi(argv[8]); else layout=102; if ( argc > 9 ) ntest = atoi(argv[9]); else ntest = 1; if ( argc > 10 ) transa = argv[10][0]; else transa = 'N'; if ( argc > 11 ) transb = argv[11][0]; else transb = 'N'; if ( argc > 12 ) iunroll=atoi(argv[12]); else iunroll=0; if ( argc > 13 ) junroll=atoi(argv[13]); else junroll=0; if ( argc > 14 ) loopj=atoi(argv[14]); else loopj=0; if ( argc > 15 ) loopi=atoi(argv[15]); else loopi=0; salpha = (float)dalpha; m = LIBXSMM_MAX(m,1); n = LIBXSMM_MAX(n,1); k = LIBXSMM_MAX(k,1); ntest = LIBXSMM_MAX(ntest,1); nmat = LIBXSMM_MAX(nmat,VLEND); layout = LIBXSMM_MAX(LIBXSMM_MIN(layout,102),101); if ( transa!='N' && transa!='n' && transa!='T' && transa!='t' ) transa='N'; if ( transb!='N' && transb!='n' && transb!='T' && transb!='t' ) transb='N'; lda = LIBXSMM_MAX(lda,m); ldb = LIBXSMM_MAX(ldb,k); ldc = LIBXSMM_MAX(ldc,m); nmats = LIBXSMM_MAX(VLENS,nmat - (nmat%VLENS)); nmatd = LIBXSMM_MAX(VLEND,nmat - (nmat%VLEND)); #ifdef TEST_SINGLE nmat = nmats; #else nmat = nmatd; #endif op_count = (unsigned long)(nmat * 2.0 * (double)m * (double)n * (double)k); #ifdef TEST_SINGLE printf("This is a real*%d tester for JIT compact SGEMM %c%c kernels! (m=%u n=%u k=%u lda=%u ldb=%u ldc=%u layout=%d nmat=%d alpha=%g beta=%g iun=%d jun=%d loopi=%d loopj=%d VLEN=%d)\n",typesize4,transa,transb,m,n,k,lda,ldb,ldc,layout,nmat,dalpha,dbeta,iunroll,junroll,loopi,loopj,VLENS); #else printf("This is a real*%d tester for JIT compact DGEMM %c%c kernels! (m=%u n=%u k=%u lda=%u ldb=%u ldc=%u layout=%d nmat=%d alpha=%g beta=%g iun=%d jun=%d loopi=%d loopj=%d VLEN=%d)\n",typesize8,transa,transb,m,n,k,lda,ldb,ldc,layout,nmat,dalpha,dbeta,iunroll,junroll,loopi,loopj,VLEND); #endif #ifdef USE_XSMM_GENERATED printf("This code tests the LIBXSMM generated kernels\n"); #endif #ifdef USE_PREDEFINED_ASSEMBLY printf("This code tests some predefined assembly kernel\n"); #endif #if defined(USE_KERNEL_GENERATION_DIRECTLY) && defined(__linux__) printf("This code tests kernel generation directly\n"); #endif #ifdef TIME_MKL printf("This code tests MKL compact batch directly\n"); #endif #ifdef AVX512_TESTING printf("This tests AVX512 binaries\n"); #endif #ifdef AVX2_TESTING printf("This tests AVX2 binaries\n"); #endif desc8 = libxsmm_pgemm_descriptor_init(&blob, typesize8, m, n, k, lda, ldb, ldc, &dalpha, transa, transb, layout ); #ifdef TEST_SINGLE desc4 = libxsmm_pgemm_descriptor_init(&blob, typesize4, m, n, k, lda, ldb, ldc, &dalpha, transa, transb, layout ); #endif printf("Descriptor set\n"); #ifdef USE_XSMM_GENERATED printf("calling libxsmm_dispatch_pgemm: typesize8=%u\n",typesize8); mykernel = libxsmm_dispatch_pgemm(desc8); printf("done calling libxsmm_dispatch_pgemm: typesize8=%u\n",typesize8); if ( mykernel == NULL ) printf("R8 Kernel after the create call is null\n"); #ifdef TEST_SINGLE mykernel = libxsmm_dispatch_pgemm(desc4); if ( mykernel == NULL ) printf("R4 kernel after the create call is null\n"); #endif #endif #if defined(USE_KERNEL_GENERATION_DIRECTLY) && defined(__linux__) libxsmm_generator_pgemm_kernel( &io_generated_code, desc8, arch, iunroll, junroll, loopi, loopj ); #endif #ifndef NO_ACCURACY_CHECK printf("mallocing matrices\n"); #endif sa = (float *) malloc ( lda*k*nmat*sizeof(float) ); da = (double *) malloc ( lda*k*nmat*sizeof(double) ); sb = (float *) malloc ( ldb*n*nmat*sizeof(float) ); db = (double *) malloc ( ldb*n*nmat*sizeof(double) ); sc1 = (float *) malloc ( ldc*n*nmat*sizeof(float) ); dc1 = (double *) malloc ( ldc*n*nmat*sizeof(double) ); sc = (float *) malloc ( ldc*n*nmat*sizeof(float) ); dc = (double *) malloc ( ldc*n*nmat*sizeof(double) ); sd = (float *) malloc ( ldc*n*nmat*sizeof(float) ); dd = (double *) malloc ( ldc*n*nmat*sizeof(double) ); #ifndef NO_ACCURACY_CHECK printf("filling matrices\n"); #endif sfill_matrix ( sa, lda, m, k*nmat ); sfill_matrix ( sb, ldb, k, n*nmat ); sfill_matrix ( sc, ldc, m, n*nmat ); dfill_matrix ( da, lda, m, k*nmat ); dfill_matrix ( db, ldb, k, n*nmat ); dfill_matrix ( dc, ldc, m, n*nmat ); #ifndef NO_ACCURACY_CHECK for ( i = 0 ; i < ldc*n*nmat ; i++ ) sd[i]=sc[i]; for ( i = 0 ; i < ldc*n*nmat ; i++ ) dd[i]=dc[i]; for ( i = 0 ; i < ldc*n*nmat ; i++ ) sc1[i]=sc[i]; for ( i = 0 ; i < ldc*n*nmat ; i++ ) dc1[i]=dc[i]; printf("Pointing at the kernel now\n"); #endif #ifdef USE_XSMM_GENERATED cptr = (const unsigned char*) mykernel; #endif #ifdef USE_PREDEFINED_ASSEMBLY cptr = (const unsigned char*) gemm_; #endif #if defined(USE_KERNEL_GENERATION_DIRECTLY) && defined(__linux__) cptr = (const unsigned char*) &routine_output[0]; opcode_routine = (void *) &cptr[0]; #endif #ifndef TIME_MKL # define DUMP_ASSEMBLY_FILE #endif #ifdef DUMP_ASSEMBLY_FILE printf("Dumping assembly file\n"); FILE *fp = fopen("foo.s","w"); char buffer[80]; fputs("\t.text\n",fp); fputs("\t.align 256\n",fp); fputs("\t.globl gemm_\n",fp); fputs("gemm_:\n",fp); for (i = 0 ; i < 7000; i+=4 ) { sprintf(buffer,".byte 0x%02x, 0x%02x, 0x%02x, 0x%02x\n",cptr[i],cptr[i+1],cptr[i+2],cptr[i+3]); fputs(buffer,fp); } fputs("\tretq\n",fp); fputs("\t.type gemm_,@function\n",fp); fputs("\t.size gemm_,.-gemm_\n",fp); fclose(fp); #endif #if defined(USE_MKL_FOR_REFERENCE) || defined(TIME_MKL) # include <mkl.h> MKL_LAYOUT CLAYOUT = (layout == 101) ? MKL_ROW_MAJOR : MKL_COL_MAJOR; MKL_SIDE SIDE = (side == 'R' || side == 'r') ? MKL_RIGHT : MKL_LEFT; MKL_UPLO UPLO = (uplo == 'U' || uplo == 'u') ? MKL_UPPER : MKL_LOWER; MKL_TRANSPOSE TRANSA = (transa == 'N' || transa == 'n') ? MKL_NOTRANS : MKL_TRANS; MKL_TRANSPOSE TRANSB = (transb == 'N' || transb == 'n') ? MKL_NOTRANS : MKL_TRANS; MKL_DIAG DIAG = (diag == 'N' || diag == 'n') ? MKL_NONUNIT : MKL_UNIT; MKL_COMPACT_PACK CMP_FORMAT = mkl_get_format_compact(); #if 0 MKL_COMPACT_PACK CMP_FORMAT = MKL_COMPACT_AVX; #endif #endif #ifndef NO_ACCURACY_CHECK printf("Before routine, initial A(1,1)=%g A[256]=%g\n",da[0],da[256]); #endif #ifdef USE_PREDEFINED_ASSEMBLY double one = 1.0; #endif double timer, firsttime = 0; #ifdef MKL_TIMER double tmptimer; tmptimer = dsecnd_(); #else unsigned long long l_start, l_end; #endif timer = 0.0; for ( j = 0 ; j < (int)ntest ; j++ ) { for ( i = 0 ; i < ldc*n*nmat ; i++ ) dc[i]=dc1[i]; for ( i = 0 , num = 0; i < (int)nmat ; i+= (int)VLEND, num++ ) { double *Ap = &da[num*lda*k*VLEND]; double *Bp = &db[num*ldb*n*VLEND]; double *Cp = &dc[num*ldc*n*VLEND]; #ifdef MKL_TIMER tmptimer = dsecnd_(); #else l_start = libxsmm_timer_tick(); #endif #if !defined(USE_XSMM_GENERATED) && !defined(USE_PREDEFINED_ASSEMBLY) && !defined(USE_KERNEL_GENERATION_DIRECTLY) && !defined(TIME_MKL) && !defined(USE_PREDEFINED_ASSEMBLY_XCT) gen_compact_dgemm_ ( &layout, &m, &n, &k, &dalpha, Ap, &lda, Bp, &ldb, &dbeta, Cp, &ldc, &VLEND ); #endif #ifdef USE_XSMM_GENERATED mykernel ( Ap, Bp, Cp ); #endif #ifdef USE_PREDEFINED_ASSEMBLY gemm_ ( Ap, Bp, Cp ); #endif #ifdef USE_KERNEL_GENERATION_DIRECTLY (*opcode_routine)( Ap, Bp, Cp ); #endif #ifdef TIME_MKL mkl_dgemm_compact ( CLAYOUT, TRANSA, TRANSB, m, n, k, dalpha, da, lda, db, ldb, dbeta, dc, ldc, CMP_FORMAT, nmat ); i+=nmatd; /* Because MKL will do everything */ #endif #ifdef MKL_TIMER dtmp = dsecnd_() - tmptimer; #else l_end = libxsmm_timer_tick(); dtmp = libxsmm_timer_duration(l_start,l_end); #endif if ( j == 0 ) firsttime=dtmp; timer += dtmp; } } if ( ntest >= 100 ) { /* Skip the first timing: super necessary if using MKL */ timer = (timer-firsttime)/((double)(ntest-1)); } else { timer /= ((double)ntest); } #ifndef NO_ACCURACY_CHECK printf("Average time to get through %u matrices: %g\n",nmat,timer); printf("Gflops: %g\n",(double)op_count/(timer*1.0e9)); printf("after routine, new C(1,1)=%g C[256]=%g\n",dc[0],dc[256]); #endif #ifdef TEST_SINGLE printf("Before r4 routine, initial C(1,1)=%g C[256]=%g\n",sc[0],sc[256]); for ( i = 0 , num = 0; i < nmats ; i+= VLENS, num++ ) { float *Ap = &sa[num*lda*k*VLENS]; float *Bp = &sb[num*ldb*n*VLENS]; float *Cp = &sc[num*ldc*n*VLENS]; #ifdef USE_XSMM_GENERATED mykernel ( Ap, Bp, Cp ); #endif } printf("after r4 routine, new C(1,1)=%g C]256]=%g\n",dc[0],dc[256]); #endif #ifndef NO_ACCURACY_CHECK /* Call some reference code now on a copy of the B matrix (C) */ double timer2 = 0.0; for ( j = 0 ; j < (int)ntest ; j++ ) { for ( i = 0 ; i < ldc*n*nmat ; i++ ) dd[i]=dc1[i]; #ifdef MKL_TIMER tmptimer = dsecnd_(); #else l_start = libxsmm_timer_tick(); #endif #ifndef USE_MKL_FOR_REFERENCE compact_dgemm_ ( &layout, &transa, &transb, &m, &n, &k, &dalpha, da, &lda, db, &ldb, &dbeta, dd, &ldc, &nmat, &VLEND ); #else mkl_dgemm_compact ( CLAYOUT, TRANSA, TRANSB, m, n, k, dalpha, da, lda, db, ldb, dbeta, dd, ldc, CMP_FORMAT, nmat ); #endif #ifdef MKL_TIMER timer2 += dsecnd_() - tmptimer; #else l_end = libxsmm_timer_tick(); timer2 += libxsmm_timer_duration(l_start,l_end); #endif } timer2 /= ((double)ntest); printf("Reference time=%g Reference Gflops=%g\n",timer2,op_count/(timer2*1.0e9)); /* Compute the residual between B and C */ dtmp = residual_d ( dc, ldc, m, n*nmat, dd, ldc, &nerrs, &ncorr ); printf("R8 mnk=%u %u %u ldabc=%u %u %u error: %g number of errors: %u corrects: %u",m,n,k,lda,ldb,ldc,dtmp,nerrs,ncorr); if ( nerrs > 0 ) printf(" ->FAILED at %ux%u real*8 %u case",m,n,layout); printf("\n"); #ifdef TEST_SINGLE /* Call some reference code now on a copy of the B matrix (C) */ compact_dgemm_ ( &layout, &transa, &transb, &m, &n, &k, &salpha, sa, &lda, sb, &ldb, &sbeta, sd, &ldc, &nmat, &VLENS ); /* Compute the residual between C and D */ dtmp = residual_s ( sc, ldc, m, n*nmat, sd, ldc, &nerrs, &ncorr ); printf("R4 mnk=%u %u %u ldabc=%u %u %u error: %g number of errors: %u corrects: %u",m,n,k,lda,ldb,ldc,dtmp,nerrs,ncorr); if ( nerrs > 0 ) printf(" ->FAILED at %ux%u real*4 case",m,n); printf("\n"); #endif #else for ( j = 0, nerrs = 0 ; j < lda*n*nmat; j++ ) { if ( isnan(dc[j]) || isinf(dc[j]) ) { if ( ++nerrs < 10 ) { printf("WARNING: dc[%d]=%g\n",j,dc[j]); } } } printf("%g,real*8 m/n/k=%u %u %u lda-c=%u %u %u Denormals=%u Time=%g Gflops=%g",op_count/(timer*1.0e9),m,n,k,lda,ldb,ldc,nerrs,timer,op_count/(timer*1.0e9)); if ( nerrs > 0 ) printf(" -> FAILED at %ux%u real*8 case",m,n); printf("\n"); #endif free(dd); free(sd); free(dc); free(sc); free(dc1); free(sc1); free(db); free(sb); free(da); free(sa); return 0; }