// Validates the result void validate(double **a, double *b, double *x, int n) { // copy b into x for (int i = 0; i < n; ++i) { x[i] = b[i]; } // reset A and B arrays to orignal rand values double biggestA = fillArray(a, n, b); for (int i = 0; i < n; ++i) { b[i] = -b[i]; } // multipy a*x, add to b dmxpy(n, b, n, x, a); double biggestB = 0.0; double biggestX = 0.0; for (int i = 0; i < n; ++i) { biggestB = (biggestB > abs(b[i])) ? biggestB : abs(b[i]); biggestX = (biggestX > abs(x[i])) ? biggestX : abs(x[i]); } double residn = biggestB / (n * biggestA * biggestX * (2.2204460492503131e-016)); assert(residn < CHECK_VALUE); /* if (residn > CHECK_VALUE) { assert(false); cout << "Validation failed!" << endl; cout << "Computed Norm Res = " << residn << endl; cout << "Reference Norm Res = " << CHECK_VALUE << endl; } else { cout << "Calculations are correct!" << endl; cout << "Computed Norm Res = " << residn << endl; cout << "Reference Norm Res = " << CHECK_VALUE << endl; } */ }
int main (int argc, char *argv[]) { static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int endit, pass, loop; REAL overhead1, overhead2, time2; REAL max1, max2; char was[5][20]; char expect[5][20]; char title[5][20]; int errors; printf("\n"); printf("##########################################\n"); lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout, "%s ", ROLLING); fprintf(stdout, "%s ", PREC); fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); fprintf(stdout,"Optimisation %s\n\n",options); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); start_time(); dgefa(a,lda,n,ipvt,&info); end_time(); atime[0][0] = secs; start_time(); dgesl(a,lda,n,ipvt,b,0); end_time(); atime[1][0] = secs; total = atime[0][0] + atime[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; printf("norm resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", (double)residn, (double)resid, (double)epsn, (double)x1, (double)x2); fprintf(stderr,"Times are reported for matrices of order %5d\n",n); fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; print_time(0); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ fprintf (stderr,"\nCalculating matgen overhead\n"); pass = -20; loop = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } end_time(); overhead1 = secs; fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1); if (overhead1 > runSecs) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1); /************************************************************************ * Calculate matgen/dgefa passes for runSecs seconds * ************************************************************************/ fprintf (stderr,"Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); pass = -20; ntimes = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); time2 = secs; fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2); if (time2 > 
runSecs) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = (int)(runSecs * (double)ntimes / time2); if (ntimes == 0) ntimes = 1; fprintf(stderr,"Passes used %10d \n\n", ntimes); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; print_time(j); } atime[3][6] = atime[3][6] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][6]); fprintf (stderr,"\nCalculating matgen2 overhead\n"); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ start_time(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } end_time(); overhead2 = secs; overhead2 = overhead2 / (double)loop; fprintf(stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = 
ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; print_time(j); } atime[3][12] = atime[3][12] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][12]); /************************************************************************ * Use minimum average as overall Mflops rating * ************************************************************************/ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; fprintf(stderr,"\n"); fprintf(stderr, "%s ", ROLLING); fprintf(stderr, "%s ", PREC); fprintf(stderr," Precision %11.2f Mflops \n\n",mflops); max1 = 0; for (i=1 ; i<6 ; i++) { if (atime[3][i] > max1) max1 = atime[3][i]; } max2 = 0; for (i=7 ; i<12 ; i++) { if (atime[3][i] > max2) max2 = atime[3][i]; } if (max1 < max2) max2 = max1; sprintf(was[0], "%16.1f",(double)residn); sprintf(was[1], "%16.8e",(double)resid); sprintf(was[2], "%16.8e",(double)epsn); sprintf(was[3], "%16.8e",(double)x1); sprintf(was[4], "%16.8e",(double)x2); /* // Values for Watcom sprintf(expect[0], " 0.4"); sprintf(expect[1], " 7.41628980e-014"); sprintf(expect[2], " 1.00000000e-015"); sprintf(expect[3], "-1.49880108e-014"); sprintf(expect[4], "-1.89848137e-014"); // Values for Visual C++ sprintf(expect[0], " 1.7"); sprintf(expect[1], " 7.41628980e-014"); sprintf(expect[2], " 2.22044605e-016"); sprintf(expect[3], "-1.49880108e-014"); sprintf(expect[4], "-1.89848137e-014"); // Values for Ubuntu GCC 32 Bit sprintf(expect[0], " 1.9"); sprintf(expect[1], " 8.39915160e-14"); sprintf(expect[2], " 2.22044605e-16"); 
sprintf(expect[3], " -6.22835117e-14"); sprintf(expect[4], " -4.16333634e-14"); */ // Values for Ubuntu GCC 32 Bit sprintf(expect[0], " 1.7"); sprintf(expect[1], " 7.41628980e-14"); sprintf(expect[2], " 2.22044605e-16"); sprintf(expect[3], " -1.49880108e-14"); sprintf(expect[4], " -1.89848137e-14"); sprintf(title[0], "norm. resid"); sprintf(title[1], "resid "); sprintf(title[2], "machep "); sprintf(title[3], "x[0]-1 "); sprintf(title[4], "x[n-1]-1 "); if (strtol(opt, NULL, 10) == 0) { sprintf(expect[2], " 8.88178420e-016"); } errors = 0; printf ("\n"); }
/*
 * Linpack benchmark driver (dtime()-based timing, kflops report).
 *
 * The "#endif" right after the header closes a preprocessor conditional that
 * is opened before this block and is not visible here. The OMPC section
 * below uses argc/argv, so it presumably pairs with an alternative header
 * in that conditional -- TODO confirm against the full file.
 *
 * st rows, as filled in below: 0 = dgefa time, 1 = dgesl time, 2 = total,
 * 3 = ops/(1.0e3*total) (printed under "kflops"), 4 = 2.0e3/row 3,
 * 5 = total/cray. Columns 0..3 use a with leading dimension lda, columns
 * 4..7 use aa with leading dimension ldaa; columns 3 and 7 are averages over
 * NTIMES repetitions.
 *
 * NOTE(review): total is divided into without a zero check; on a timer too
 * coarse for one pass these divisions see total == 0.
 */
main ()
#endif
{
    static REAL aa[ORDER2][ORDER2],a[ORDER2][ORDER2P1],b[ORDER2],x[ORDER2];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps;
    REAL kf;
    double t1,tm,tm2,dtime();
    static int ipvt[ORDER2],n,i,ntimes,info,lda,ldaa,kflops;

    lda = ORDER2P1;
    ldaa = ORDER2;
    cray = .056;
    n = ORDER;

#ifdef OMPC
    /* Parse "-b <int>" into bf (declared elsewhere) and report thread count. */
    {
        int c;
        extern char *optarg;
        while ((c = getopt (argc, argv, "b:")) != EOF) {
            switch (c) {
            case 'b':
                bf = atoi (optarg);
                break;
            }
        }
    }
    if (omp_get_max_threads () != 1) {
        printf ("OpenMP(%d threads)\n", omp_get_max_threads ());
    } else {
        printf ("OpenMP(1 thread)\n");
    }
#endif

    /* ROLLING and PREC are used directly as format strings. */
    printf(ROLLING);
    printf(PREC);
    printf("Precision Linpack\n\n");

    /* Operation count used to convert elapsed time into a rate. */
    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    /* Pass 0: time dgefa and dgesl separately on a freshly generated system. */
    matgen(a,lda,n,b,&norma);
    t1 = dtime();
    dgefa(a,lda,n,ipvt,&info);
    st[0][0] = dtime() - t1;
    t1 = dtime();
    dgesl(a,lda,n,ipvt,b,0);
    st[1][0] = dtime() - t1;
    total = st[0][0] + st[1][0];

    /* compute a residual to verify results. */
    for (i = 0; i < n; i++) {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++) {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    /* resid = max |b[i]|, normx = max |x[i]| */
    for (i = 0; i < n; i++) {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon((REAL)ONE);
    residn = resid/( n*norma*normx*eps );

    printf(" norm. resid resid machep");
    printf(" x[0]-1 x[n-1]-1\n");
    printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n",
           (double)residn, (double)resid, (double)eps,
           (double)x[0]-1, (double)x[n-1]-1);

    printf(" times are reported for matrices of order %5d\n",n);
    printf(" dgefa dgesl total kflops unit");
    printf(" ratio\n");

    st[2][0] = total;
    st[3][0] = ops/(1.0e3*total);
    st[4][0] = 2.0e3/st[3][0];
    st[5][0] = total/cray;

    printf(" times for array with leading dimension of%5d\n",lda);
    print_time(0);

    /* Pass 1: same measurement repeated. */
    matgen(a,lda,n,b,&norma);
    t1 = dtime();
    dgefa(a,lda,n,ipvt,&info);
    st[0][1] = dtime() - t1;
    t1 = dtime();
    dgesl(a,lda,n,ipvt,b,0);
    st[1][1] = dtime() - t1;
    total = st[0][1] + st[1][1];
    st[2][1] = total;
    st[3][1] = ops/(1.0e3*total);
    st[4][1] = 2.0e3/st[3][1];
    st[5][1] = total/cray;

    /* Pass 2. */
    matgen(a,lda,n,b,&norma);
    t1 = dtime();
    dgefa(a,lda,n,ipvt,&info);
    st[0][2] = dtime() - t1;
    t1 = dtime();
    dgesl(a,lda,n,ipvt,b,0);
    st[1][2] = dtime() - t1;
    total = st[0][2] + st[1][2];
    st[2][2] = total;
    st[3][2] = ops/(1.0e3*total);
    st[4][2] = 2.0e3/st[3][2];
    st[5][2] = total/cray;

    /*
     * Pass 3: average over NTIMES repetitions. The time spent inside matgen
     * is accumulated in tm2 and subtracted from the overall loop time so
     * that only dgefa is charged.
     */
    ntimes = NTIMES;
    tm2 = 0.0;
    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        tm = dtime();
        matgen(a,lda,n,b,&norma);
        tm2 = tm2 + dtime() - tm;
        dgefa(a,lda,n,ipvt,&info);
    }
    st[0][3] = (dtime() - t1 - tm2)/ntimes;

    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        dgesl(a,lda,n,ipvt,b,0);
    }
    st[1][3] = (dtime() - t1)/ntimes;

    total = st[0][3] + st[1][3];
    st[2][3] = total;
    st[3][3] = ops/(1.0e3*total);
    st[4][3] = 2.0e3/st[3][3];
    st[5][3] = total/cray;

    print_time(1);
    print_time(2);
    print_time(3);

    /* Passes 4..6: same single-shot measurements on aa / ldaa. */
    matgen(aa,ldaa,n,b,&norma);
    t1 = dtime();
    dgefa(aa,ldaa,n,ipvt,&info);
    st[0][4] = dtime() - t1;
    t1 = dtime();
    dgesl(aa,ldaa,n,ipvt,b,0);
    st[1][4] = dtime() - t1;
    total = st[0][4] + st[1][4];
    st[2][4] = total;
    st[3][4] = ops/(1.0e3*total);
    st[4][4] = 2.0e3/st[3][4];
    st[5][4] = total/cray;

    matgen(aa,ldaa,n,b,&norma);
    t1 = dtime();
    dgefa(aa,ldaa,n,ipvt,&info);
    st[0][5] = dtime() - t1;
    t1 = dtime();
    dgesl(aa,ldaa,n,ipvt,b,0);
    st[1][5] = dtime() - t1;
    total = st[0][5] + st[1][5];
    st[2][5] = total;
    st[3][5] = ops/(1.0e3*total);
    st[4][5] = 2.0e3/st[3][5];
    st[5][5] = total/cray;

    matgen(aa,ldaa,n,b,&norma);
    t1 = dtime();
    dgefa(aa,ldaa,n,ipvt,&info);
    st[0][6] = dtime() - t1;
    t1 = dtime();
    dgesl(aa,ldaa,n,ipvt,b,0);
    st[1][6] = dtime() - t1;
    total = st[0][6] + st[1][6];
    st[2][6] = total;
    st[3][6] = ops/(1.0e3*total);
    st[4][6] = 2.0e3/st[3][6];
    st[5][6] = total/cray;

    /* Pass 7: NTIMES-repetition average on aa, matgen time excluded. */
    ntimes = NTIMES;
    tm2 = 0;
    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        tm = dtime();
        matgen(aa,ldaa,n,b,&norma);
        tm2 = tm2 + dtime() - tm;
        dgefa(aa,ldaa,n,ipvt,&info);
    }
    st[0][7] = (dtime() - t1 - tm2)/ntimes;

    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        dgesl(aa,ldaa,n,ipvt,b,0);
    }
    st[1][7] = (dtime() - t1)/ntimes;

    total = st[0][7] + st[1][7];
    st[2][7] = total;
    st[3][7] = ops/(1.0e3*total);
    st[4][7] = 2.0e3/st[3][7];
    st[5][7] = total/cray;

    /* the following code sequence implements the semantics of
       the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */

    /*
    kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
    kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
    if (fabs((double)kf) < ONE)
        kflops = 0;
    else {
        kflops = floor(fabs((double)kf));
        if (kf < ZERO) kflops = -kflops;
    }
    */

    /* Active replacement: clamp negative rates to ZERO, take the smaller of
       the two averaged rates and round it to the nearest integer. */
    if ( st[3][3] < ZERO ) st[3][3] = ZERO;
    if ( st[3][7] < ZERO ) st[3][7] = ZERO;
    kf = st[3][3];
    if ( st[3][7] < st[3][3] ) kf = st[3][7];
    kflops = (int)(kf + 0.5);

    printf(" times for array with leading dimension of%5d\n",ldaa);
    print_time(4);
    print_time(5);
    print_time(6);
    print_time(7);
    printf(ROLLING);
    printf(PREC);
    printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
}
main () { static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,t1,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int Endit, pass, loop; REAL overhead1, overhead2, time1, time2; FILE *outfile; char *compiler, *options, general[9][80] = {" "}; outfile = fopen("Linpack.txt","a+"); if (outfile == NULL) { printf ("Cannot open results file \n\n"); printf("Press any key\n"); #ifdef DOS Endit = getch(); #endif exit (0); } /************************************************************************ * Enter details of compiler and options used * ************************************************************************/ /*----------------- --------- --------- ---------*/ compiler = "INSERT COMPILER NAME HERE"; options = "INSERT OPTIMISATION OPTIONS HERE"; /* Include -dDP or -dSP and -dROLL or -dUNROLL */ lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout,ROLLING);fprintf(stdout,PREC); fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); fprintf(stdout,"Compiler %s\n",compiler); fprintf(stdout,"Optimisation %s\n\n",options); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); atime[0][0] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); atime[1][0] = second() - t1; total = atime[0][0] + atime[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; printf("norm resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", (double)residn, (double)resid, (double)epsn, (double)x1, (double)x2); fprintf(stderr,"Times are reported for matrices of order %5d\n",n); fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; print_time(0); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ fprintf (stderr,"\nCalculating matgen overhead\n"); pass = -20; loop = NTIMES; do { time1 = second(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } time2 = second(); overhead1 = (time2 - time1); fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1); if (overhead1 > 5.0) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1); /************************************************************************ * Calculate matgen/dgefa passes for 5 seconds * ************************************************************************/ fprintf (stderr,"Calculating matgen/dgefa passes for 5 seconds\n"); pass = -20; ntimes = NTIMES; do { time1 = second(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } time2 = second() - time1; fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2); if (time2 > 
5.0) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = 5.0 * (double)ntimes / time2; if (ntimes == 0) ntimes = 1; fprintf (stderr,"Passes used %10d \n\n", ntimes); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { t1 = second(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } atime[0][j] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } atime[1][j] = (second() - t1)/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; print_time(j); } atime[3][6] = atime[3][6] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][6]); fprintf (stderr,"\nCalculating matgen2 overhead\n"); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ time1 = second(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } time2 = second(); overhead2 = (time2 - time1); overhead2 = overhead2 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ 
tm2 = ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { t1 = second(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } atime[0][j] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } atime[1][j] = (second() - t1)/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; print_time(j); } atime[3][12] = atime[3][12] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][12]); /************************************************************************ * Use minimum average as overall Mflops rating * ************************************************************************/ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; fprintf(stderr,"\n"); fprintf(stderr,ROLLING);fprintf(stderr,PREC); fprintf(stderr," Precision %11.2f Mflops \n\n",mflops); what_date(); /************************************************************************ * Type details of hardware, software etc. 
* ************************************************************************/ printf ("Enter the following data which will be " "appended to file Linpack.txt \n\n"); printf ("PC Supplier/model ?\n "); scanf ("%[^\n]", general[1]); fflush (stdin); printf ("CPU ?\n "); scanf ("%[^\n]", general[2]); fflush (stdin); printf ("Clock MHz ?\n "); scanf ("%[^\n]", general[3]); fflush (stdin); printf ("Cache ?\n "); scanf ("%[^\n]", general[4]); fflush (stdin); printf ("Chipset/options ?\n "); scanf ("%[^\n]", general[5]); fflush (stdin); printf ("OS/DOS version ?\n "); scanf ("%[^\n]", general[6]); fflush (stdin); printf ("Your name ?\n "); scanf ("%[^\n]", general[7]); fflush (stdin); printf ("Where from ?\n "); scanf ("%[^\n]", general[8]); fflush (stdin); printf ("Mail address ?\n "); scanf ("%[^\n]", general[0]); fflush (stdin); /************************************************************************ * Add results to output file LLloops.txt * ************************************************************************/ fprintf (outfile, "----------------- ----------------- --------- " "--------- ---------\n"); fprintf (outfile, "LINPACK BENCHMARK FOR PCs 'C/C++' n @ 100\n\n"); fprintf (outfile, "Month run %d/%d\n", this_month, this_year); fprintf (outfile, "PC model %s\n", general[1]); fprintf (outfile, "CPU %s\n", general[2]); fprintf (outfile, "Clock MHz %s\n", general[3]); fprintf (outfile, "Cache %s\n", general[4]); fprintf (outfile, "Options %s\n", general[5]); fprintf (outfile, "OS/DOS %s\n", general[6]); fprintf (outfile, "Compiler %s\n", compiler); fprintf (outfile, "OptLevel %s\n", options); fprintf (outfile, "Run by %s\n", general[7]); fprintf (outfile, "From %s\n", general[8]); fprintf (outfile, "Mail %s\n\n", general[0]); fprintf(outfile, "Rolling %s\n",ROLLING); fprintf(outfile, "Precision %s\n",PREC); fprintf(outfile, "norm. 
resid %16.1f\n",(double)residn); fprintf(outfile, "resid %16.8e\n",(double)resid); fprintf(outfile, "machep %16.8e\n",(double)epsn); fprintf(outfile, "x[0]-1 %16.8e\n",(double)x1); fprintf(outfile, "x[n-1]-1 %16.8e\n",(double)x2); fprintf(outfile, "matgen 1 seconds %16.5f\n",overhead1); fprintf(outfile, "matgen 2 seconds %16.5f\n",overhead2); fprintf(outfile, "Repetitions %16d\n",ntimes); fprintf(outfile, "Leading dimension %16d\n",lda); fprintf(outfile, " dgefa dgesl " " total Mflops\n"); fprintf(outfile, "1 pass seconds %16.5f %9.5f %9.5f\n", atime[0][0], atime[1][0], atime[2][0]); for (i=1 ; i<6 ; i++) { fprintf(outfile, "Repeat seconds %16.5f %9.5f %9.5f %9.2f\n", atime[0][i], atime[1][i], atime[2][i], atime[3][i]); } fprintf(outfile, "Average %46.2f\n",atime[3][6]); fprintf(outfile, "Leading dimension %16d\n",ldaa); for (i=7 ; i<12 ; i++) { fprintf(outfile, "Repeat seconds %16.5f %9.5f %9.5f %9.2f\n", atime[0][i], atime[1][i], atime[2][i], atime[3][i]); } fprintf(outfile, "Average %46.2f\n\n",atime[3][12]); fclose (outfile); printf("\nPress any key\n"); #ifdef DOS Endit = getch(); #endif }
/*
 * Linpack benchmark driver (TimerOn/TimerOff/TimerElapsed timing with a
 * Report() call after every measurement).
 *
 * st rows, as filled in below: 0 = dgefa time, 1 = dgesl time, 2 = total,
 * 3 = ops/(1.0e3*total) (printed under "kflops"), 4 = 2.0e3/row 3,
 * 5 = total/cray. Columns 0..3 use a with leading dimension lda = 201,
 * columns 4..7 use aa with leading dimension ldaa = 200; columns 3 and 7 are
 * averages over NTIMES repetitions.
 *
 * The function does not return; it ends with exit( EXIT_SUCCESS ).
 *
 * NOTE(review): total is divided into without a zero check; on a timer too
 * coarse for one pass these divisions see total == 0.
 */
void main ()
{
    static REAL aa[200][200],a[200][201],b[200],x[200];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps;
    REAL epslon(),kf;
#if 0
    /* Disabled: these locals are not referenced anywhere in this function. */
    double t1;
    double tm;
#endif
    double tm2;
    double dtime();
    static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
    /* Separate timer for the outer repetition loops, so the TimerOn/TimerOff
       pair stays free for timing matgen inside them. */
    static user_timer second_timer;

    lda = 201;
    ldaa = 200;
    cray = .056;
    n = 100;

    /* ROLLING and PREC are used directly as format strings. */
    printf(ROLLING);
    printf(PREC);
    printf("Precision Linpack\n\n");

    /* Operation count used to convert elapsed time into a rate. */
    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    /* Pass 0: time dgefa and dgesl separately on a freshly generated system. */
    matgen(a,lda,n,b,&norma);
    TimerOn();
    dgefa(a,lda,n,ipvt,&info);
    TimerOff();
    st[0][0] = TimerElapsed();
    Report( "clinpack(dgefa#1)", st[0][0] );
    TimerOn();
    dgesl(a,lda,n,ipvt,b,0);
    TimerOff();
    st[1][0] = TimerElapsed();
    Report( "clinpack(dgesl#1)", st[1][0] );
    total = st[0][0] + st[1][0];

    /* compute a residual to verify results. */
    for (i = 0; i < n; i++) {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++) {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    /* resid = max |b[i]|, normx = max |x[i]| */
    for (i = 0; i < n; i++) {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon((REAL)ONE);
    residn = resid/( n*norma*normx*eps );

    printf(" norm. resid resid machep");
    printf(" x[0]-1 x[n-1]-1\n");
    printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n",
           (double)residn, (double)resid, (double)eps,
           (double)x[0]-1, (double)x[n-1]-1);

    printf(" times are reported for matrices of order %5d\n",n);
    printf(" dgefa dgesl total kflops unit");
    printf(" ratio\n");

    st[2][0] = total;
    st[3][0] = ops/(1.0e3*total);
    st[4][0] = 2.0e3/st[3][0];
    st[5][0] = total/cray;

    printf(" times for array with leading dimension of%5d\n",lda);
    print_time(0);

    /* Pass 1: same measurement repeated. */
    matgen(a,lda,n,b,&norma);
    TimerOn();
    dgefa(a,lda,n,ipvt,&info);
    TimerOff();
    st[0][1] = TimerElapsed();
    Report( "clinpack(dgefa#2)", st[0][1] );
    TimerOn();
    dgesl(a,lda,n,ipvt,b,0);
    TimerOff();
    st[1][1] = TimerElapsed();
    Report( "clinpack(dgesl#2)", st[1][1] );
    total = st[0][1] + st[1][1];
    st[2][1] = total;
    st[3][1] = ops/(1.0e3*total);
    st[4][1] = 2.0e3/st[3][1];
    st[5][1] = total/cray;

    /* Pass 2. */
    matgen(a,lda,n,b,&norma);
    TimerOn();
    dgefa(a,lda,n,ipvt,&info);
    TimerOff();
    st[0][2] = TimerElapsed();
    Report( "clinpack(dgefa#3)", st[0][2] );
    TimerOn();
    dgesl(a,lda,n,ipvt,b,0);
    TimerOff();
    st[1][2] = TimerElapsed();
    Report( "clinpack(dgesl#3)", st[1][2] );
    total = st[0][2] + st[1][2];
    st[2][2] = total;
    st[3][2] = ops/(1.0e3*total);
    st[4][2] = 2.0e3/st[3][2];
    st[5][2] = total/cray;

    /*
     * Pass 3: average over NTIMES repetitions. The time spent inside matgen
     * is accumulated in tm2 and subtracted from the outer timer's elapsed
     * time so that only dgefa is charged.
     */
    ntimes = NTIMES;
    tm2 = 0.0;
    UserTimerOn( &second_timer );
    for (i = 0; i < ntimes; i++) {
        TimerOn();
        matgen(a,lda,n,b,&norma);
        TimerOff();
        tm2 = tm2 + TimerElapsed();
        dgefa(a,lda,n,ipvt,&info);
    }
    UserTimerOff( &second_timer );
    st[0][3] = ( UserTimerElapsed( &second_timer ) - tm2)/ntimes;
    Report( "clinpack(dgefa#4)", st[0][3] );

    TimerOn();
    for (i = 0; i < ntimes; i++) {
        dgesl(a,lda,n,ipvt,b,0);
    }
    TimerOff();
    st[1][3] = TimerElapsed()/ntimes;
    Report( "clinpack(dgesl#4)", st[1][3] );

    total = st[0][3] + st[1][3];
    st[2][3] = total;
    st[3][3] = ops/(1.0e3*total);
    st[4][3] = 2.0e3/st[3][3];
    st[5][3] = total/cray;

    print_time(1);
    print_time(2);
    print_time(3);

    /* Passes 4..6: same single-shot measurements on aa / ldaa. */
    matgen(aa,ldaa,n,b,&norma);
    TimerOn();
    dgefa(aa,ldaa,n,ipvt,&info);
    TimerOff();
    st[0][4] = TimerElapsed();
    Report( "clinpack(dgefa#5)", st[0][4] );
    TimerOn();
    dgesl(aa,ldaa,n,ipvt,b,0);
    TimerOff();
    st[1][4] = TimerElapsed();
    Report( "clinpack(dgesl#5)", st[1][4] );
    total = st[0][4] + st[1][4];
    st[2][4] = total;
    st[3][4] = ops/(1.0e3*total);
    st[4][4] = 2.0e3/st[3][4];
    st[5][4] = total/cray;

    matgen(aa,ldaa,n,b,&norma);
    TimerOn();
    dgefa(aa,ldaa,n,ipvt,&info);
    TimerOff();
    st[0][5] = TimerElapsed();
    Report( "clinpack(dgefa#6)", st[0][5] );
    TimerOn();
    dgesl(aa,ldaa,n,ipvt,b,0);
    TimerOff();
    st[1][5] = TimerElapsed();
    Report( "clinpack(dgesl#6)", st[1][5] );
    total = st[0][5] + st[1][5];
    st[2][5] = total;
    st[3][5] = ops/(1.0e3*total);
    st[4][5] = 2.0e3/st[3][5];
    st[5][5] = total/cray;

    matgen(aa,ldaa,n,b,&norma);
    TimerOn();
    dgefa(aa,ldaa,n,ipvt,&info);
    TimerOff();
    st[0][6] = TimerElapsed();
    Report( "clinpack(dgefa#7)", st[0][6] );
    TimerOn();
    dgesl(aa,ldaa,n,ipvt,b,0);
    TimerOff();
    st[1][6] = TimerElapsed();
    Report( "clinpack(dgesl#7)", st[1][6] );
    total = st[0][6] + st[1][6];
    st[2][6] = total;
    st[3][6] = ops/(1.0e3*total);
    st[4][6] = 2.0e3/st[3][6];
    st[5][6] = total/cray;

    /* Pass 7: NTIMES-repetition average on aa, matgen time excluded. */
    ntimes = NTIMES;
    tm2 = 0;
    UserTimerOn( &second_timer );
    for (i = 0; i < ntimes; i++) {
        TimerOn();
        matgen(aa,ldaa,n,b,&norma);
        TimerOff();
        tm2 = tm2 + TimerElapsed();
        dgefa(aa,ldaa,n,ipvt,&info);
    }
    UserTimerOff( &second_timer );
    st[0][7] = ( UserTimerElapsed( &second_timer ) - tm2 ) / ntimes;
    Report( "clinpack(dgefa#8)", st[0][7] );

    TimerOn();
    for (i = 0; i < ntimes; i++) {
        dgesl(aa,ldaa,n,ipvt,b,0);
    }
    TimerOff();
    st[1][7] = TimerElapsed()/ntimes;
    Report( "clinpack(dgesl#8)", st[1][7] );

    total = st[0][7] + st[1][7];
    st[2][7] = total;
    st[3][7] = ops/(1.0e3*total);
    st[4][7] = 2.0e3/st[3][7];
    st[5][7] = total/cray;

    /* the following code sequence implements the semantics of
       the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */

    /*
    kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
    kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
    if (fabs((double)kf) < ONE)
        kflops = 0;
    else {
        kflops = floor(fabs((double)kf));
        if (kf < ZERO) kflops = -kflops;
    }
    */

    /* Active replacement: clamp negative rates to ZERO, take the smaller of
       the two averaged rates and round it to the nearest integer. */
    if ( st[3][3] < ZERO ) st[3][3] = ZERO;
    if ( st[3][7] < ZERO ) st[3][7] = ZERO;
    kf = st[3][3];
    if ( st[3][7] < st[3][3] ) kf = st[3][7];
    kflops = (int)(kf + 0.5);

    printf(" times for array with leading dimension of%4d\n",ldaa);
    print_time(4);
    print_time(5);
    print_time(6);
    print_time(7);
    printf(ROLLING);
    printf(PREC);
    printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);

    exit( EXIT_SUCCESS );
}
/*
 * Linpack benchmark driver (second()-based timing, results in the global
 * time[][] table, kflops report on stderr).
 *
 * time rows, as filled in below: 0 = dgefa time, 1 = dgesl time, 2 = total,
 * 3 = ops/(1.0e3*total) (printed under "kflops"), 4 = 2.0e3/row 3,
 * 5 = total/cray. Columns 0..3 use a with leading dimension lda = 201,
 * columns 4..7 use aa with leading dimension ldaa = 200; columns 3 and 7 are
 * averages over NTIMES repetitions.
 *
 * The residual line goes to stdout; the banner goes to both stdout and
 * stderr; the timing headers and the final rating go to stderr.
 *
 * NOTE(review): total is divided into without a zero check; on a timer too
 * coarse for one pass these divisions see total == 0.
 */
main ()
{
    static REAL aa[200][200],a[200][201],b[200],x[200];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps,t1,tm,tm2;
    REAL epslon(),second(),kf;
    static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;

    lda = 201;
    ldaa = 200;
    cray = .056;
    n = 100;

    /* ROLLING and PREC are used directly as format strings. */
    fprintf(stdout,ROLLING);
    fprintf(stdout,PREC);
    fprintf(stdout,"Precision Linpack\n\n");
    fprintf(stderr,ROLLING);
    fprintf(stderr,PREC);
    fprintf(stderr,"Precision Linpack\n\n");

    /* Operation count used to convert elapsed time into a rate. */
    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    /* Pass 0: time dgefa and dgesl separately on a freshly generated system. */
    matgen(a,lda,n,b,&norma);
    t1 = second();
    dgefa(a,lda,n,ipvt,&info);
    time[0][0] = second() - t1;
    t1 = second();
    dgesl(a,lda,n,ipvt,b,0);
    time[1][0] = second() - t1;
    total = time[0][0] + time[1][0];

    /* compute a residual to verify results. */
    for (i = 0; i < n; i++) {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++) {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    /* resid = max |b[i]|, normx = max |x[i]| */
    for (i = 0; i < n; i++) {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon((REAL)ONE);
    residn = resid/( n*norma*normx*eps );

    printf(" norm. resid resid machep");
    printf(" x[0]-1 x[n-1]-1\n");
    printf(" %8.1f %16.8e%16.8e%16.8e%16.8e\n",
           (double)residn, (double)resid, (double)eps,
           (double)x[0]-1, (double)x[n-1]-1);

    fprintf(stderr," times are reported for matrices of order %5d\n",n);
    fprintf(stderr," dgefa dgesl total kflops unit");
    fprintf(stderr," ratio\n");

    time[2][0] = total;
    time[3][0] = ops/(1.0e3*total);
    time[4][0] = 2.0e3/time[3][0];
    time[5][0] = total/cray;

    fprintf(stderr," times for array with leading dimension of%5d\n",lda);
    print_time(0);

    /* Pass 1: same measurement repeated. */
    matgen(a,lda,n,b,&norma);
    t1 = second();
    dgefa(a,lda,n,ipvt,&info);
    time[0][1] = second() - t1;
    t1 = second();
    dgesl(a,lda,n,ipvt,b,0);
    time[1][1] = second() - t1;
    total = time[0][1] + time[1][1];
    time[2][1] = total;
    time[3][1] = ops/(1.0e3*total);
    time[4][1] = 2.0e3/time[3][1];
    time[5][1] = total/cray;

    /* Pass 2. */
    matgen(a,lda,n,b,&norma);
    t1 = second();
    dgefa(a,lda,n,ipvt,&info);
    time[0][2] = second() - t1;
    t1 = second();
    dgesl(a,lda,n,ipvt,b,0);
    time[1][2] = second() - t1;
    total = time[0][2] + time[1][2];
    time[2][2] = total;
    time[3][2] = ops/(1.0e3*total);
    time[4][2] = 2.0e3/time[3][2];
    time[5][2] = total/cray;

    /*
     * Pass 3: average over NTIMES repetitions. The time spent inside matgen
     * is accumulated in tm2 and subtracted from the overall loop time so
     * that only dgefa is charged.
     */
    ntimes = NTIMES;
    tm2 = 0.0;
    t1 = second();
    for (i = 0; i < ntimes; i++) {
        tm = second();
        matgen(a,lda,n,b,&norma);
        tm2 = tm2 + second() - tm;
        dgefa(a,lda,n,ipvt,&info);
    }
    time[0][3] = (second() - t1 - tm2)/ntimes;

    t1 = second();
    for (i = 0; i < ntimes; i++) {
        dgesl(a,lda,n,ipvt,b,0);
    }
    time[1][3] = (second() - t1)/ntimes;

    total = time[0][3] + time[1][3];
    time[2][3] = total;
    time[3][3] = ops/(1.0e3*total);
    time[4][3] = 2.0e3/time[3][3];
    time[5][3] = total/cray;

    print_time(1);
    print_time(2);
    print_time(3);

    /* Passes 4..6: same single-shot measurements on aa / ldaa. */
    matgen(aa,ldaa,n,b,&norma);
    t1 = second();
    dgefa(aa,ldaa,n,ipvt,&info);
    time[0][4] = second() - t1;
    t1 = second();
    dgesl(aa,ldaa,n,ipvt,b,0);
    time[1][4] = second() - t1;
    total = time[0][4] + time[1][4];
    time[2][4] = total;
    time[3][4] = ops/(1.0e3*total);
    time[4][4] = 2.0e3/time[3][4];
    time[5][4] = total/cray;

    matgen(aa,ldaa,n,b,&norma);
    t1 = second();
    dgefa(aa,ldaa,n,ipvt,&info);
    time[0][5] = second() - t1;
    t1 = second();
    dgesl(aa,ldaa,n,ipvt,b,0);
    time[1][5] = second() - t1;
    total = time[0][5] + time[1][5];
    time[2][5] = total;
    time[3][5] = ops/(1.0e3*total);
    time[4][5] = 2.0e3/time[3][5];
    time[5][5] = total/cray;

    matgen(aa,ldaa,n,b,&norma);
    t1 = second();
    dgefa(aa,ldaa,n,ipvt,&info);
    time[0][6] = second() - t1;
    t1 = second();
    dgesl(aa,ldaa,n,ipvt,b,0);
    time[1][6] = second() - t1;
    total = time[0][6] + time[1][6];
    time[2][6] = total;
    time[3][6] = ops/(1.0e3*total);
    time[4][6] = 2.0e3/time[3][6];
    time[5][6] = total/cray;

    /* Pass 7: NTIMES-repetition average on aa, matgen time excluded. */
    ntimes = NTIMES;
    tm2 = 0;
    t1 = second();
    for (i = 0; i < ntimes; i++) {
        tm = second();
        matgen(aa,ldaa,n,b,&norma);
        tm2 = tm2 + second() - tm;
        dgefa(aa,ldaa,n,ipvt,&info);
    }
    time[0][7] = (second() - t1 - tm2)/ntimes;

    t1 = second();
    for (i = 0; i < ntimes; i++) {
        dgesl(aa,ldaa,n,ipvt,b,0);
    }
    time[1][7] = (second() - t1)/ntimes;

    total = time[0][7] + time[1][7];
    time[2][7] = total;
    time[3][7] = ops/(1.0e3*total);
    time[4][7] = 2.0e3/time[3][7];
    time[5][7] = total/cray;

    /* the following code sequence implements the semantics of
       the Fortran intrinsics "nint(min(time[3][3],time[3][7]))" */

    /* Smaller of the two averaged rates ... */
    kf = (time[3][3] < time[3][7]) ? time[3][3] : time[3][7];
    /* ... pushed half a unit away from zero ... */
    kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
    /* ... then truncated toward zero, keeping the sign. */
    if (fabs((double)kf) < ONE)
        kflops = 0;
    else {
        kflops = floor(fabs((double)kf));
        if (kf < ZERO) kflops = -kflops;
    }

    fprintf(stderr," times for array with leading dimension of%4d\n",ldaa);
    print_time(4);
    print_time(5);
    print_time(6);
    print_time(7);
    fprintf(stderr,ROLLING);
    fprintf(stderr,PREC);
    fprintf(stderr," Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
}
int clinpack_kflops ( int ntimes ) { static REAL aa[200][200],a[200][201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps; REAL kf; double t1,tm,tm2; static int ipvt[200],n,i,info,lda,ldaa,kflops; #if defined(WIN32) static float one_tick = .0001; #else static long clock_tick = -1; static float one_tick; if ( clock_tick < 1 || clock_tick > 1000) { clock_tick = sysconf( _SC_CLK_TCK ); /* clock_tick is the number of ticks per second */ one_tick = (float) 1 / clock_tick; /* one_tick is the length of time for one tick */ } #endif lda = 201; ldaa = 200; cray = .056; n = 100; ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][0] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][0] = dtime() - t1; total = st[0][0] + st[1][0]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen((double *)a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,(double *)a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]); } eps = epslon((REAL)ONE); residn = resid/( n*norma*normx*eps ); st[2][0] = total; st[3][0] = ops/(1.0e3*total); st[4][0] = 2.0e3/st[3][0]; st[5][0] = total/cray; matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][1] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][1] = dtime() - t1; total = st[0][1] + st[1][1]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. 
In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][1] = total; st[3][1] = ops/(1.0e3*total); st[4][1] = 2.0e3/st[3][1]; st[5][1] = total/cray; matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][2] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][2] = dtime() - t1; total = st[0][2] + st[1][2]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][2] = total; st[3][2] = ops/(1.0e3*total); st[4][2] = 2.0e3/st[3][2]; st[5][2] = total/cray; tm2 = 0.0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen((double *)a,lda,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa((double *)a,lda,n,ipvt,&info); } st[0][3] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl((double *)a,lda,n,ipvt,b,0); } st[1][3] = (dtime() - t1)/ntimes; total = st[0][3] + st[1][3]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][3] = total; st[3][3] = ops/(1.0e3*total); st[4][3] = 2.0e3/st[3][3]; st[5][3] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][4] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][4] = dtime() - t1; total = st[0][4] + st[1][4]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. 
Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][4] = total; st[3][4] = ops/(1.0e3*total); st[4][4] = 2.0e3/st[3][4]; st[5][4] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][5] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][5] = dtime() - t1; total = st[0][5] + st[1][5]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][5] = total; st[3][5] = ops/(1.0e3*total); st[4][5] = 2.0e3/st[3][5]; st[5][5] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][6] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][6] = dtime() - t1; total = st[0][6] + st[1][6]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][6] = total; st[3][6] = ops/(1.0e3*total); st[4][6] = 2.0e3/st[3][6]; st[5][6] = total/cray; tm2 = 0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen((double *)aa,ldaa,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa((double *)aa,ldaa,n,ipvt,&info); } st[0][7] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl((double *)aa,ldaa,n,ipvt,b,0); } st[1][7] = (dtime() - t1)/ntimes; total = st[0][7] + st[1][7]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. 
Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][7] = total; st[3][7] = ops/(1.0e3*total); st[4][7] = 2.0e3/st[3][7]; st[5][7] = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */ /* kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } */ if ( st[3][3] < ZERO ) st[3][3] = ZERO; if ( st[3][7] < ZERO ) st[3][7] = ZERO; kf = st[3][3]; if ( st[3][7] < st[3][3] ) kf = st[3][7]; kflops = (int)(kf + 0.5); return kflops; }
jobject Java_rs_pedjaapps_Linpack_MainActivity_runLinpack (JNIEnv* env, jobject thiz, jclass resultClass) { __android_log_write (ANDROID_LOG_DEBUG, "linpack-jni.c", "running neon linpack"); static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int endit, pass, loop; REAL overhead1, overhead2, time2; REAL max1, max2; char resultchars[1000]; lda = 201; ldaa = 200; cray = .056; n = 100; ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); start_time(); dgefa(a,lda,n,ipvt,&info); end_time(); atime[0][0] = secs; start_time(); dgesl(a,lda,n,ipvt,b,0); end_time(); atime[1][0] = secs; total = atime[0][0] + atime[1][0]; // compute a residual to verify results. for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; // ************************************************************************ // * Calculate overhead of executing matgen procedure * // ************************************************************************ pass = -20; loop = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } end_time(); overhead1 = secs; if (overhead1 > runSecs) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; // ************************************************************************ // * Calculate matgen/dgefa passes for runSecs seconds * // ************************************************************************ pass = -20; ntimes = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); time2 = secs; if (time2 > runSecs) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = (int)(runSecs * (double)ntimes / time2); if (ntimes == 0) ntimes = 1; // ************************************************************************ // * Execute 5 passes * // ************************************************************************ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + 
atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; } atime[3][6] = atime[3][6] / 5.0; // ************************************************************************ // * Calculate overhead of executing matgen procedure * // ************************************************************************ start_time(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } end_time(); overhead2 = secs; overhead2 = overhead2 / (double)loop; // ************************************************************************ // * Execute 5 passes * // ************************************************************************ tm2 = ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; } atime[3][12] = atime[3][12] / 5.0; // ************************************************************************ // * Use minimum average as overall Mflops rating * // ************************************************************************ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; // ************************************************************************ // * Add results to output file Linpack.txt * // ************************************************************************ max1 = 0; for (i=1 ; i<6 ; i++) { if (atime[3][i] > max1) max1 = atime[3][i]; } max2 = 0; for (i=7 ; i<12 ; i++) { if (atime[3][i] > max2) max2 = atime[3][i]; } if (max1 < max2) max2 = max1; jmethodID jConstructor = (*env)->GetMethodID 
(env, resultClass, "<init>", "()V"); if (jConstructor == NULL)__android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jConstructor is NULL"); jobject resultObject = (*env)->NewObject (env, resultClass, jConstructor); /*mFlops, residn, resid, epsn, x1, x2;*/ jfieldID jMFlops = (*env)->GetFieldID (env, resultClass, "mflops", "D"); jfieldID jResidn = (*env)->GetFieldID (env, resultClass, "nres", "D"); jfieldID jEpsn = (*env)->GetFieldID (env, resultClass, "precision", "D"); (*env)->SetDoubleField (env, resultObject, jMFlops, max2); (*env)->SetDoubleField (env, resultObject, jResidn, (double) residn); (*env)->SetDoubleField (env, resultObject, jEpsn, (double) epsn); return resultObject; }