/*
 * Time the LINPACK factor (dgefa) / solve (dgesl) pair and print one row of
 * results.
 *
 * nreps   number of iterations of EACH of the two timing loops below.
 * arsize  leading dimension of the matrix; the system that is actually
 *         generated and solved has order arsize/2.
 *
 * Working storage is carved out of the externally defined `mempool`:
 * the matrix comes first (arsize*arsize REALs), the right-hand side follows
 * it, and the pivot vector starts arsize REALs after the start of the
 * right-hand side.  The caller must have sized `mempool` accordingly.
 *
 * Returns 0 when the run was too short to be trusted (total time below 0.5
 * or factor+solve time below 0.2, in the units of second()); otherwise
 * returns the total elapsed time.
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *a,*b;
    REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int *ipvt,n,info,lda;
    long i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);

    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];

    tdgesl=0;
    tdgefa=0;
    totalt=second();

    /* First loop: last argument of dgefa/dgesl is 1
     * (presumably a rolled/unrolled selector -- confirm against dgefa). */
    for (i=0;i<nreps;i++) {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }

    /* Second loop: identical, but with 0 as the last argument. */
    for (i=0;i<nreps;i++) {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;

    /* Too little measured time: report "no result" to the caller. */
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);

    /* Two loops of nreps iterations each, hence the factor 2. */
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;

    /* Clamp negative values before they are printed as percentages. */
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;

    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
           nreps,totalt,100.*tdgefa/totalt,
           100.*tdgesl/totalt,100.*toverhead/totalt,
           kflops/1000.0);

    /* Runs of at least 4.0 are additionally handed to call_objc_obj(). */
    if(totalt >= 4.0)
        call_objc_obj(kflops/1000.0);

    return(totalt);
}
/*
 * Time the LINPACK factor (dgefa) / solve (dgesl) pair, print one row of
 * results and, for long enough runs, publish the measured rate.
 *
 * nreps   number of iterations of EACH of the two timing loops below.
 * arsize  leading dimension of the matrix; the system that is actually
 *         generated and solved has order arsize/2.
 *
 * Working storage is carved out of the externally defined `mempool`:
 * the matrix comes first (arsize*arsize REALs), the right-hand side follows
 * it, and the pivot vector starts arsize REALs after the start of the
 * right-hand side.  The caller must have sized `mempool` accordingly.
 *
 * Returns 0 when the run was too short to be trusted (total time below 0.5
 * or factor+solve time below 0.2, in the units of second()); otherwise
 * returns the total elapsed time.
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *a,*b;
    REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int *ipvt,n,info,lda;
    long i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);

    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];

    tdgesl=0;
    tdgefa=0;
    totalt=second();

    /* First loop: last argument of dgefa/dgesl is 1
     * (presumably a rolled/unrolled selector -- confirm against dgefa). */
    for (i=0;i<nreps;i++) {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }

    /* Second loop: identical, but with 0 as the last argument. */
    for (i=0;i<nreps;i++) {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;

    /* Too little measured time: report "no result" to the caller. */
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);

    /* Two loops of nreps iterations each, hence the factor 2. */
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;

    /* Clamp negative values before they are printed as percentages. */
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;

    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
           nreps,totalt,100.*tdgefa/totalt,
           100.*tdgesl/totalt,100.*toverhead/totalt,
           kflops/1000.0);

    if(totalt > 10.){
        /* Publish the result in the benchmark database.
         * NOTE(review): endpoint and identification strings are hard-coded;
         * their meaning is defined by publish_linpack_result(). */
        publish_linpack_result("http://modev.mine.nu:8070/benchmark/publish_result.php",
                               "1337", "MoSync", "987123ab", "HTC%20Wildfire", "2",
                               kflops/1000.0);
    }

    return(totalt);
}
/*
 * Run the factor/solve benchmark and print the measured rate.
 *
 * nreps   iterations performed for each value of the roll flag (1, then 0).
 * arsize  leading dimension of the matrix; the solved system has order
 *         arsize/2.
 *
 * Storage comes from the externally defined `mempool`: matrix, then the
 * right-hand side, then the pivot vector arsize REALs into the right-hand
 * side area.
 *
 * Prints the rate (in thousands of operations per time unit of second())
 * on a line of its own and returns the total elapsed time.  Unlike a
 * threshold-checked run, no minimum duration is enforced here.
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *matrix, *rhs;
    REAL norm, started, rate, solve_secs, factor_secs, elapsed, overhead, flops;
    int *pivots, order, status, stride, roll_flag;
    long rep, cells;

    stride = arsize;
    order  = arsize/2;
    cells  = (long)arsize*(long)arsize;
    flops  = ((2.0*order*order*order)/3.0+2.0*order*order);

    matrix = (REAL *)mempool;
    rhs    = matrix+cells;
    pivots = (int *)&rhs[arsize];

    solve_secs  = 0;
    factor_secs = 0;
    elapsed = second();

    /* One full set of nreps iterations with flag 1, then one with flag 0. */
    for (roll_flag = 1; roll_flag >= 0; roll_flag--) {
        for (rep = 0; rep < nreps; rep++) {
            matgen(matrix,stride,order,rhs,&norm);

            started = second();
            dgefa(matrix,stride,order,pivots,&status,roll_flag);
            factor_secs += second()-started;

            started = second();
            dgesl(matrix,stride,order,pivots,rhs,0,roll_flag);
            solve_secs += second()-started;
        }
    }
    elapsed = second()-elapsed;

    rate = 2.*nreps*flops/(1000.*(factor_secs+solve_secs));

    /* Overhead and the clamps below are computed as in the original but are
     * not consumed by anything that follows. */
    overhead = elapsed-factor_secs-solve_secs;
    if (factor_secs<0.) {
        factor_secs=0.;
    }
    if (solve_secs<0.) {
        solve_secs=0.;
    }
    if (overhead<0.) {
        overhead=0.;
    }

    printf("%f\n", rate);
    return(elapsed);
}
static REAL linpack(long nreps,int arsize) { REAL *a,*b; REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops; int *ipvt,n,info,lda; long i,arsize2d; lda = arsize; n = arsize/2; arsize2d = (long)arsize*(long)arsize; ops=((2.0*n*n*n)/3.0+2.0*n*n); a=(REAL *)mempool; b=a+arsize2d; ipvt=(int *)&b[arsize]; tdgesl=0; tdgefa=0; totalt=dtime(); for (i=0;i<nreps;i++) { matgen(a,lda,n,b,&norma); t1 = dtime(); dgefa(a,lda,n,ipvt,&info,roll); tdgefa += dtime()-t1; t1 = dtime(); dgesl(a,lda,n,ipvt,b,0,roll); tdgesl += dtime()-t1; } totalt=dtime()-totalt; if (totalt<0.5 || tdgefa+tdgesl<0.2) return(0.); kflops=nreps*ops/(1000.*(tdgefa+tdgesl)); toverhead=totalt-tdgefa-tdgesl; if (tdgefa<0.) tdgefa=0.; if (tdgesl<0.) tdgesl=0.; if (toverhead<0.) toverhead=0.; printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", nreps,totalt,100.*tdgefa/totalt, 100.*tdgesl/totalt,100.*toverhead/totalt, kflops); return(totalt); }
int main (int argc, char *argv[]) { static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int endit, pass, loop; REAL overhead1, overhead2, time2; REAL max1, max2; char was[5][20]; char expect[5][20]; char title[5][20]; int errors; printf("\n"); printf("##########################################\n"); lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout, "%s ", ROLLING); fprintf(stdout, "%s ", PREC); fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); fprintf(stdout,"Optimisation %s\n\n",options); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); start_time(); dgefa(a,lda,n,ipvt,&info); end_time(); atime[0][0] = secs; start_time(); dgesl(a,lda,n,ipvt,b,0); end_time(); atime[1][0] = secs; total = atime[0][0] + atime[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; printf("norm resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", (double)residn, (double)resid, (double)epsn, (double)x1, (double)x2); fprintf(stderr,"Times are reported for matrices of order %5d\n",n); fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; print_time(0); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ fprintf (stderr,"\nCalculating matgen overhead\n"); pass = -20; loop = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } end_time(); overhead1 = secs; fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1); if (overhead1 > runSecs) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1); /************************************************************************ * Calculate matgen/dgefa passes for runSecs seconds * ************************************************************************/ fprintf (stderr,"Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); pass = -20; ntimes = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); time2 = secs; fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2); if (time2 > 
runSecs) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = (int)(runSecs * (double)ntimes / time2); if (ntimes == 0) ntimes = 1; fprintf(stderr,"Passes used %10d \n\n", ntimes); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; print_time(j); } atime[3][6] = atime[3][6] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][6]); fprintf (stderr,"\nCalculating matgen2 overhead\n"); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ start_time(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } end_time(); overhead2 = secs; overhead2 = overhead2 / (double)loop; fprintf(stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = 
ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; print_time(j); } atime[3][12] = atime[3][12] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][12]); /************************************************************************ * Use minimum average as overall Mflops rating * ************************************************************************/ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; fprintf(stderr,"\n"); fprintf(stderr, "%s ", ROLLING); fprintf(stderr, "%s ", PREC); fprintf(stderr," Precision %11.2f Mflops \n\n",mflops); max1 = 0; for (i=1 ; i<6 ; i++) { if (atime[3][i] > max1) max1 = atime[3][i]; } max2 = 0; for (i=7 ; i<12 ; i++) { if (atime[3][i] > max2) max2 = atime[3][i]; } if (max1 < max2) max2 = max1; sprintf(was[0], "%16.1f",(double)residn); sprintf(was[1], "%16.8e",(double)resid); sprintf(was[2], "%16.8e",(double)epsn); sprintf(was[3], "%16.8e",(double)x1); sprintf(was[4], "%16.8e",(double)x2); /* // Values for Watcom sprintf(expect[0], " 0.4"); sprintf(expect[1], " 7.41628980e-014"); sprintf(expect[2], " 1.00000000e-015"); sprintf(expect[3], "-1.49880108e-014"); sprintf(expect[4], "-1.89848137e-014"); // Values for Visual C++ sprintf(expect[0], " 1.7"); sprintf(expect[1], " 7.41628980e-014"); sprintf(expect[2], " 2.22044605e-016"); sprintf(expect[3], "-1.49880108e-014"); sprintf(expect[4], "-1.89848137e-014"); // Values for Ubuntu GCC 32 Bit sprintf(expect[0], " 1.9"); sprintf(expect[1], " 8.39915160e-14"); sprintf(expect[2], " 2.22044605e-16"); 
sprintf(expect[3], " -6.22835117e-14"); sprintf(expect[4], " -4.16333634e-14"); */ // Values for Ubuntu GCC 32 Bit sprintf(expect[0], " 1.7"); sprintf(expect[1], " 7.41628980e-14"); sprintf(expect[2], " 2.22044605e-16"); sprintf(expect[3], " -1.49880108e-14"); sprintf(expect[4], " -1.89848137e-14"); sprintf(title[0], "norm. resid"); sprintf(title[1], "resid "); sprintf(title[2], "machep "); sprintf(title[3], "x[0]-1 "); sprintf(title[4], "x[n-1]-1 "); if (strtol(opt, NULL, 10) == 0) { sprintf(expect[2], " 8.88178420e-016"); } errors = 0; printf ("\n"); }
/*
 * Benchmark driver.  Solves a system of order ORDER, checks the residual,
 * then times dgefa/dgesl for two leading dimensions (ORDER2P1 using `a`,
 * ORDER2 using `aa`) and prints the smaller of the two averaged rates.
 *
 * Results go into the externally defined st[row][col] table:
 *   row 0  dgefa time          row 3  ops/(1e3*total)  (kflops)
 *   row 1  dgesl time          row 4  2.0e3 / row 3
 *   row 2  row 0 + row 1       row 5  total / cray
 *   col 0..2  single passes with `a`;   col 3  average over NTIMES passes
 *   col 4..6  single passes with `aa`;  col 7  average over NTIMES passes
 *
 * NOTE(review): the #endif right after the header presumably closes a
 * conditional that opens above this definition and selects an alternative
 * header; the OMPC section below uses argc/argv, which this header does not
 * declare -- confirm against the lines preceding this function.
 */
main ()
#endif
{
    static REAL aa[ORDER2][ORDER2],a[ORDER2][ORDER2P1],b[ORDER2],x[ORDER2];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps;
    REAL kf;
    double t1,tm,tm2,dtime();
    static int ipvt[ORDER2],n,i,ntimes,info,lda,ldaa,kflops;

    lda = ORDER2P1;
    ldaa = ORDER2;
    cray = .056;
    n = ORDER;
#ifdef OMPC
    /* Option "-b <int>" sets the external `bf`. */
    {
        int c;
        extern char *optarg;
        while ((c = getopt (argc, argv, "b:")) != EOF) {
            switch (c) {
            case 'b':
                bf = atoi (optarg);
                break;
            }
        }
    }
    if (omp_get_max_threads () != 1) {
        printf ("OpenMP(%d threads)\n", omp_get_max_threads ());
    } else {
        printf ("OpenMP(1 thread)\n");
    }
#endif
    printf(ROLLING);
    printf(PREC);
    printf("Precision Linpack\n\n");

    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    /* Single pass #0 with `a`: factor and solve timed separately. */
    matgen(a,lda,n,b,&norma);
    t1 = dtime();
    dgefa(a,lda,n,ipvt,&info);
    st[0][0] = dtime() - t1;
    t1 = dtime();
    dgesl(a,lda,n,ipvt,b,0);
    st[1][0] = dtime() - t1;
    total = st[0][0] + st[1][0];

    /* compute a residual to verify results. */
    for (i = 0; i < n; i++) {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++) {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    for (i = 0; i < n; i++) {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon((REAL)ONE);
    residn = resid/( n*norma*normx*eps );

    printf(" norm. resid resid machep");
    printf(" x[0]-1 x[n-1]-1\n");
    printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n",
           (double)residn, (double)resid, (double)eps,
           (double)x[0]-1, (double)x[n-1]-1);
    printf(" times are reported for matrices of order %5d\n",n);
    printf(" dgefa dgesl total kflops unit");
    printf(" ratio\n");

    st[2][0] = total;
    st[3][0] = ops/(1.0e3*total);
    st[4][0] = 2.0e3/st[3][0];
    st[5][0] = total/cray;
    printf(" times for array with leading dimension of%5d\n",lda);
    print_time(0);

    /* Single pass #1 with `a`. */
    matgen(a,lda,n,b,&norma);
    t1 = dtime();
    dgefa(a,lda,n,ipvt,&info);
    st[0][1] = dtime() - t1;
    t1 = dtime();
    dgesl(a,lda,n,ipvt,b,0);
    st[1][1] = dtime() - t1;
    total = st[0][1] + st[1][1];
    st[2][1] = total;
    st[3][1] = ops/(1.0e3*total);
    st[4][1] = 2.0e3/st[3][1];
    st[5][1] = total/cray;

    /* Single pass #2 with `a`. */
    matgen(a,lda,n,b,&norma);
    t1 = dtime();
    dgefa(a,lda,n,ipvt,&info);
    st[0][2] = dtime() - t1;
    t1 = dtime();
    dgesl(a,lda,n,ipvt,b,0);
    st[1][2] = dtime() - t1;
    total = st[0][2] + st[1][2];
    st[2][2] = total;
    st[3][2] = ops/(1.0e3*total);
    st[4][2] = 2.0e3/st[3][2];
    st[5][2] = total/cray;

    /* Column 3: average over NTIMES passes.  The time spent in matgen is
     * accumulated in tm2 and subtracted from the dgefa loop time. */
    ntimes = NTIMES;
    tm2 = 0.0;
    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        tm = dtime();
        matgen(a,lda,n,b,&norma);
        tm2 = tm2 + dtime() - tm;
        dgefa(a,lda,n,ipvt,&info);
    }
    st[0][3] = (dtime() - t1 - tm2)/ntimes;
    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        dgesl(a,lda,n,ipvt,b,0);
    }
    st[1][3] = (dtime() - t1)/ntimes;
    total = st[0][3] + st[1][3];
    st[2][3] = total;
    st[3][3] = ops/(1.0e3*total);
    st[4][3] = 2.0e3/st[3][3];
    st[5][3] = total/cray;

    print_time(1);
    print_time(2);
    print_time(3);

    /* Same sequence again with `aa` and leading dimension ldaa:
     * single pass #4. */
    matgen(aa,ldaa,n,b,&norma);
    t1 = dtime();
    dgefa(aa,ldaa,n,ipvt,&info);
    st[0][4] = dtime() - t1;
    t1 = dtime();
    dgesl(aa,ldaa,n,ipvt,b,0);
    st[1][4] = dtime() - t1;
    total = st[0][4] + st[1][4];
    st[2][4] = total;
    st[3][4] = ops/(1.0e3*total);
    st[4][4] = 2.0e3/st[3][4];
    st[5][4] = total/cray;

    /* Single pass #5 with `aa`. */
    matgen(aa,ldaa,n,b,&norma);
    t1 = dtime();
    dgefa(aa,ldaa,n,ipvt,&info);
    st[0][5] = dtime() - t1;
    t1 = dtime();
    dgesl(aa,ldaa,n,ipvt,b,0);
    st[1][5] = dtime() - t1;
    total = st[0][5] + st[1][5];
    st[2][5] = total;
    st[3][5] = ops/(1.0e3*total);
    st[4][5] = 2.0e3/st[3][5];
    st[5][5] = total/cray;

    /* Single pass #6 with `aa`. */
    matgen(aa,ldaa,n,b,&norma);
    t1 = dtime();
    dgefa(aa,ldaa,n,ipvt,&info);
    st[0][6] = dtime() - t1;
    t1 = dtime();
    dgesl(aa,ldaa,n,ipvt,b,0);
    st[1][6] = dtime() - t1;
    total = st[0][6] + st[1][6];
    st[2][6] = total;
    st[3][6] = ops/(1.0e3*total);
    st[4][6] = 2.0e3/st[3][6];
    st[5][6] = total/cray;

    /* Column 7: average over NTIMES passes with `aa`, matgen time removed. */
    ntimes = NTIMES;
    tm2 = 0;
    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        tm = dtime();
        matgen(aa,ldaa,n,b,&norma);
        tm2 = tm2 + dtime() - tm;
        dgefa(aa,ldaa,n,ipvt,&info);
    }
    st[0][7] = (dtime() - t1 - tm2)/ntimes;
    t1 = dtime();
    for (i = 0; i < ntimes; i++) {
        dgesl(aa,ldaa,n,ipvt,b,0);
    }
    st[1][7] = (dtime() - t1)/ntimes;
    total = st[0][7] + st[1][7];
    st[2][7] = total;
    st[3][7] = ops/(1.0e3*total);
    st[4][7] = 2.0e3/st[3][7];
    st[5][7] = total/cray;

    /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */
    /* kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } */

    /* Active replacement for the block above: clamp negative rates to ZERO,
     * take the smaller of the two averages and round to the nearest int. */
    if ( st[3][3] < ZERO ) st[3][3] = ZERO;
    if ( st[3][7] < ZERO ) st[3][7] = ZERO;
    kf = st[3][3];
    if ( st[3][7] < st[3][3] ) kf = st[3][7];
    kflops = (int)(kf + 0.5);

    printf(" times for array with leading dimension of%5d\n",ldaa);
    print_time(4);
    print_time(5);
    print_time(6);
    print_time(7);
    printf(ROLLING);
    printf(PREC);
    printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
}
main () { static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,t1,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int Endit, pass, loop; REAL overhead1, overhead2, time1, time2; FILE *outfile; char *compiler, *options, general[9][80] = {" "}; outfile = fopen("Linpack.txt","a+"); if (outfile == NULL) { printf ("Cannot open results file \n\n"); printf("Press any key\n"); #ifdef DOS Endit = getch(); #endif exit (0); } /************************************************************************ * Enter details of compiler and options used * ************************************************************************/ /*----------------- --------- --------- ---------*/ compiler = "INSERT COMPILER NAME HERE"; options = "INSERT OPTIMISATION OPTIONS HERE"; /* Include -dDP or -dSP and -dROLL or -dUNROLL */ lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout,ROLLING);fprintf(stdout,PREC); fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); fprintf(stdout,"Compiler %s\n",compiler); fprintf(stdout,"Optimisation %s\n\n",options); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); atime[0][0] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); atime[1][0] = second() - t1; total = atime[0][0] + atime[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; printf("norm resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", (double)residn, (double)resid, (double)epsn, (double)x1, (double)x2); fprintf(stderr,"Times are reported for matrices of order %5d\n",n); fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; print_time(0); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ fprintf (stderr,"\nCalculating matgen overhead\n"); pass = -20; loop = NTIMES; do { time1 = second(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } time2 = second(); overhead1 = (time2 - time1); fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1); if (overhead1 > 5.0) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1); /************************************************************************ * Calculate matgen/dgefa passes for 5 seconds * ************************************************************************/ fprintf (stderr,"Calculating matgen/dgefa passes for 5 seconds\n"); pass = -20; ntimes = NTIMES; do { time1 = second(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } time2 = second() - time1; fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2); if (time2 > 
5.0) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = 5.0 * (double)ntimes / time2; if (ntimes == 0) ntimes = 1; fprintf (stderr,"Passes used %10d \n\n", ntimes); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { t1 = second(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } atime[0][j] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } atime[1][j] = (second() - t1)/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; print_time(j); } atime[3][6] = atime[3][6] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][6]); fprintf (stderr,"\nCalculating matgen2 overhead\n"); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ time1 = second(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } time2 = second(); overhead2 = (time2 - time1); overhead2 = overhead2 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ 
tm2 = ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { t1 = second(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } atime[0][j] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } atime[1][j] = (second() - t1)/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; print_time(j); } atime[3][12] = atime[3][12] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][12]); /************************************************************************ * Use minimum average as overall Mflops rating * ************************************************************************/ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; fprintf(stderr,"\n"); fprintf(stderr,ROLLING);fprintf(stderr,PREC); fprintf(stderr," Precision %11.2f Mflops \n\n",mflops); what_date(); /************************************************************************ * Type details of hardware, software etc. 
* ************************************************************************/ printf ("Enter the following data which will be " "appended to file Linpack.txt \n\n"); printf ("PC Supplier/model ?\n "); scanf ("%[^\n]", general[1]); fflush (stdin); printf ("CPU ?\n "); scanf ("%[^\n]", general[2]); fflush (stdin); printf ("Clock MHz ?\n "); scanf ("%[^\n]", general[3]); fflush (stdin); printf ("Cache ?\n "); scanf ("%[^\n]", general[4]); fflush (stdin); printf ("Chipset/options ?\n "); scanf ("%[^\n]", general[5]); fflush (stdin); printf ("OS/DOS version ?\n "); scanf ("%[^\n]", general[6]); fflush (stdin); printf ("Your name ?\n "); scanf ("%[^\n]", general[7]); fflush (stdin); printf ("Where from ?\n "); scanf ("%[^\n]", general[8]); fflush (stdin); printf ("Mail address ?\n "); scanf ("%[^\n]", general[0]); fflush (stdin); /************************************************************************ * Add results to output file LLloops.txt * ************************************************************************/ fprintf (outfile, "----------------- ----------------- --------- " "--------- ---------\n"); fprintf (outfile, "LINPACK BENCHMARK FOR PCs 'C/C++' n @ 100\n\n"); fprintf (outfile, "Month run %d/%d\n", this_month, this_year); fprintf (outfile, "PC model %s\n", general[1]); fprintf (outfile, "CPU %s\n", general[2]); fprintf (outfile, "Clock MHz %s\n", general[3]); fprintf (outfile, "Cache %s\n", general[4]); fprintf (outfile, "Options %s\n", general[5]); fprintf (outfile, "OS/DOS %s\n", general[6]); fprintf (outfile, "Compiler %s\n", compiler); fprintf (outfile, "OptLevel %s\n", options); fprintf (outfile, "Run by %s\n", general[7]); fprintf (outfile, "From %s\n", general[8]); fprintf (outfile, "Mail %s\n\n", general[0]); fprintf(outfile, "Rolling %s\n",ROLLING); fprintf(outfile, "Precision %s\n",PREC); fprintf(outfile, "norm. 
resid %16.1f\n",(double)residn); fprintf(outfile, "resid %16.8e\n",(double)resid); fprintf(outfile, "machep %16.8e\n",(double)epsn); fprintf(outfile, "x[0]-1 %16.8e\n",(double)x1); fprintf(outfile, "x[n-1]-1 %16.8e\n",(double)x2); fprintf(outfile, "matgen 1 seconds %16.5f\n",overhead1); fprintf(outfile, "matgen 2 seconds %16.5f\n",overhead2); fprintf(outfile, "Repetitions %16d\n",ntimes); fprintf(outfile, "Leading dimension %16d\n",lda); fprintf(outfile, " dgefa dgesl " " total Mflops\n"); fprintf(outfile, "1 pass seconds %16.5f %9.5f %9.5f\n", atime[0][0], atime[1][0], atime[2][0]); for (i=1 ; i<6 ; i++) { fprintf(outfile, "Repeat seconds %16.5f %9.5f %9.5f %9.2f\n", atime[0][i], atime[1][i], atime[2][i], atime[3][i]); } fprintf(outfile, "Average %46.2f\n",atime[3][6]); fprintf(outfile, "Leading dimension %16d\n",ldaa); for (i=7 ; i<12 ; i++) { fprintf(outfile, "Repeat seconds %16.5f %9.5f %9.5f %9.2f\n", atime[0][i], atime[1][i], atime[2][i], atime[3][i]); } fprintf(outfile, "Average %46.2f\n\n",atime[3][12]); fclose (outfile); printf("\nPress any key\n"); #ifdef DOS Endit = getch(); #endif }
/*
 * Benchmark driver.  Solves an order-100 system, checks the residual, then
 * times dgefa/dgesl for two leading dimensions (201 using `a`, 200 using
 * `aa`) and prints the smaller of the two averaged rates.
 *
 * Individual measurements use TimerOn()/TimerOff()/TimerElapsed() and are
 * passed to Report() under a label; the NTIMES loops additionally use the
 * user timer `second_timer` around the whole loop.
 *
 * Results go into the externally defined st[row][col] table:
 *   row 0  dgefa time          row 3  ops/(1e3*total)  (kflops)
 *   row 1  dgesl time          row 4  2.0e3 / row 3
 *   row 2  row 0 + row 1       row 5  total / cray
 *   col 0..2  single passes with `a`;   col 3  average over NTIMES passes
 *   col 4..6  single passes with `aa`;  col 7  average over NTIMES passes
 *
 * The function does not return; it ends with exit( EXIT_SUCCESS ).
 */
void main ()
{
    static REAL aa[200][200],a[200][201],b[200],x[200];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps;
    REAL epslon(),kf;
#if 0
    double t1;
    double tm;
#endif
    double tm2;
    double dtime();
    static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
    static user_timer second_timer;

    lda = 201;
    ldaa = 200;
    cray = .056;
    n = 100;

    printf(ROLLING);
    printf(PREC);
    printf("Precision Linpack\n\n");

    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    /* Measurement #1 with `a`: factor and solve timed separately. */
    matgen(a,lda,n,b,&norma);
    TimerOn();
    dgefa(a,lda,n,ipvt,&info);
    TimerOff();
    st[0][0] = TimerElapsed();
    Report( "clinpack(dgefa#1)", st[0][0] );
    TimerOn();
    dgesl(a,lda,n,ipvt,b,0);
    TimerOff();
    st[1][0] = TimerElapsed();
    Report( "clinpack(dgesl#1)", st[1][0] );
    total = st[0][0] + st[1][0];

    /* compute a residual to verify results. */
    for (i = 0; i < n; i++) {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++) {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    for (i = 0; i < n; i++) {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon((REAL)ONE);
    residn = resid/( n*norma*normx*eps );

    printf(" norm. resid resid machep");
    printf(" x[0]-1 x[n-1]-1\n");
    printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n",
           (double)residn, (double)resid, (double)eps,
           (double)x[0]-1, (double)x[n-1]-1);
    printf(" times are reported for matrices of order %5d\n",n);
    printf(" dgefa dgesl total kflops unit");
    printf(" ratio\n");

    st[2][0] = total;
    st[3][0] = ops/(1.0e3*total);
    st[4][0] = 2.0e3/st[3][0];
    st[5][0] = total/cray;
    printf(" times for array with leading dimension of%5d\n",lda);
    print_time(0);

    /* Measurement #2 with `a`. */
    matgen(a,lda,n,b,&norma);
    TimerOn();
    dgefa(a,lda,n,ipvt,&info);
    TimerOff();
    st[0][1] = TimerElapsed();
    Report( "clinpack(dgefa#2)", st[0][1] );
    TimerOn();
    dgesl(a,lda,n,ipvt,b,0);
    TimerOff();
    st[1][1] = TimerElapsed();
    Report( "clinpack(dgesl#2)", st[1][1] );
    total = st[0][1] + st[1][1];
    st[2][1] = total;
    st[3][1] = ops/(1.0e3*total);
    st[4][1] = 2.0e3/st[3][1];
    st[5][1] = total/cray;

    /* Measurement #3 with `a`. */
    matgen(a,lda,n,b,&norma);
    TimerOn();
    dgefa(a,lda,n,ipvt,&info);
    TimerOff();
    st[0][2] = TimerElapsed();
    Report( "clinpack(dgefa#3)", st[0][2] );
    TimerOn();
    dgesl(a,lda,n,ipvt,b,0);
    TimerOff();
    st[1][2] = TimerElapsed();
    Report( "clinpack(dgesl#3)", st[1][2] );
    total = st[0][2] + st[1][2];
    st[2][2] = total;
    st[3][2] = ops/(1.0e3*total);
    st[4][2] = 2.0e3/st[3][2];
    st[5][2] = total/cray;

    /* Measurement #4: average over NTIMES passes.  The user timer spans the
     * whole loop; the matgen time measured inside it is accumulated in tm2
     * and subtracted. */
    ntimes = NTIMES;
    tm2 = 0.0;
    UserTimerOn( &second_timer );
    for (i = 0; i < ntimes; i++) {
        TimerOn();
        matgen(a,lda,n,b,&norma);
        TimerOff();
        tm2 = tm2 + TimerElapsed();
        dgefa(a,lda,n,ipvt,&info);
    }
    UserTimerOff( &second_timer );
    st[0][3] = ( UserTimerElapsed( &second_timer ) - tm2)/ntimes;
    Report( "clinpack(dgefa#4)", st[0][3] );
    TimerOn();
    for (i = 0; i < ntimes; i++) {
        dgesl(a,lda,n,ipvt,b,0);
    }
    TimerOff();
    st[1][3] = TimerElapsed()/ntimes;
    Report( "clinpack(dgesl#4)", st[1][3] );
    total = st[0][3] + st[1][3];
    st[2][3] = total;
    st[3][3] = ops/(1.0e3*total);
    st[4][3] = 2.0e3/st[3][3];
    st[5][3] = total/cray;

    print_time(1);
    print_time(2);
    print_time(3);

    /* Same sequence again with `aa` and leading dimension ldaa:
     * measurement #5. */
    matgen(aa,ldaa,n,b,&norma);
    TimerOn();
    dgefa(aa,ldaa,n,ipvt,&info);
    TimerOff();
    st[0][4] = TimerElapsed();
    Report( "clinpack(dgefa#5)", st[0][4] );
    TimerOn();
    dgesl(aa,ldaa,n,ipvt,b,0);
    TimerOff();
    st[1][4] = TimerElapsed();
    Report( "clinpack(dgesl#5)", st[1][4] );
    total = st[0][4] + st[1][4];
    st[2][4] = total;
    st[3][4] = ops/(1.0e3*total);
    st[4][4] = 2.0e3/st[3][4];
    st[5][4] = total/cray;

    /* Measurement #6 with `aa`. */
    matgen(aa,ldaa,n,b,&norma);
    TimerOn();
    dgefa(aa,ldaa,n,ipvt,&info);
    TimerOff();
    st[0][5] = TimerElapsed();
    Report( "clinpack(dgefa#6)", st[0][5] );
    TimerOn();
    dgesl(aa,ldaa,n,ipvt,b,0);
    TimerOff();
    st[1][5] = TimerElapsed();
    Report( "clinpack(dgesl#6)", st[1][5] );
    total = st[0][5] + st[1][5];
    st[2][5] = total;
    st[3][5] = ops/(1.0e3*total);
    st[4][5] = 2.0e3/st[3][5];
    st[5][5] = total/cray;

    /* Measurement #7 with `aa`. */
    matgen(aa,ldaa,n,b,&norma);
    TimerOn();
    dgefa(aa,ldaa,n,ipvt,&info);
    TimerOff();
    st[0][6] = TimerElapsed();
    Report( "clinpack(dgefa#7)", st[0][6] );
    TimerOn();
    dgesl(aa,ldaa,n,ipvt,b,0);
    TimerOff();
    st[1][6] = TimerElapsed();
    Report( "clinpack(dgesl#7)", st[1][6] );
    total = st[0][6] + st[1][6];
    st[2][6] = total;
    st[3][6] = ops/(1.0e3*total);
    st[4][6] = 2.0e3/st[3][6];
    st[5][6] = total/cray;

    /* Measurement #8: average over NTIMES passes with `aa`, matgen time
     * removed. */
    ntimes = NTIMES;
    tm2 = 0;
    UserTimerOn( &second_timer );
    for (i = 0; i < ntimes; i++) {
        TimerOn();
        matgen(aa,ldaa,n,b,&norma);
        TimerOff();
        tm2 = tm2 + TimerElapsed();
        dgefa(aa,ldaa,n,ipvt,&info);
    }
    UserTimerOff( &second_timer );
    st[0][7] = ( UserTimerElapsed( &second_timer ) - tm2 ) / ntimes;
    Report( "clinpack(dgefa#8)", st[0][7] );
    TimerOn();
    for (i = 0; i < ntimes; i++) {
        dgesl(aa,ldaa,n,ipvt,b,0);
    }
    TimerOff();
    st[1][7] = TimerElapsed()/ntimes;
    Report( "clinpack(dgesl#8)", st[1][7] );
    total = st[0][7] + st[1][7];
    st[2][7] = total;
    st[3][7] = ops/(1.0e3*total);
    st[4][7] = 2.0e3/st[3][7];
    st[5][7] = total/cray;

    /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */
    /* kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } */

    /* Active replacement for the block above: clamp negative rates to ZERO,
     * take the smaller of the two averages and round to the nearest int. */
    if ( st[3][3] < ZERO ) st[3][3] = ZERO;
    if ( st[3][7] < ZERO ) st[3][7] = ZERO;
    kf = st[3][3];
    if ( st[3][7] < st[3][3] ) kf = st[3][7];
    kflops = (int)(kf + 0.5);

    printf(" times for array with leading dimension of%4d\n",ldaa);
    print_time(4);
    print_time(5);
    print_time(6);
    print_time(7);
    printf(ROLLING);
    printf(PREC);
    printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
    exit( EXIT_SUCCESS );
}
main () { static REAL aa[200][200],a[200][201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,t1,tm,tm2; REAL epslon(),second(),kf; static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops; lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout,ROLLING);fprintf(stdout,PREC);fprintf(stdout,"Precision Linpack\n\n"); fprintf(stderr,ROLLING);fprintf(stderr,PREC);fprintf(stderr,"Precision Linpack\n\n"); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time[0][0] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time[1][0] = second() - t1; total = time[0][0] + time[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]); } eps = epslon((REAL)ONE); residn = resid/( n*norma*normx*eps ); printf(" norm. 
resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf(" %8.1f %16.8e%16.8e%16.8e%16.8e\n", (double)residn, (double)resid, (double)eps, (double)x[0]-1, (double)x[n-1]-1); fprintf(stderr," times are reported for matrices of order %5d\n",n); fprintf(stderr," dgefa dgesl total kflops unit"); fprintf(stderr," ratio\n"); time[2][0] = total; time[3][0] = ops/(1.0e3*total); time[4][0] = 2.0e3/time[3][0]; time[5][0] = total/cray; fprintf(stderr," times for array with leading dimension of%5d\n",lda); print_time(0); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time[0][1] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time[1][1] = second() - t1; total = time[0][1] + time[1][1]; time[2][1] = total; time[3][1] = ops/(1.0e3*total); time[4][1] = 2.0e3/time[3][1]; time[5][1] = total/cray; matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time[0][2] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time[1][2] = second() - t1; total = time[0][2] + time[1][2]; time[2][2] = total; time[3][2] = ops/(1.0e3*total); time[4][2] = 2.0e3/time[3][2]; time[5][2] = total/cray; ntimes = NTIMES; tm2 = 0.0; t1 = second(); for (i = 0; i < ntimes; i++) { tm = second(); matgen(a,lda,n,b,&norma); tm2 = tm2 + second() - tm; dgefa(a,lda,n,ipvt,&info); } time[0][3] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } time[1][3] = (second() - t1)/ntimes; total = time[0][3] + time[1][3]; time[2][3] = total; time[3][3] = ops/(1.0e3*total); time[4][3] = 2.0e3/time[3][3]; time[5][3] = total/cray; print_time(1); print_time(2); print_time(3); matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time[0][4] = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time[1][4] = second() - t1; total = time[0][4] + time[1][4]; time[2][4] = total; time[3][4] = ops/(1.0e3*total); time[4][4] = 2.0e3/time[3][4]; time[5][4] = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); 
dgefa(aa,ldaa,n,ipvt,&info); time[0][5] = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time[1][5] = second() - t1; total = time[0][5] + time[1][5]; time[2][5] = total; time[3][5] = ops/(1.0e3*total); time[4][5] = 2.0e3/time[3][5]; time[5][5] = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time[0][6] = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time[1][6] = second() - t1; total = time[0][6] + time[1][6]; time[2][6] = total; time[3][6] = ops/(1.0e3*total); time[4][6] = 2.0e3/time[3][6]; time[5][6] = total/cray; ntimes = NTIMES; tm2 = 0; t1 = second(); for (i = 0; i < ntimes; i++) { tm = second(); matgen(aa,ldaa,n,b,&norma); tm2 = tm2 + second() - tm; dgefa(aa,ldaa,n,ipvt,&info); } time[0][7] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } time[1][7] = (second() - t1)/ntimes; total = time[0][7] + time[1][7]; time[2][7] = total; time[3][7] = ops/(1.0e3*total); time[4][7] = 2.0e3/time[3][7]; time[5][7] = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(time[3][3],time[3][7]))" */ kf = (time[3][3] < time[3][7]) ? time[3][3] : time[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } fprintf(stderr," times for array with leading dimension of%4d\n",ldaa); print_time(4); print_time(5); print_time(6); print_time(7); fprintf(stderr,ROLLING);fprintf(stderr,PREC); fprintf(stderr," Precision %5d Kflops ; %d Reps \n",kflops,NTIMES); }
int clinpack_kflops ( int ntimes ) { static REAL aa[200][200],a[200][201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps; REAL kf; double t1,tm,tm2; static int ipvt[200],n,i,info,lda,ldaa,kflops; #if defined(WIN32) static float one_tick = .0001; #else static long clock_tick = -1; static float one_tick; if ( clock_tick < 1 || clock_tick > 1000) { clock_tick = sysconf( _SC_CLK_TCK ); /* clock_tick is the number of ticks per second */ one_tick = (float) 1 / clock_tick; /* one_tick is the length of time for one tick */ } #endif lda = 201; ldaa = 200; cray = .056; n = 100; ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][0] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][0] = dtime() - t1; total = st[0][0] + st[1][0]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen((double *)a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,(double *)a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]); } eps = epslon((REAL)ONE); residn = resid/( n*norma*normx*eps ); st[2][0] = total; st[3][0] = ops/(1.0e3*total); st[4][0] = 2.0e3/st[3][0]; st[5][0] = total/cray; matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][1] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][1] = dtime() - t1; total = st[0][1] + st[1][1]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. 
In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][1] = total; st[3][1] = ops/(1.0e3*total); st[4][1] = 2.0e3/st[3][1]; st[5][1] = total/cray; matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][2] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][2] = dtime() - t1; total = st[0][2] + st[1][2]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][2] = total; st[3][2] = ops/(1.0e3*total); st[4][2] = 2.0e3/st[3][2]; st[5][2] = total/cray; tm2 = 0.0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen((double *)a,lda,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa((double *)a,lda,n,ipvt,&info); } st[0][3] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl((double *)a,lda,n,ipvt,b,0); } st[1][3] = (dtime() - t1)/ntimes; total = st[0][3] + st[1][3]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][3] = total; st[3][3] = ops/(1.0e3*total); st[4][3] = 2.0e3/st[3][3]; st[5][3] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][4] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][4] = dtime() - t1; total = st[0][4] + st[1][4]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. 
Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][4] = total; st[3][4] = ops/(1.0e3*total); st[4][4] = 2.0e3/st[3][4]; st[5][4] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][5] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][5] = dtime() - t1; total = st[0][5] + st[1][5]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][5] = total; st[3][5] = ops/(1.0e3*total); st[4][5] = 2.0e3/st[3][5]; st[5][5] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][6] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][6] = dtime() - t1; total = st[0][6] + st[1][6]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][6] = total; st[3][6] = ops/(1.0e3*total); st[4][6] = 2.0e3/st[3][6]; st[5][6] = total/cray; tm2 = 0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen((double *)aa,ldaa,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa((double *)aa,ldaa,n,ipvt,&info); } st[0][7] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl((double *)aa,ldaa,n,ipvt,b,0); } st[1][7] = (dtime() - t1)/ntimes; total = st[0][7] + st[1][7]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. 
Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][7] = total; st[3][7] = ops/(1.0e3*total); st[4][7] = 2.0e3/st[3][7]; st[5][7] = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */ /* kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } */ if ( st[3][3] < ZERO ) st[3][3] = ZERO; if ( st[3][7] < ZERO ) st[3][7] = ZERO; kf = st[3][3]; if ( st[3][7] < st[3][3] ) kf = st[3][7]; kflops = (int)(kf + 0.5); return kflops; }
int main(int argc, char **argv)
{
/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for LINPACK_BENCH.

  Discussion:

    LINPACK_BENCH drives the double precision LINPACK benchmark program.
    It builds an N x N system whose exact solution is all ones, times the
    factorization (dgefa) and the solve (dgesl), regenerates the matrix to
    compute a residual, and prints the timings and the Mflops rate.

    If the first character of the first command-line argument is '0' the
    program exits immediately with status 0.

  Modified:

    25 July 2008

  Parameters:

    N is the problem size.

  Return:

    0 on success, 1 if the matrix is singular or memory cannot be obtained.
*/
# define N 1000
# define LDA ( N + 1 )

  double *a = NULL;
  double a_max;
  double *b = NULL;
  double b_max;
  double cray = 0.056;
  double eps;
  int i;
  int info;
  int *ipvt = NULL;
  int j;
  int job;
  double ops;
  double *resid = NULL;
  double resid_max;
  double residn;
  double *rhs = NULL;
  double t1;
  double t2;
  double time[6];
  double total;
  double *x = NULL;
  int status = 1;
  int arg = argc > 1 ? argv[1][0] - '0' : 3;

  if (arg == 0)
    return 0;

  timestamp ( );
  printf ( "\n" );
  printf ( "LINPACK_BENCH\n" );
  printf ( " C version\n" );
  printf ( "\n" );
  printf ( " The LINPACK benchmark.\n" );
  printf ( " Language: C\n" );
  printf ( " Datatype: Double precision real\n" );
  printf ( " Matrix order N = %d\n", N );
  printf ( " Leading matrix dimension LDA = %d\n", LDA );

  /* Computed in double throughout: 2*N*N*N as an int is already within
     7% of INT_MAX for N = 1000 and would overflow for a larger N. */
  ops = 2.0 * N * N * N / 3.0 + 2.0 * N * N;
/*
  Allocate space for arrays.
*/
  a = r8mat_gen ( LDA, N );
  b = malloc ( N * sizeof *b );
  ipvt = malloc ( N * sizeof *ipvt );
  resid = malloc ( N * sizeof *resid );
  rhs = malloc ( N * sizeof *rhs );
  x = malloc ( N * sizeof *x );

  if ( a == NULL || b == NULL || ipvt == NULL ||
       resid == NULL || rhs == NULL || x == NULL )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( " Unable to allocate memory.\n" );
    goto cleanup;
  }

  a_max = 0.0;
  for ( j = 0; j < N; j++ )
  {
    for ( i = 0; i < N; i++ )
    {
      a_max = r8_max ( a_max, a[i+j*LDA] );
    }
  }

  for ( i = 0; i < N; i++ )
  {
    x[i] = 1.0;
  }

  for ( i = 0; i < N; i++ )
  {
    b[i] = 0.0;
    for ( j = 0; j < N; j++ )
    {
      b[i] = b[i] + a[i+j*LDA] * x[j];
    }
  }

  t1 = cpu_time ( );

  info = dgefa ( a, LDA, N, ipvt );

  if ( info != 0 )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( " The matrix A is apparently singular.\n" );
    printf ( " Abnormal end of execution.\n" );
    goto cleanup;
  }

  t2 = cpu_time ( );
  time[0] = t2 - t1;

  t1 = cpu_time ( );

  job = 0;
  dgesl ( a, LDA, N, ipvt, b, job );

  t2 = cpu_time ( );
  time[1] = t2 - t1;

  total = time[0] + time[1];

  free ( a );
/*
  Compute a residual to verify results.  dgefa overwrote A, so a fresh
  copy is generated.
*/
  a = r8mat_gen ( LDA, N );
  if ( a == NULL )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( " Unable to allocate memory.\n" );
    goto cleanup;
  }

  for ( i = 0; i < N; i++ )
  {
    x[i] = 1.0;
  }

  for ( i = 0; i < N; i++ )
  {
    rhs[i] = 0.0;
    for ( j = 0; j < N; j++ )
    {
      rhs[i] = rhs[i] + a[i+j*LDA] * x[j];
    }
  }

  for ( i = 0; i < N; i++ )
  {
    resid[i] = -rhs[i];
    for ( j = 0; j < N; j++ )
    {
      resid[i] = resid[i] + a[i+j*LDA] * b[j];
    }
  }

  resid_max = 0.0;
  for ( i = 0; i < N; i++ )
  {
    resid_max = r8_max ( resid_max, r8_abs ( resid[i] ) );
  }

  b_max = 0.0;
  for ( i = 0; i < N; i++ )
  {
    b_max = r8_max ( b_max, r8_abs ( b[i] ) );
  }

  eps = r8_epsilon ( );

  residn = resid_max / ( double ) N / a_max / b_max / eps;

  time[2] = total;
  if ( 0.0 < total )
  {
    time[3] = ops / ( 1.0E+06 * total );
  }
  else
  {
    /* clock too coarse to measure anything: flag the rate as invalid */
    time[3] = -1.0;
  }
  time[4] = 2.0 / time[3];
  time[5] = total / cray;

  printf ( "\n" );
  printf ( " Norm. Resid Resid MACHEP X[1] X[N]\n" );
  printf ( "\n" );
  printf ( " %14f %14f %14e %14f %14f\n", residn, resid_max, eps, b[0], b[N-1] );
  printf ( "\n" );
  printf ( " Factor Solve Total Unit Cray-Ratio\n" );
  printf ( "\n" );
  printf ( " %9f %9f %9f %9f %9f\n", time[0], time[1], time[2], time[4], time[5] );
  printf ( "\n" );
  printf ( "Unrolled Double Precision %9f Mflops\n", time[3]);
  printf ( "\n" );
/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "LINPACK_BENCH\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  status = 0;

cleanup:
  /* free(NULL) is a no-op, so this is safe from every exit path */
  free ( a );
  free ( b );
  free ( ipvt );
  free ( resid );
  free ( rhs );
  free ( x );

  return status;
# undef LDA
# undef N
}
/*
 * Run 2*nreps LINPACK passes on a system of order arsize/2 and report the
 * result line through pass_jni_msg().
 *
 * Working storage is carved out of the file-level `mempool`: the matrix `a`
 * (arsize*arsize REALs) first, then the right-hand side `b`, then the pivot
 * vector `ipvt` starting at &b[arsize].
 *
 * The first nreps passes call dgefa/dgesl with a trailing argument of 1,
 * the second nreps passes with 0.
 * NOTE(review): presumably that flag selects the rolled vs. unrolled
 * kernels -- confirm against dgefa/dgesl.
 *
 * nreps   number of repetitions of each pass variant
 * arsize  leading dimension of the matrix; the system solved is arsize/2
 *
 * Returns the total elapsed time in seconds, or 0 when the run was too
 * short to be meaningful (total < 0.5 s or factor+solve < 0.2 s); in that
 * case nothing is reported.  When the total exceeds 10 s the Mflops figure
 * is additionally handed to pass_result().
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *a,*b;
    REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int *ipvt,n,info,lda;
    long i,arsize2d;
    char buf[80];

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();
    for (i=0;i<nreps;i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }
    for (i=0;i<nreps;i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;

    /* Bounded formatting: the field widths are minimums, so an unusually
       large value must truncate the line rather than overrun buf. */
    snprintf(buf,sizeof buf,"%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
             nreps,totalt,100.*tdgefa/totalt,
             100.*tdgesl/totalt,100.*toverhead/totalt,
             kflops/1000.0);

    /* Callback to Java to print buf */
    pass_jni_msg(buf);

    if(totalt > 10.) /* only send data to server when bench is done */
        pass_result("mflops", (float) (kflops/1000.0));

    return(totalt);
}
int main(int argc, char ** argv) { int i; // do we have verbose output? bool ga_testing = false; if (argc > 1) { for (i = 1; i < argc; ++i) { if (!strcmp(argv[1],"-ga")) { ga_testing = true; break; } } } double ** a = (double **)malloc(sizeof(double) * N); for (i = 0; i < N; ++i) a[i] = (double *)malloc(sizeof(double) * NP1); double * b = (double *)malloc(sizeof(double) * N); double * x = (double *)malloc(sizeof(double) * N); int * ipvt = (int *)malloc(sizeof(int) * N); // calculate operations per timeInSeconds double ops = (2.0 * ((double)N * N * N)) / 3.0 + 2.0 * ((double)N * N); // generate matrix matgen(a,b); // get starting time //struct timespec start, stop; //clock_gettime(CLOCK_REALTIME,&start); // what we're timing dgefa(a,ipvt); dgesl(a,ipvt,b); // calculate run time //clock_gettime(CLOCK_REALTIME,&stop); double run_time = 0;//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0; // clean up free(ipvt); free(x); free(b); for (i = 0; i < N; ++i) free(a[i]); free(a); // report runtime if (ga_testing) fprintf(stdout,"%f",run_time); else fprintf(stdout,"\nlpbench (Std. C) run time: %f\n\n",run_time); fflush(stdout); // done return 0; }
/*
 * JNI entry point: runs the LINPACK n = 100 benchmark and returns a new
 * instance of resultClass with its double fields "mflops", "nres" and
 * "precision" filled in.
 *
 * Timing goes through start_time()/end_time(); the elapsed time of the
 * last interval is read from the file-level `secs`.  Per-measurement
 * results are stored in the file-level atime[][] table:
 *   column 0      single verification run on `a`
 *   columns 1-5   five timed passes on `a`  (lda 201), average in [3][6]
 *   columns 7-11  five timed passes on `aa` (lda 200), average in [3][12]
 * The number of repetitions per pass is calibrated so that one pass takes
 * about `runSecs` seconds.
 *
 * The "mflops" field receives the smaller of the two per-layout best
 * passes (max over columns 1-5 vs. max over columns 7-11).
 * NOTE(review): the section banner in the original spoke of using the
 * minimum *average* as the overall rating, but that value was computed and
 * never published; the published figure is kept unchanged here.
 *
 * env          JNI environment
 * thiz         calling object (unused)
 * resultClass  class to instantiate; must have a no-arg constructor and
 *              the three double fields named above
 *
 * Returns the populated result object, or NULL when the constructor or a
 * field cannot be resolved or the object cannot be created.  In those
 * cases the failed JNI call is expected to have left a Java exception
 * pending for the caller.
 */
jobject Java_rs_pedjaapps_Linpack_MainActivity_runLinpack (JNIEnv* env, jobject thiz, jclass resultClass)
{
    __android_log_write (ANDROID_LOG_DEBUG, "linpack-jni.c", "running neon linpack");

    static REAL aa[200*200],a[200*201],b[200],x[200];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps,tm2,epsn;
    static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
    int pass, loop;
    REAL overhead1, overhead2, time2;
    REAL max1, max2;

    lda = 201;
    ldaa = 200;
    cray = .056;
    n = 100;

    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    matgen(a,lda,n,b,&norma);
    start_time();
    dgefa(a,lda,n,ipvt,&info);
    end_time();
    atime[0][0] = secs;
    start_time();
    dgesl(a,lda,n,ipvt,b,0);
    end_time();
    atime[1][0] = secs;
    total = atime[0][0] + atime[1][0];

    // compute a residual to verify results.
    for (i = 0; i < n; i++)
    {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++)
    {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    for (i = 0; i < n; i++)
    {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon(ONE);
    residn = resid/( n*norma*normx*eps );
    epsn = eps;

    atime[2][0] = total;
    if (total > 0.0)
    {
        atime[3][0] = ops/(1.0e6*total);
        atime[4][0] = 2.0/atime[3][0];
    }
    else
    {
        atime[3][0] = 0.0;
        atime[4][0] = 0.0;
    }
    atime[5][0] = total/cray;

    // ************************************************************************
    // *            Calculate overhead of executing matgen procedure          *
    // ************************************************************************

    // Grow the repeat count (x10 while a batch takes < 0.1 s, x2 otherwise)
    // until one batch exceeds runSecs, giving up after 20 attempts.
    pass = -20;
    loop = NTIMES;
    do
    {
        start_time();
        pass = pass + 1;
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(a,lda,n,b,&norma);
        }
        end_time();
        overhead1 = secs;
        if (overhead1 > runSecs)
        {
            pass = 0;
        }
        if (pass < 0)
        {
            if (overhead1 < 0.1)
            {
                loop = loop * 10;
            }
            else
            {
                loop = loop * 2;
            }
        }
    }
    while (pass < 0);

    overhead1 = overhead1 / (double)loop;

    // ************************************************************************
    // *            Calculate matgen/dgefa passes for runSecs seconds         *
    // ************************************************************************

    pass = -20;
    ntimes = NTIMES;
    do
    {
        start_time();
        pass = pass + 1;
        for ( i = 0 ; i < ntimes ; i++)
        {
            matgen(a,lda,n,b,&norma);
            dgefa(a,lda,n,ipvt,&info );
        }
        end_time();
        time2 = secs;
        if (time2 > runSecs)
        {
            pass = 0;
        }
        if (pass < 0)
        {
            if (time2 < 0.1)
            {
                ntimes = ntimes * 10;
            }
            else
            {
                ntimes = ntimes * 2;
            }
        }
    }
    while (pass < 0);

    // scale the repeat count so that one pass lasts about runSecs
    ntimes = (int)(runSecs * (double)ntimes / time2);
    if (ntimes == 0) ntimes = 1;

    // ************************************************************************
    // *                           Execute 5 passes                           *
    // ************************************************************************

    tm2 = ntimes * overhead1;   // estimated matgen share of each pass
    atime[3][6] = 0;

    for (j=1 ; j<6 ; j++)
    {
        start_time();
        for (i = 0; i < ntimes; i++)
        {
            matgen(a,lda,n,b,&norma);
            dgefa(a,lda,n,ipvt,&info );
        }
        end_time();
        atime[0][j] = (secs - tm2)/ntimes;

        start_time();
        for (i = 0; i < ntimes; i++)
        {
            dgesl(a,lda,n,ipvt,b,0);
        }
        end_time();
        atime[1][j] = secs/ntimes;

        total = atime[0][j] + atime[1][j];
        atime[2][j] = total;
        atime[3][j] = ops/(1.0e6*total);
        atime[4][j] = 2.0/atime[3][j];
        atime[5][j] = total/cray;
        atime[3][6] = atime[3][6] + atime[3][j];
    }
    atime[3][6] = atime[3][6] / 5.0;

    // ************************************************************************
    // *            Calculate overhead of executing matgen procedure          *
    // ************************************************************************

    start_time();
    for ( i = 0 ; i < loop ; i++)
    {
        matgen(aa,ldaa,n,b,&norma);
    }
    end_time();
    overhead2 = secs;
    overhead2 = overhead2 / (double)loop;

    // ************************************************************************
    // *                           Execute 5 passes                           *
    // ************************************************************************

    tm2 = ntimes * overhead2;
    atime[3][12] = 0;

    for (j=7 ; j<12 ; j++)
    {
        start_time();
        for (i = 0; i < ntimes; i++)
        {
            matgen(aa,ldaa,n,b,&norma);
            dgefa(aa,ldaa,n,ipvt,&info );
        }
        end_time();
        atime[0][j] = (secs - tm2)/ntimes;

        start_time();
        for (i = 0; i < ntimes; i++)
        {
            dgesl(aa,ldaa,n,ipvt,b,0);
        }
        end_time();
        atime[1][j] = secs/ntimes;

        total = atime[0][j] + atime[1][j];
        atime[2][j] = total;
        atime[3][j] = ops/(1.0e6*total);
        atime[4][j] = 2.0/atime[3][j];
        atime[5][j] = total/cray;
        atime[3][12] = atime[3][12] + atime[3][j];
    }
    atime[3][12] = atime[3][12] / 5.0;

    // ************************************************************************
    // *     Published rating: the lower of the two per-layout best passes    *
    // ************************************************************************

    max1 = 0;
    for (i=1 ; i<6 ; i++)
    {
        if (atime[3][i] > max1) max1 = atime[3][i];
    }

    max2 = 0;
    for (i=7 ; i<12 ; i++)
    {
        if (atime[3][i] > max2) max2 = atime[3][i];
    }
    if (max1 < max2) max2 = max1;

    // ************************************************************************
    // *                 Build the Java result object                         *
    // ************************************************************************

    jmethodID jConstructor = (*env)->GetMethodID (env, resultClass, "<init>", "()V");
    if (jConstructor == NULL)
    {
        // Passing a NULL method ID to NewObject is invalid; bail out instead.
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jConstructor is NULL");
        return NULL;
    }

    jobject resultObject = (*env)->NewObject (env, resultClass, jConstructor);
    if (resultObject == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "resultObject is NULL");
        return NULL;
    }

    jfieldID jMFlops = (*env)->GetFieldID (env, resultClass, "mflops", "D");
    if (jMFlops == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "field mflops not found");
        return NULL;
    }
    jfieldID jResidn = (*env)->GetFieldID (env, resultClass, "nres", "D");
    if (jResidn == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "field nres not found");
        return NULL;
    }
    jfieldID jEpsn = (*env)->GetFieldID (env, resultClass, "precision", "D");
    if (jEpsn == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "field precision not found");
        return NULL;
    }

    (*env)->SetDoubleField (env, resultObject, jMFlops, max2);
    (*env)->SetDoubleField (env, resultObject, jResidn, (double) residn);
    (*env)->SetDoubleField (env, resultObject, jEpsn, (double) epsn);

    return resultObject;
}