/*
 * Time 2*nreps factor/solve cycles on an n x n system, n = arsize/2, using
 * storage taken from mempool.
 *
 * Storage layout: arsize*arsize REALs for the matrix, then the right-hand
 * side; the pivot vector begins arsize REALs into the right-hand side area.
 *
 * The first nreps cycles pass 1 as the last argument of dgefa/dgesl, the
 * second nreps cycles pass 0 (presumably the rolled/unrolled selector --
 * confirm against dgefa/dgesl).
 *
 * One result row is printed: repetitions, total seconds, percentage of the
 * total spent in dgefa, in dgesl and in everything else, and kflops/1000.
 * When the run took at least 4 seconds the same kflops/1000 figure is passed
 * to call_objc_obj().
 *
 * Returns the total elapsed seconds, or 0 when the run was too short to
 * report (total under 0.5 s, or dgefa+dgesl under 0.2 s).
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *matrix, *rhs;
    REAL norm, mark, rate, solve_secs, factor_secs, elapsed, overhead, flops;
    int *pivots, order, status, stride;
    int pass, mode;
    long rep, cells;

    stride = arsize;
    order = arsize/2;
    cells = (long)arsize*(long)arsize;
    flops = ((2.0*order*order*order)/3.0+2.0*order*order);

    matrix = (REAL *)mempool;
    rhs = matrix+cells;
    pivots = (int *)&rhs[arsize];

    solve_secs = 0;
    factor_secs = 0;
    elapsed = second();

    /* pass 0 runs with mode 1, pass 1 with mode 0 */
    for (pass = 0; pass < 2; pass++)
    {
        mode = (pass == 0) ? 1 : 0;
        for (rep = 0; rep < nreps; rep++)
        {
            matgen(matrix,stride,order,rhs,&norm);

            mark = second();
            dgefa(matrix,stride,order,pivots,&status,mode);
            factor_secs += second()-mark;

            mark = second();
            dgesl(matrix,stride,order,pivots,rhs,0,mode);
            solve_secs += second()-mark;
        }
    }
    elapsed = second()-elapsed;

    /* too little time measured for the figures to mean anything */
    if (elapsed<0.5 || factor_secs+solve_secs<0.2)
        return 0.;

    /* rate and overhead are derived before the timings are clamped */
    rate = 2.*nreps*flops/(1000.*(factor_secs+solve_secs));
    overhead = elapsed-factor_secs-solve_secs;

    if (factor_secs<0.)
        factor_secs = 0.;
    if (solve_secs<0.)
        solve_secs = 0.;
    if (overhead<0.)
        overhead = 0.;

    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
           nreps,elapsed,100.*factor_secs/elapsed,
           100.*solve_secs/elapsed,100.*overhead/elapsed,
           rate/1000.0);

    if (elapsed >= 4.0)
        call_objc_obj(rate/1000.0);

    return elapsed;
}
/*
 * Benchmark kernel: runs nreps factor/solve cycles with the last argument of
 * dgefa/dgesl set to 1, then nreps more with it set to 0, on a system of
 * order arsize/2 whose storage is taken from mempool (matrix first, then the
 * right-hand side, with the pivot vector arsize REALs into the latter).
 *
 * Prints one row with the repetition count, the total seconds, the share of
 * that total spent in dgefa, dgesl and elsewhere, and kflops/1000.  Runs
 * longer than 10 seconds additionally hand kflops/1000 to
 * publish_linpack_result() together with fixed endpoint/device strings.
 *
 * Returns the total elapsed seconds, or 0 if the run was too short
 * (total < 0.5 s or dgefa+dgesl < 0.2 s); nothing is printed in that case.
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *mat;
    REAL *vec;
    REAL matnorm, stamp, kf, t_solve, t_factor, t_total, t_other, work;
    int *piv;
    int dim, ld, flag;
    long left, nelem;

    ld = arsize;
    dim = arsize/2;
    nelem = (long)arsize*(long)arsize;
    work = ((2.0*dim*dim*dim)/3.0+2.0*dim*dim);

    mat = (REAL *)mempool;
    vec = mat+nelem;
    piv = (int *)&vec[arsize];

    t_solve = 0;
    t_factor = 0;
    t_total = second();

    /* first batch: last dgefa/dgesl argument is 1 */
    for (left = nreps; left > 0; left--)
    {
        matgen(mat,ld,dim,vec,&matnorm);
        stamp = second();
        dgefa(mat,ld,dim,piv,&flag,1);
        t_factor += second()-stamp;
        stamp = second();
        dgesl(mat,ld,dim,piv,vec,0,1);
        t_solve += second()-stamp;
    }

    /* second batch: last dgefa/dgesl argument is 0 */
    for (left = nreps; left > 0; left--)
    {
        matgen(mat,ld,dim,vec,&matnorm);
        stamp = second();
        dgefa(mat,ld,dim,piv,&flag,0);
        t_factor += second()-stamp;
        stamp = second();
        dgesl(mat,ld,dim,piv,vec,0,0);
        t_solve += second()-stamp;
    }

    t_total = second()-t_total;

    if (t_total<0.5 || t_factor+t_solve<0.2)
    {
        return 0.;
    }

    kf = 2.*nreps*work/(1000.*(t_factor+t_solve));
    t_other = t_total-t_factor-t_solve;

    /* negative intervals are reported as zero */
    if (t_factor<0.) { t_factor = 0.; }
    if (t_solve<0.)  { t_solve = 0.; }
    if (t_other<0.)  { t_other = 0.; }

    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
           nreps,t_total,100.*t_factor/t_total,
           100.*t_solve/t_total,100.*t_other/t_total,
           kf/1000.0);

    if (t_total > 10.)
    {
        /* publish the result in the benchmark database */
        publish_linpack_result("http://modev.mine.nu:8070/benchmark/publish_result.php",
                               "1337", "MoSync", "987123ab", "HTC%20Wildfire", "2",
                               kf/1000.0);
    }

    return t_total;
}
/*
 * Run nreps factor/solve cycles with the last argument of dgefa/dgesl set
 * to 1 and nreps more with it set to 0, on a system of order arsize/2 whose
 * storage is taken from mempool (matrix, then right-hand side, with the
 * pivot vector arsize REALs into the right-hand side area).
 *
 * Prints the resulting kflops figure on a line of its own ("%f\n") and
 * returns the total elapsed seconds.
 *
 * If no time at all was measured inside dgefa/dgesl (timer resolution too
 * coarse for the requested amount of work) the rate is reported as 0 rather
 * than dividing by zero and printing inf/nan.
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *a,*b;
    REAL norma,t1,kflops,tdgesl,tdgefa,totalt,ops;
    int *ipvt,n,info,lda;
    long i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();

    for (i=0; i<nreps; i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }
    for (i=0; i<nreps; i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;

    /* only divide when a positive interval was actually measured */
    if (tdgefa+tdgesl > 0.)
        kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    else
        kflops=0.;

    printf("%f\n", kflops);
    return(totalt);
}
/*
 * Entry point: expects exactly one argument, the order n of the square
 * matrix.  Allocates an n*n int matrix, hands it to matgen(n, a) and then to
 * madef(n, a, argv[1]), and releases it.
 *
 * The argument is parsed with strtod so that every spelling atof accepted
 * for a whole number ("100", "1e3", "100.7" -> 100) still works, but input
 * that is not a number, has trailing junk, or is below 1 is now rejected
 * instead of silently becoming 0.
 *
 * Exit status: 0 on success, -1 (as before) on any usage or resource error.
 */
int main(int argc, char** argv){
    /*
     * Upper bound on the order: the largest n for which n*n still fits in a
     * 32-bit signed int.  NOTE(review): matgen/madef are assumed to index the
     * matrix with int arithmetic -- raise this only after confirming.
     */
    enum { MAX_ORDER = 46340 };
    char *end;
    double requested;
    int n;
    int *a;

    if(argc<2){
        printf("You not write a size of matrix\n");
        exit(-1);
    }
    if(argc>2){
        printf("A lot of arguments\n");
        exit(-1);
    }

    requested=strtod(argv[1],&end);
    /* written as a negated range test so that NaN is rejected as well */
    if(end==argv[1] || *end!='\0' || !(requested>=1.0 && requested<=(double)MAX_ORDER)){
        fprintf(stderr,"Invalid size of matrix: %s (expected 1..%d)\n",argv[1],(int)MAX_ORDER);
        exit(-1);
    }
    n=(int)requested;

    /* size computed in size_t so the product cannot overflow int */
    a=(int*)malloc((size_t)n*(size_t)n*sizeof(int));
    if(a==NULL){
        fprintf(stderr,"Not enough memory for a %d x %d matrix\n",n,n);
        exit(-1);
    }

    matgen(n,a);
    madef(n,a,argv[1]);
    free(a);
    return 0;
}
void test01 ( int m, int n )

/******************************************************************************/
/*
  Purpose:

    TEST01 times the OpenMP matrix-vector product for a given matrix size MxN.

  Discussion:

    Storage for an M*N array A, an N-vector X and an M-vector Y is allocated,
    A and X are handed to MATGEN, and MXV_PLAIN_OPENMP is timed with
    omp_get_wtime.  One result line is printed to standard output.

    If M or N is not positive, if the requested storage size cannot be
    represented, or if any allocation fails, a message is written to stderr
    and the routine returns without timing anything.

    The timing of the serial MXV_PLAIN variant is currently disabled.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    27 April 2008

  Author:

    John Burkardt

  Parameters:

    Input, int M, N, the number of rows and columns of the matrix.
*/
{
  double *a = NULL;
  double seconds;
  double *x = NULL;
  double *y = NULL;
  size_t cols;
  size_t rows;

  if ( m < 1 || n < 1 )
  {
    fprintf ( stderr, "TEST01 - M and N must be positive (got %d, %d).\n", m, n );
    return;
  }

  rows = ( size_t ) m;
  cols = ( size_t ) n;
/*
  Refuse sizes for which rows * cols * sizeof ( double ) would wrap around.
*/
  if ( rows > ( ( size_t ) -1 ) / sizeof ( double ) / cols )
  {
    fprintf ( stderr, "TEST01 - a %d x %d matrix is too large.\n", m, n );
    return;
  }

  a = ( double * ) malloc ( rows * cols * sizeof ( double ) );
  x = ( double * ) malloc ( cols * sizeof ( double ) );
  y = ( double * ) malloc ( rows * sizeof ( double ) );

  if ( a == NULL || x == NULL || y == NULL )
  {
    fprintf ( stderr, "TEST01 - not enough memory for a %d x %d matrix.\n", m, n );
    free ( a );
    free ( x );
    free ( y );
    return;
  }

  matgen ( m, n, a, x );

  seconds = omp_get_wtime ( );
  mxv_plain_openmp ( m, n, a, x, y );
  seconds = omp_get_wtime ( ) - seconds;
  printf ( " MXV_PLAIN_OPENMP %8d %8d %14f\n", m, n, seconds );

  free ( a );
  free ( x );
  free ( y );

  return;
}
static REAL linpack(long nreps,int arsize) { REAL *a,*b; REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops; int *ipvt,n,info,lda; long i,arsize2d; lda = arsize; n = arsize/2; arsize2d = (long)arsize*(long)arsize; ops=((2.0*n*n*n)/3.0+2.0*n*n); a=(REAL *)mempool; b=a+arsize2d; ipvt=(int *)&b[arsize]; tdgesl=0; tdgefa=0; totalt=dtime(); for (i=0;i<nreps;i++) { matgen(a,lda,n,b,&norma); t1 = dtime(); dgefa(a,lda,n,ipvt,&info,roll); tdgefa += dtime()-t1; t1 = dtime(); dgesl(a,lda,n,ipvt,b,0,roll); tdgesl += dtime()-t1; } totalt=dtime()-totalt; if (totalt<0.5 || tdgefa+tdgesl<0.2) return(0.); kflops=nreps*ops/(1000.*(tdgefa+tdgesl)); toverhead=totalt-tdgefa-tdgesl; if (tdgefa<0.) tdgefa=0.; if (tdgesl<0.) tdgesl=0.; if (toverhead<0.) toverhead=0.; printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", nreps,totalt,100.*tdgefa/totalt, 100.*tdgesl/totalt,100.*toverhead/totalt, kflops); return(totalt); }
/*
 * Linpack benchmark driver (the banner it prints reads "Precision Linpack
 * Benchmark - PC Version in 'C/C++'").
 *
 * Sequence, as written below:
 *   1. Print the banner (ROLLING, PREC, options) and fix n = 100, lda = 201,
 *      ldaa = 200, cray = .056; ops = 2n^3/3 + 2n^2.
 *   2. Generate the system with matgen, then time one dgefa and one dgesl
 *      with start_time()/end_time(), reading the interval from secs, into
 *      atime[0][0] and atime[1][0].
 *   3. Regenerate the system, form the residual with dmxpy and print residn,
 *      resid, the machine epsilon from epslon(ONE), x[0]-1 and x[n-1]-1.
 *   4. Measure matgen alone: the repeat count `loop` starts at NTIMES and is
 *      multiplied by 10 (run shorter than 0.1 s) or by 2 until one run
 *      exceeds runSecs or 20 attempts were made; overhead1 is the time per
 *      matgen call.
 *   5. The same calibration for matgen+dgefa yields ntimes, which is then
 *      rescaled to runSecs * ntimes / time2 (minimum 1).
 *   6. Five passes with array a / leading dimension lda fill columns 1..5 of
 *      atime; the dgefa figure has the matgen estimate (ntimes * overhead1)
 *      subtracted.  The mean Mflops of the five goes to atime[3][6].
 *   7. matgen overhead is re-measured for array aa / ldaa (overhead2) and
 *      five more passes fill columns 7..11; their mean goes to atime[3][12].
 *   8. The smaller of the two means is printed as the Mflops rating.
 *   9. Formatted copies of the numeric results (was[]), reference values
 *      (expect[]) and labels (title[]) are prepared.
 *
 * Rows of atime as assigned here: 0 = dgefa time, 1 = dgesl time, 2 = their
 * sum, 3 = Mflops (ops / (1e6 * total)), 4 = 2.0 / Mflops, 5 = total / cray.
 *
 * NOTE(review): only the single-pass case (column 0) guards against
 * total <= 0; the five-pass loops divide by total unconditionally.
 * NOTE(review): was[] rows hold 20 bytes; "%16.1f" produces more than 19
 * characters for very large residn values -- consider snprintf.
 * NOTE(review): max1/max2, was/expect/title, errors, endit, argc and argv
 * are not read after being set in the code visible here; the consumer may
 * have been removed -- confirm before deleting.
 * NOTE(review): in this copy the original line breaks are missing, so each
 * "// Values for ..." line comment extends to the end of its physical line;
 * restore the line breaks before compiling.
 */
int main (int argc, char *argv[]) { static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int endit, pass, loop; REAL overhead1, overhead2, time2; REAL max1, max2; char was[5][20]; char expect[5][20]; char title[5][20]; int errors; printf("\n"); printf("##########################################\n"); lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout, "%s ", ROLLING); fprintf(stdout, "%s ", PREC); fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); fprintf(stdout,"Optimisation %s\n\n",options); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); start_time(); dgefa(a,lda,n,ipvt,&info); end_time(); atime[0][0] = secs; start_time(); dgesl(a,lda,n,ipvt,b,0); end_time(); atime[1][0] = secs; total = atime[0][0] + atime[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; printf("norm resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", (double)residn, (double)resid, (double)epsn, (double)x1, (double)x2); fprintf(stderr,"Times are reported for matrices of order %5d\n",n); fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; print_time(0); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ fprintf (stderr,"\nCalculating matgen overhead\n"); pass = -20; loop = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } end_time(); overhead1 = secs; fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1); if (overhead1 > runSecs) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1); /************************************************************************ * Calculate matgen/dgefa passes for runSecs seconds * ************************************************************************/ fprintf (stderr,"Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs); pass = -20; ntimes = NTIMES; do { start_time(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); time2 = secs; fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2); if (time2 > 
runSecs) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = (int)(runSecs * (double)ntimes / time2); if (ntimes == 0) ntimes = 1; fprintf(stderr,"Passes used %10d \n\n", ntimes); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; print_time(j); } atime[3][6] = atime[3][6] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][6]); fprintf (stderr,"\nCalculating matgen2 overhead\n"); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ start_time(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } end_time(); overhead2 = secs; overhead2 = overhead2 / (double)loop; fprintf(stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = 
ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { start_time(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } end_time(); atime[0][j] = (secs - tm2)/ntimes; start_time(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } end_time(); atime[1][j] = secs/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; print_time(j); } atime[3][12] = atime[3][12] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][12]); /************************************************************************ * Use minimum average as overall Mflops rating * ************************************************************************/ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; fprintf(stderr,"\n"); fprintf(stderr, "%s ", ROLLING); fprintf(stderr, "%s ", PREC); fprintf(stderr," Precision %11.2f Mflops \n\n",mflops); max1 = 0; for (i=1 ; i<6 ; i++) { if (atime[3][i] > max1) max1 = atime[3][i]; } max2 = 0; for (i=7 ; i<12 ; i++) { if (atime[3][i] > max2) max2 = atime[3][i]; } if (max1 < max2) max2 = max1; sprintf(was[0], "%16.1f",(double)residn); sprintf(was[1], "%16.8e",(double)resid); sprintf(was[2], "%16.8e",(double)epsn); sprintf(was[3], "%16.8e",(double)x1); sprintf(was[4], "%16.8e",(double)x2); /* // Values for Watcom sprintf(expect[0], " 0.4"); sprintf(expect[1], " 7.41628980e-014"); sprintf(expect[2], " 1.00000000e-015"); sprintf(expect[3], "-1.49880108e-014"); sprintf(expect[4], "-1.89848137e-014"); // Values for Visual C++ sprintf(expect[0], " 1.7"); sprintf(expect[1], " 7.41628980e-014"); sprintf(expect[2], " 2.22044605e-016"); sprintf(expect[3], "-1.49880108e-014"); sprintf(expect[4], "-1.89848137e-014"); // Values for Ubuntu GCC 32 Bit sprintf(expect[0], " 1.9"); sprintf(expect[1], " 8.39915160e-14"); sprintf(expect[2], " 2.22044605e-16"); 
sprintf(expect[3], " -6.22835117e-14"); sprintf(expect[4], " -4.16333634e-14"); */ // Values for Ubuntu GCC 32 Bit sprintf(expect[0], " 1.7"); sprintf(expect[1], " 7.41628980e-14"); sprintf(expect[2], " 2.22044605e-16"); sprintf(expect[3], " -1.49880108e-14"); sprintf(expect[4], " -1.89848137e-14"); sprintf(title[0], "norm. resid"); sprintf(title[1], "resid "); sprintf(title[2], "machep "); sprintf(title[3], "x[0]-1 "); sprintf(title[4], "x[n-1]-1 "); if (strtol(opt, NULL, 10) == 0) { sprintf(expect[2], " 8.88178420e-016"); } errors = 0; printf ("\n"); }
/*
 * Classic Linpack driver reporting Kflops for a system of order n = 100,
 * measured once with leading dimension lda = 201 (array a) and once with
 * ldaa = 200 (array aa).  Timestamps come from second().
 *
 * Columns of the time[][] table as filled here:
 *   0      first factor/solve, also used for the residual check that is
 *          printed to stdout (residn, resid, eps, x[0]-1, x[n-1]-1)
 *   1, 2   two further single factor/solve runs on a
 *   3      NTIMES repetitions on a; the time spent inside matgen (tm2) is
 *          subtracted from the dgefa loop before averaging
 *   4, 5, 6 three single factor/solve runs on aa
 *   7      NTIMES repetitions on aa, averaged the same way as column 3
 * Rows: 0 = dgefa time, 1 = dgesl time, 2 = their sum,
 *       3 = kflops (ops / (1e3 * total)), 4 = 2.0e3 / kflops,
 *       5 = total / cray (cray = .056).
 *
 * The final figure is min(time[3][3], time[3][7]) rounded to the nearest
 * integer (the hand-written equivalent of Fortran nint) and printed to
 * stderr together with NTIMES.
 *
 * NOTE(review): none of the divisions by total is guarded against a zero
 * interval.
 * NOTE(review): in this copy a physical line break falls inside the
 * " norm. resid ..." string literal; rejoin it before compiling.
 */
main () { static REAL aa[200][200],a[200][201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,t1,tm,tm2; REAL epslon(),second(),kf; static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops; lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout,ROLLING);fprintf(stdout,PREC);fprintf(stdout,"Precision Linpack\n\n"); fprintf(stderr,ROLLING);fprintf(stderr,PREC);fprintf(stderr,"Precision Linpack\n\n"); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time[0][0] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time[1][0] = second() - t1; total = time[0][0] + time[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]); } eps = epslon((REAL)ONE); residn = resid/( n*norma*normx*eps ); printf(" norm. 
resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf(" %8.1f %16.8e%16.8e%16.8e%16.8e\n", (double)residn, (double)resid, (double)eps, (double)x[0]-1, (double)x[n-1]-1); fprintf(stderr," times are reported for matrices of order %5d\n",n); fprintf(stderr," dgefa dgesl total kflops unit"); fprintf(stderr," ratio\n"); time[2][0] = total; time[3][0] = ops/(1.0e3*total); time[4][0] = 2.0e3/time[3][0]; time[5][0] = total/cray; fprintf(stderr," times for array with leading dimension of%5d\n",lda); print_time(0); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time[0][1] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time[1][1] = second() - t1; total = time[0][1] + time[1][1]; time[2][1] = total; time[3][1] = ops/(1.0e3*total); time[4][1] = 2.0e3/time[3][1]; time[5][1] = total/cray; matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time[0][2] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time[1][2] = second() - t1; total = time[0][2] + time[1][2]; time[2][2] = total; time[3][2] = ops/(1.0e3*total); time[4][2] = 2.0e3/time[3][2]; time[5][2] = total/cray; ntimes = NTIMES; tm2 = 0.0; t1 = second(); for (i = 0; i < ntimes; i++) { tm = second(); matgen(a,lda,n,b,&norma); tm2 = tm2 + second() - tm; dgefa(a,lda,n,ipvt,&info); } time[0][3] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } time[1][3] = (second() - t1)/ntimes; total = time[0][3] + time[1][3]; time[2][3] = total; time[3][3] = ops/(1.0e3*total); time[4][3] = 2.0e3/time[3][3]; time[5][3] = total/cray; print_time(1); print_time(2); print_time(3); matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time[0][4] = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time[1][4] = second() - t1; total = time[0][4] + time[1][4]; time[2][4] = total; time[3][4] = ops/(1.0e3*total); time[4][4] = 2.0e3/time[3][4]; time[5][4] = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); 
dgefa(aa,ldaa,n,ipvt,&info); time[0][5] = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time[1][5] = second() - t1; total = time[0][5] + time[1][5]; time[2][5] = total; time[3][5] = ops/(1.0e3*total); time[4][5] = 2.0e3/time[3][5]; time[5][5] = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time[0][6] = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time[1][6] = second() - t1; total = time[0][6] + time[1][6]; time[2][6] = total; time[3][6] = ops/(1.0e3*total); time[4][6] = 2.0e3/time[3][6]; time[5][6] = total/cray; ntimes = NTIMES; tm2 = 0; t1 = second(); for (i = 0; i < ntimes; i++) { tm = second(); matgen(aa,ldaa,n,b,&norma); tm2 = tm2 + second() - tm; dgefa(aa,ldaa,n,ipvt,&info); } time[0][7] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } time[1][7] = (second() - t1)/ntimes; total = time[0][7] + time[1][7]; time[2][7] = total; time[3][7] = ops/(1.0e3*total); time[4][7] = 2.0e3/time[3][7]; time[5][7] = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(time[3][3],time[3][7]))" */ kf = (time[3][3] < time[3][7]) ? time[3][3] : time[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } fprintf(stderr," times for array with leading dimension of%4d\n",ldaa); print_time(4); print_time(5); print_time(6); print_time(7); fprintf(stderr,ROLLING);fprintf(stderr,PREC); fprintf(stderr," Precision %5d Kflops ; %d Reps \n",kflops,NTIMES); }
int main(int argc, char ** argv) { int i; // do we have verbose output? bool ga_testing = false; if (argc > 1) { for (i = 1; i < argc; ++i) { if (!strcmp(argv[1],"-ga")) { ga_testing = true; break; } } } double ** a = (double **)malloc(sizeof(double) * N); for (i = 0; i < N; ++i) a[i] = (double *)malloc(sizeof(double) * NP1); double * b = (double *)malloc(sizeof(double) * N); double * x = (double *)malloc(sizeof(double) * N); int * ipvt = (int *)malloc(sizeof(int) * N); // calculate operations per timeInSeconds double ops = (2.0 * ((double)N * N * N)) / 3.0 + 2.0 * ((double)N * N); // generate matrix matgen(a,b); // get starting time //struct timespec start, stop; //clock_gettime(CLOCK_REALTIME,&start); // what we're timing dgefa(a,ipvt); dgesl(a,ipvt,b); // calculate run time //clock_gettime(CLOCK_REALTIME,&stop); double run_time = 0;//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0; // clean up free(ipvt); free(x); free(b); for (i = 0; i < N; ++i) free(a[i]); free(a); // report runtime if (ga_testing) fprintf(stdout,"%f",run_time); else fprintf(stdout,"\nlpbench (Std. C) run time: %f\n\n",run_time); fflush(stdout); // done return 0; }
/*
 * Linpack Kflops driver, dtime()/printf variant with an optional OpenMP
 * section.  Works on a system of order n = ORDER, first with array a and
 * leading dimension ORDER2P1, then with array aa and leading dimension
 * ORDER2.
 *
 * When OMPC is defined, the option "-b <value>" is parsed with getopt and
 * stored in bf via atoi, and the value of omp_get_max_threads() is printed.
 *
 * Columns of the st[][] table as filled here:
 *   0       first factor/solve, followed by the residual check (residn,
 *           resid, eps, x[0]-1, x[n-1]-1 are printed)
 *   1, 2    two further single factor/solve runs on a
 *   3       NTIMES repetitions on a, with the time spent inside matgen (tm2)
 *           subtracted from the dgefa loop before averaging
 *   4, 5, 6 three single factor/solve runs on aa
 *   7       NTIMES repetitions on aa, averaged like column 3
 * Rows: 0 = dgefa time, 1 = dgesl time, 2 = their sum,
 *       3 = kflops (ops / (1e3 * total)), 4 = 2.0e3 / kflops,
 *       5 = total / cray (cray = .056).
 *
 * Final figure: st[3][3] and st[3][7] are clamped to be non-negative, the
 * smaller one is taken and converted with (int)(kf + 0.5).  The older
 * nint-style rounding sequence is retained inside a comment.
 *
 * NOTE(review): none of the divisions by total is guarded against a zero
 * interval.
 * NOTE(review): in this copy the preprocessor directives (#endif, #ifdef
 * OMPC) share a physical line with code and a line break falls inside the
 * " norm. resid ..." string literal; restore the original line breaks before
 * compiling.  The alternative signature selected by the preceding
 * conditional is not visible here.
 */
main () #endif { static REAL aa[ORDER2][ORDER2],a[ORDER2][ORDER2P1],b[ORDER2],x[ORDER2]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps; REAL kf; double t1,tm,tm2,dtime(); static int ipvt[ORDER2],n,i,ntimes,info,lda,ldaa,kflops; lda = ORDER2P1; ldaa = ORDER2; cray = .056; n = ORDER; #ifdef OMPC { int c; extern char *optarg; while ((c = getopt (argc, argv, "b:")) != EOF) { switch (c) { case 'b': bf = atoi (optarg); break; } } } if (omp_get_max_threads () != 1) { printf ("OpenMP(%d threads)\n", omp_get_max_threads ()); } else { printf ("OpenMP(1 thread)\n"); } #endif printf(ROLLING); printf(PREC); printf("Precision Linpack\n\n"); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = dtime(); dgefa(a,lda,n,ipvt,&info); st[0][0] = dtime() - t1; t1 = dtime(); dgesl(a,lda,n,ipvt,b,0); st[1][0] = dtime() - t1; total = st[0][0] + st[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]); } eps = epslon((REAL)ONE); residn = resid/( n*norma*normx*eps ); printf(" norm. 
resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n", (double)residn, (double)resid, (double)eps, (double)x[0]-1, (double)x[n-1]-1); printf(" times are reported for matrices of order %5d\n",n); printf(" dgefa dgesl total kflops unit"); printf(" ratio\n"); st[2][0] = total; st[3][0] = ops/(1.0e3*total); st[4][0] = 2.0e3/st[3][0]; st[5][0] = total/cray; printf(" times for array with leading dimension of%5d\n",lda); print_time(0); matgen(a,lda,n,b,&norma); t1 = dtime(); dgefa(a,lda,n,ipvt,&info); st[0][1] = dtime() - t1; t1 = dtime(); dgesl(a,lda,n,ipvt,b,0); st[1][1] = dtime() - t1; total = st[0][1] + st[1][1]; st[2][1] = total; st[3][1] = ops/(1.0e3*total); st[4][1] = 2.0e3/st[3][1]; st[5][1] = total/cray; matgen(a,lda,n,b,&norma); t1 = dtime(); dgefa(a,lda,n,ipvt,&info); st[0][2] = dtime() - t1; t1 = dtime(); dgesl(a,lda,n,ipvt,b,0); st[1][2] = dtime() - t1; total = st[0][2] + st[1][2]; st[2][2] = total; st[3][2] = ops/(1.0e3*total); st[4][2] = 2.0e3/st[3][2]; st[5][2] = total/cray; ntimes = NTIMES; tm2 = 0.0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen(a,lda,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa(a,lda,n,ipvt,&info); } st[0][3] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } st[1][3] = (dtime() - t1)/ntimes; total = st[0][3] + st[1][3]; st[2][3] = total; st[3][3] = ops/(1.0e3*total); st[4][3] = 2.0e3/st[3][3]; st[5][3] = total/cray; print_time(1); print_time(2); print_time(3); matgen(aa,ldaa,n,b,&norma); t1 = dtime(); dgefa(aa,ldaa,n,ipvt,&info); st[0][4] = dtime() - t1; t1 = dtime(); dgesl(aa,ldaa,n,ipvt,b,0); st[1][4] = dtime() - t1; total = st[0][4] + st[1][4]; st[2][4] = total; st[3][4] = ops/(1.0e3*total); st[4][4] = 2.0e3/st[3][4]; st[5][4] = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = dtime(); dgefa(aa,ldaa,n,ipvt,&info); st[0][5] = dtime() - t1; t1 = dtime(); dgesl(aa,ldaa,n,ipvt,b,0); st[1][5] = dtime() - t1; total = st[0][5] + 
st[1][5]; st[2][5] = total; st[3][5] = ops/(1.0e3*total); st[4][5] = 2.0e3/st[3][5]; st[5][5] = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = dtime(); dgefa(aa,ldaa,n,ipvt,&info); st[0][6] = dtime() - t1; t1 = dtime(); dgesl(aa,ldaa,n,ipvt,b,0); st[1][6] = dtime() - t1; total = st[0][6] + st[1][6]; st[2][6] = total; st[3][6] = ops/(1.0e3*total); st[4][6] = 2.0e3/st[3][6]; st[5][6] = total/cray; ntimes = NTIMES; tm2 = 0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen(aa,ldaa,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa(aa,ldaa,n,ipvt,&info); } st[0][7] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } st[1][7] = (dtime() - t1)/ntimes; total = st[0][7] + st[1][7]; st[2][7] = total; st[3][7] = ops/(1.0e3*total); st[4][7] = 2.0e3/st[3][7]; st[5][7] = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */ /* kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } */ if ( st[3][3] < ZERO ) st[3][3] = ZERO; if ( st[3][7] < ZERO ) st[3][7] = ZERO; kf = st[3][3]; if ( st[3][7] < st[3][3] ) kf = st[3][7]; kflops = (int)(kf + 0.5); printf(" times for array with leading dimension of%5d\n",ldaa); print_time(4); print_time(5); print_time(6); print_time(7); printf(ROLLING); printf(PREC); printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES); }
/*
 * Time trusted_gemm against test_gemm for one problem case and print one
 * result row for each of them.
 *
 * Parameters (as used below):
 *   MFLOP         work budget; the repetition count is MFLOP / mops with a
 *                 minimum of 1, where mops is 2*M*N*K/1e6 when TREAL is
 *                 defined and 8*M*N*K/1e6 otherwise.
 *   CACHESIZE     sizes the workspace: nmat = (CACHESIZE/ATL_sizeof + i) / i
 *                 operand sets are allocated, with i = M*K + K*N + M*N.
 *   TA, TB        'n'/'N' selects AtlasNoTrans, 'c'/'C' AtlasConjTrans,
 *                 anything else AtlasTrans.
 *   M, N, K       problem dimensions.
 *   alpha, beta   scalars passed to the gemm routines.
 *   lda, ldb, ldc leading dimensions; they determine the per-set strides
 *                 incA, incB and incC.
 *
 * Each repetition advances the a, b and c pointers by one operand set (inc
 * elements).  When c reaches the end of the workspace (st) the pointers wrap
 * back to the first set and bet is toggled between beta and betinv, where
 * betinv is 1/beta (the complex reciprocal in the non-TREAL build, and 0 for
 * beta == 0).
 *
 * Output: the first row reports the time per call and Mflop rate of
 * trusted_gemm with a fixed 1.0 in the speed-up column; the second row
 * reports test_gemm with t1/t2 as speed-up.  The static counter itst is
 * incremented once per call.  The workspace is freed before returning.
 *
 * Returns 1.
 *
 * NOTE(review): a, b, c and bet are not reset between the two timing loops,
 * so the second loop continues from wherever the first one stopped.
 * NOTE(review): several locals (ii, jj, j, PASSED, nerrs, maxval, f1, ferr,
 * feps) are not used in the code visible here.
 * NOTE(review): in this copy the #ifdef/#define directives share a physical
 * line with code; restore the original line breaks before compiling.
 */
int mmcase0(int MFLOP, int CACHESIZE, char TA, char TB, int M, int N, int K, SCALAR alpha, int lda, int ldb, SCALAR beta, int ldc) { char *pc; #ifdef TREAL char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %6.2f %5.1f %5.2f %3s\n"; #define MALPH alpha #define MBETA beta TYPE betinv, bet=beta; #else #define MALPH *alpha, alpha[1] #define MBETA *beta, beta[1] char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %5.1f %5.1f %6.2f %6.1f %4.2f %3s\n"; TYPE betinv[2], *bet=beta; #endif int nreps, incA, incB, incC, inc, nmat, k; TYPE *c, *C, *a, *A, *b, *B, *st; int ii, jj, i, j=0, PASSED, nerrs; double t0, t1, t2, t3, mflop, mf, mops; TYPE maxval, f1, ferr; static TYPE feps=0.0; static int itst=1; enum ATLAS_TRANS TAc, TBc; void *vp; #ifdef TCPLX if (*beta == 0.0 && beta[1] == 0.0) betinv[0] = betinv[1] = 0.0; else if (beta[1] == 0.0) { betinv[0] = 1 / *beta; betinv[1] = 0.0; } else { t0 = *beta; t1 = beta[1]; if (Mabs(t1) <= Mabs(t0)) { t2 = t1 / t0; betinv[0] = t0 = 1.0 / (t0 + t1*t2); betinv[1] = -t0 * t2; } else { t2 = t0 / t1; betinv[1] = t0 = -1.0 / (t1 + t0*t2); betinv[0] = -t2 * t0; } } mops = ( ((8.0*M)*N)*K ) / 1000000.0; #else if (beta != 0.0) betinv = 1.0 / beta; else betinv = beta; mops = ( ((2.0*M)*N)*K ) / 1000000.0; #endif nreps = MFLOP / mops; if (nreps < 1) nreps = 1; if (TA == 'n' || TA == 'N') { TAc = AtlasNoTrans; incA = lda * K; } else { if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans; else TAc = AtlasTrans; incA = lda * M; } if (TB == 'n' || TB == 'N') { incB = ldb * N; TBc = AtlasNoTrans; } else { incB = ldb * K; if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans; else TBc = AtlasTrans; } incC = ldc*N; inc = incA + incB + incC; i = M*K + K*N + M*N; /* amount of inc actually referenced */ /* This is a hack; change to use of flushcache instead. 
*/ nmat = ((CACHESIZE/ATL_sizeof) + i)/i; vp = malloc(ATL_MulBySize(nmat*inc)+ATL_Cachelen); ATL_assert(vp); C = c = ATL_AlignPtr(vp); a = A = C + incC; b = B = A + incA; st = C + nmat*inc; matgen(inc, nmat, C, inc, M*N); #ifdef DEBUG printmat("A0", M, K, A, lda); printmat("B0", K, N, B, ldb); printmat("C0", M, N, C, ldc); #endif t0 = time00(); for (k=nreps; k; k--) { trusted_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc); c += inc; a += inc; b += inc; if (c == st) { c = C; a = A; b = B; if (bet == beta) bet = betinv; else bet = beta; } } t1 = time00() - t0; t1 /= nreps; if (t1 <= 0.0) mflop = t1 = 0.0; else /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */ mflop = mops / t1; printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---"); #ifdef DEBUG printmat("C", M, N, C, ldc); #endif matgen(inc, nmat, C, inc, M*N); t0 = time00(); for (k=nreps; k; k--) { test_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc); c += inc; a += inc; b += inc; if (c == st) { c = C; a = A; b = B; if (bet == beta) bet = betinv; else bet = beta; } } t2 = time00() - t0; t2 /= nreps; if (t2 <= 0.0) t2 = mflop = 0.0; else mflop = mops / t2; pc = "---"; if (t1 == t2) t3 = 1.0; else if (t2 != 0.0) t3 = t1/t2; else t3 = 0.0; printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc); free(vp); return(1); }
/*
 * Driver for the gemm tester/timer.
 *
 * The command line is decoded by GetFlags into the test switch (TEST), the
 * lists of Side/Uplo/TransA/TransB/Diag settings, the M, N and K ranges
 * (start, end, increment), the alpha and beta lists, LDA_IS_M, MFLOP and
 * CACHESIZE.
 *
 *   - M0 == -1 makes M follow N (MSAME); K0 == -1 makes K follow N (KSAME).
 *     In both cases the corresponding range is collapsed so its loop runs
 *     once.
 *   - When MFLOP is zero, A, B, C (and D when TEST is set) are allocated
 *     once for the largest problem; failure to allocate prints a message
 *     and exits with -1.
 *   - Before the sweep the code is warmed up: four 100x100x100 mmcase0
 *     calls when MFLOP is set, otherwise one trusted_gemm and one test_gemm
 *     call on a problem of at most 100 in each dimension.
 *   - The sweep visits every M, N, K, TransA, TransB, alpha and beta
 *     combination.  Leading dimensions follow the current problem size when
 *     LDA_IS_M is set and the maximum sizes (MN, KN, NN) otherwise.
 *     With MFLOP set each case is timed by mmcase0 and counted in ipass;
 *     otherwise mmcase is called and its return value is added to ipass.
 *   - A summary line is printed (pass/fail counts when TEST && !MFLOP,
 *     otherwise the number of timing runs), the arrays obtained from
 *     GetFlags and any local buffers are freed, and the program exits
 *     with 0.
 *
 * NOTE(review): TA/TB keep their previous value if a TransA/TransB entry is
 * none of AtlasNoTrans, AtlasTrans, AtlasConjTrans.
 * NOTE(review): in this copy the #ifdef/#else/#endif directives share a
 * physical line with code; restore the original line breaks before
 * compiling.
 */
main(int nargs, char *args[]) /* * tst <tst> <# TA> <TA's> <# TB's> <TB's> <M0> <MN> <incM> <N0> <NN> <incN> * <K0> <KN> <incK> <# alphas> <alphas> <# betas> <betas> * */ { int M0, MN, incM, N0, NN, incN, K0, KN, incK, lda, ldb, ldc, MFLOP; int i, k, m, n, im, in, ik, ita, itb, ia, ib, nTA, nTB, nalph, nbeta; int itst=0, ipass=0, TEST, LDA_IS_M, MSAME=0, KSAME=0; int ndiag, nuplo, nside; TYPE *alph, *beta, *A, *B, *C, *D=NULL; #ifdef TREAL TYPE bet1 = 1.0, alp1 = -1.0; #else TYPE bet1[2] = {1.0, 0.0}, alp1[2] = {-1.0, 0.0}; #endif char TA, TB; enum ATLAS_SIDE *Side; enum ATLAS_UPLO *Uplo; enum ATLAS_TRANS *TransA, *TransB, TAc, TBc; enum ATLAS_DIAG *Diag; int CACHESIZE; GetFlags(nargs, args, &TEST, &nside, &Side, &nuplo, &Uplo, &nTA, &TransA, &nTB, &TransB, &ndiag, &Diag, &M0, &MN, &incM, &N0, &NN, &incN, &K0, &KN, &incK, &nalph, &alph, &nbeta, &beta, &LDA_IS_M, &MFLOP,&CACHESIZE); if (M0 == -1) { MSAME = 1; M0 = MN = incM = NN; } if (K0 == -1) { KSAME = 1; K0 = KN = incK = NN; } if (!MFLOP) { A = malloc(MN*KN*ATL_sizeof); B = malloc(NN*KN*ATL_sizeof); C = malloc(MN*NN*ATL_sizeof); if (TEST) D = malloc(MN*NN*ATL_sizeof); else D = NULL; if (!A || !B || !C || (TEST && !D)) { fprintf(stderr, "Not enough memory to run tests!!\n"); exit(-1); } } /* * Page the code in from disk, so first timing doesn't blow */ if (MFLOP) { mmcase0(10, 1, 'n', 'n', 100, 100, 100, alp1, 100, 100, bet1, 100); mmcase0(10, 1, 'n', 't', 100, 100, 100, alp1, 100, 100, bet1, 100); mmcase0(10, 1, 't', 'n', 100, 100, 100, alp1, 100, 100, bet1, 100); mmcase0(10, 1, 't', 't', 100, 100, 100, alp1, 100, 100, bet1, 100); } else { m = Mmin(100, MN); k = Mmin(100, KN); n = Mmin(100, NN); matgen(m, k, A, m, m*k); matgen(k, n, B, k, n*k); matgen(m, n, C, m, m*n); TA = TB = 'N'; TAc = TBc = AtlasNoTrans; trusted_gemm(TAc, TBc, m, n, k, alp1, A, m, B, k, bet1, C, m); test_gemm(TAc, TBc, m, n, k, alp1, A, m, B, k, bet1, C, m); } #ifdef TREAL printf("\nTEST TA TB M N K alpha beta Time Mflop SpUp PASS\n"); 
printf("==== == == === === === ===== ===== ====== ===== ==== ====\n\n"); #else printf("\nTEST TA TB M N K alpha beta Time Mflop SpUp PASS\n"); printf("==== == == === === === ===== ===== ===== ===== ====== ===== ==== ====\n\n"); #endif for (im=M0; im <= MN; im += incM) { for (n=N0; n <= NN; n += incN) { if (MSAME) m = n; else m = im; for (ik=K0; ik <= KN; ik += incK) { if (KSAME) k = n; else k = ik; for (ita=0; ita != nTA; ita++) { if (TransA[ita] == AtlasNoTrans) TA = 'N'; else if (TransA[ita] == AtlasTrans) TA = 'T'; else if (TransA[ita] == AtlasConjTrans) TA = 'C'; for (itb=0; itb != nTB; itb++) { if (TransB[itb] == AtlasNoTrans) TB = 'N'; else if (TransB[itb] == AtlasTrans) TB = 'T'; else if (TransB[itb] == AtlasConjTrans) TB = 'C'; for (ia=0; ia != nalph; ia++) { for (ib=0; ib != nbeta; ib++) { itst++; if (LDA_IS_M) { if (TA == 'n' || TA == 'N') lda = m; else lda = k; if (TB == 'n' || TB == 'N') ldb = k; else ldb = n; ldc = m; } else { if (TA == 'n' || TA == 'N') lda = MN; else lda = KN; if (TB == 'n' || TB == 'N') ldb = KN; else ldb = NN; ldc = MN; } if (MFLOP) { ipass++; #ifdef TREAL mmcase0(MFLOP, CACHESIZE, TA, TB, m, n, k, alph[ia], lda, ldb, beta[ib], ldc); #else mmcase0(MFLOP, CACHESIZE, TA, TB, m, n, k, alph+(ia SHIFT), lda, ldb, beta+(ib SHIFT), ldc); #endif } else { #ifdef TREAL ipass += mmcase(TEST, CACHESIZE, TA, TB, m, n, k, alph[ia], A, lda, B, ldb, beta[ib], C, ldc, D,ldc); #else ipass += mmcase(TEST, CACHESIZE, TA, TB, m, n, k, alph+(ia SHIFT), A, lda, B, ldb, beta+(ib SHIFT), C, ldc, D,ldc); #endif } } } } } } } } if (TEST && !MFLOP) printf("\nNTEST=%d, NUMBER PASSED=%d, NUMBER FAILURES=%d\n", itst, ipass, itst-ipass); else printf("\nDone with %d timing runs\n",itst); free(Side); free(Uplo); free(TransA); free(TransB); free(Diag); free(alph); free(beta); if (!MFLOP) { free(A); free(B); free(C); if (D) free(D); } exit(0); }
main () { static REAL aa[200*200],a[200*201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,t1,tm2,epsn,x1,x2; REAL mflops; static int ipvt[200],n,i,j,ntimes,info,lda,ldaa; int Endit, pass, loop; REAL overhead1, overhead2, time1, time2; FILE *outfile; char *compiler, *options, general[9][80] = {" "}; outfile = fopen("Linpack.txt","a+"); if (outfile == NULL) { printf ("Cannot open results file \n\n"); printf("Press any key\n"); #ifdef DOS Endit = getch(); #endif exit (0); } /************************************************************************ * Enter details of compiler and options used * ************************************************************************/ /*----------------- --------- --------- ---------*/ compiler = "INSERT COMPILER NAME HERE"; options = "INSERT OPTIMISATION OPTIONS HERE"; /* Include -dDP or -dSP and -dROLL or -dUNROLL */ lda = 201; ldaa = 200; cray = .056; n = 100; fprintf(stdout,ROLLING);fprintf(stdout,PREC); fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n"); fprintf(stdout,"Compiler %s\n",compiler); fprintf(stdout,"Optimisation %s\n\n",options); ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); atime[0][0] = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); atime[1][0] = second() - t1; total = atime[0][0] + atime[1][0]; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen(a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? 
normx : fabs((double)x[i]); } eps = epslon(ONE); residn = resid/( n*norma*normx*eps ); epsn = eps; x1 = x[0] - 1; x2 = x[n-1] - 1; printf("norm resid resid machep"); printf(" x[0]-1 x[n-1]-1\n"); printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n", (double)residn, (double)resid, (double)epsn, (double)x1, (double)x2); fprintf(stderr,"Times are reported for matrices of order %5d\n",n); fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); atime[2][0] = total; if (total > 0.0) { atime[3][0] = ops/(1.0e6*total); atime[4][0] = 2.0/atime[3][0]; } else { atime[3][0] = 0.0; atime[4][0] = 0.0; } atime[5][0] = total/cray; print_time(0); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ fprintf (stderr,"\nCalculating matgen overhead\n"); pass = -20; loop = NTIMES; do { time1 = second(); pass = pass + 1; for ( i = 0 ; i < loop ; i++) { matgen(a,lda,n,b,&norma); } time2 = second(); overhead1 = (time2 - time1); fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1); if (overhead1 > 5.0) { pass = 0; } if (pass < 0) { if (overhead1 < 0.1) { loop = loop * 10; } else { loop = loop * 2; } } } while (pass < 0); overhead1 = overhead1 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1); /************************************************************************ * Calculate matgen/dgefa passes for 5 seconds * ************************************************************************/ fprintf (stderr,"Calculating matgen/dgefa passes for 5 seconds\n"); pass = -20; ntimes = NTIMES; do { time1 = second(); pass = pass + 1; for ( i = 0 ; i < ntimes ; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } time2 = second() - time1; fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2); if (time2 > 
5.0) { pass = 0; } if (pass < 0) { if (time2 < 0.1) { ntimes = ntimes * 10; } else { ntimes = ntimes * 2; } } } while (pass < 0); ntimes = 5.0 * (double)ntimes / time2; if (ntimes == 0) ntimes = 1; fprintf (stderr,"Passes used %10d \n\n", ntimes); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ tm2 = ntimes * overhead1; atime[3][6] = 0; for (j=1 ; j<6 ; j++) { t1 = second(); for (i = 0; i < ntimes; i++) { matgen(a,lda,n,b,&norma); dgefa(a,lda,n,ipvt,&info ); } atime[0][j] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(a,lda,n,ipvt,b,0); } atime[1][j] = (second() - t1)/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][6] = atime[3][6] + atime[3][j]; print_time(j); } atime[3][6] = atime[3][6] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][6]); fprintf (stderr,"\nCalculating matgen2 overhead\n"); /************************************************************************ * Calculate overhead of executing matgen procedure * ************************************************************************/ time1 = second(); for ( i = 0 ; i < loop ; i++) { matgen(aa,ldaa,n,b,&norma); } time2 = second(); overhead2 = (time2 - time1); overhead2 = overhead2 / (double)loop; fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2); fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa); fprintf(stderr," dgefa dgesl total Mflops unit"); fprintf(stderr," ratio\n"); /************************************************************************ * Execute 5 passes * ************************************************************************/ 
tm2 = ntimes * overhead2; atime[3][12] = 0; for (j=7 ; j<12 ; j++) { t1 = second(); for (i = 0; i < ntimes; i++) { matgen(aa,ldaa,n,b,&norma); dgefa(aa,ldaa,n,ipvt,&info ); } atime[0][j] = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i < ntimes; i++) { dgesl(aa,ldaa,n,ipvt,b,0); } atime[1][j] = (second() - t1)/ntimes; total = atime[0][j] + atime[1][j]; atime[2][j] = total; atime[3][j] = ops/(1.0e6*total); atime[4][j] = 2.0/atime[3][j]; atime[5][j] = total/cray; atime[3][12] = atime[3][12] + atime[3][j]; print_time(j); } atime[3][12] = atime[3][12] / 5.0; fprintf (stderr,"Average %11.2f\n", (double)atime[3][12]); /************************************************************************ * Use minimum average as overall Mflops rating * ************************************************************************/ mflops = atime[3][6]; if (atime[3][12] < mflops) mflops = atime[3][12]; fprintf(stderr,"\n"); fprintf(stderr,ROLLING);fprintf(stderr,PREC); fprintf(stderr," Precision %11.2f Mflops \n\n",mflops); what_date(); /************************************************************************ * Type details of hardware, software etc. 
* ************************************************************************/ printf ("Enter the following data which will be " "appended to file Linpack.txt \n\n"); printf ("PC Supplier/model ?\n "); scanf ("%[^\n]", general[1]); fflush (stdin); printf ("CPU ?\n "); scanf ("%[^\n]", general[2]); fflush (stdin); printf ("Clock MHz ?\n "); scanf ("%[^\n]", general[3]); fflush (stdin); printf ("Cache ?\n "); scanf ("%[^\n]", general[4]); fflush (stdin); printf ("Chipset/options ?\n "); scanf ("%[^\n]", general[5]); fflush (stdin); printf ("OS/DOS version ?\n "); scanf ("%[^\n]", general[6]); fflush (stdin); printf ("Your name ?\n "); scanf ("%[^\n]", general[7]); fflush (stdin); printf ("Where from ?\n "); scanf ("%[^\n]", general[8]); fflush (stdin); printf ("Mail address ?\n "); scanf ("%[^\n]", general[0]); fflush (stdin); /************************************************************************ * Add results to output file LLloops.txt * ************************************************************************/ fprintf (outfile, "----------------- ----------------- --------- " "--------- ---------\n"); fprintf (outfile, "LINPACK BENCHMARK FOR PCs 'C/C++' n @ 100\n\n"); fprintf (outfile, "Month run %d/%d\n", this_month, this_year); fprintf (outfile, "PC model %s\n", general[1]); fprintf (outfile, "CPU %s\n", general[2]); fprintf (outfile, "Clock MHz %s\n", general[3]); fprintf (outfile, "Cache %s\n", general[4]); fprintf (outfile, "Options %s\n", general[5]); fprintf (outfile, "OS/DOS %s\n", general[6]); fprintf (outfile, "Compiler %s\n", compiler); fprintf (outfile, "OptLevel %s\n", options); fprintf (outfile, "Run by %s\n", general[7]); fprintf (outfile, "From %s\n", general[8]); fprintf (outfile, "Mail %s\n\n", general[0]); fprintf(outfile, "Rolling %s\n",ROLLING); fprintf(outfile, "Precision %s\n",PREC); fprintf(outfile, "norm. 
resid %16.1f\n",(double)residn); fprintf(outfile, "resid %16.8e\n",(double)resid); fprintf(outfile, "machep %16.8e\n",(double)epsn); fprintf(outfile, "x[0]-1 %16.8e\n",(double)x1); fprintf(outfile, "x[n-1]-1 %16.8e\n",(double)x2); fprintf(outfile, "matgen 1 seconds %16.5f\n",overhead1); fprintf(outfile, "matgen 2 seconds %16.5f\n",overhead2); fprintf(outfile, "Repetitions %16d\n",ntimes); fprintf(outfile, "Leading dimension %16d\n",lda); fprintf(outfile, " dgefa dgesl " " total Mflops\n"); fprintf(outfile, "1 pass seconds %16.5f %9.5f %9.5f\n", atime[0][0], atime[1][0], atime[2][0]); for (i=1 ; i<6 ; i++) { fprintf(outfile, "Repeat seconds %16.5f %9.5f %9.5f %9.2f\n", atime[0][i], atime[1][i], atime[2][i], atime[3][i]); } fprintf(outfile, "Average %46.2f\n",atime[3][6]); fprintf(outfile, "Leading dimension %16d\n",ldaa); for (i=7 ; i<12 ; i++) { fprintf(outfile, "Repeat seconds %16.5f %9.5f %9.5f %9.2f\n", atime[0][i], atime[1][i], atime[2][i], atime[3][i]); } fprintf(outfile, "Average %46.2f\n\n",atime[3][12]); fclose (outfile); printf("\nPress any key\n"); #ifdef DOS Endit = getch(); #endif }
/*
 * Time one GEMM case with trusted_gemm and (unless TIMEONLY is defined)
 * test_gemm, print one result row per routine, and optionally compare the
 * two results.
 *
 * TEST       nonzero: compare the test_gemm output in D element-wise against
 *            the trusted_gemm output in C; zero: time only, reusing C as the
 *            output buffer for both calls.
 * CACHESIZE  passed to ATL_flushcache() before the operands are generated.
 * TA, TB     transpose selectors ('N', 'T' or 'C', either case).
 * M, N, K    problem dimensions.
 * alpha/beta scalars (by value for real builds, pointer to a re/im pair
 *            otherwise, as selected by TREAL).
 * A, B, C, D operand and result storage with leading dimensions lda, ldb,
 *            ldc, ldd.
 *
 * Returns 1 when every compared element is within the error bound or when no
 * comparison was requested, and 0 when at least one element exceeds it.
 */
int mmcase(int TEST, int CACHESIZE, char TA, char TB, int M, int N, int K,
           SCALAR alpha, TYPE *A, int lda, TYPE *B, int ldb, SCALAR beta,
           TYPE *C, int ldc, TYPE *D, int ldd)
{
   char *pc;
#ifdef TREAL
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %6.2f %5.1f %5.2f %3s\n";
   #define MALPH alpha
   #define MBETA beta
#else
   #define MALPH *alpha, alpha[1]
   #define MBETA *beta, beta[1]
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %5.1f %5.1f %6.2f %6.1f %4.2f %3s\n";
#endif
   /*
    * PASSED starts at 1 so that the timing-only path (TEST == 0 without
    * TIMEONLY) returns a defined value; previously it was returned
    * uninitialised there and added to the caller's pass counter.
    */
   int ii, jj, i, j=0, PASSED=1, nerrs;
   double t0, t1, t2, t3, mflop;
   TYPE maxval, f1, ferr;
   static TYPE feps=0.0;
   static int itst=1;
   /*int *L2, nL2=(1.3*L2SIZE)/sizeof(int);*/
   enum ATLAS_TRANS TAc, TBc;
   double l2ret;

   if (!TEST) D = C;
   /*if (nL2) L2 = malloc(nL2*sizeof(int));*/
   l2ret = ATL_flushcache( CACHESIZE );

   /* Generate operands; A and B are stored transposed when requested. */
   if (TA == 'n' || TA == 'N')
   {
      matgen(M, K, A, lda, K*1112);
      TAc = AtlasNoTrans;
   }
   else
   {
      matgen(K, M, A, lda, K*1112);
      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;
      else TAc = AtlasTrans;
   }
   if (TB == 'n' || TB == 'N')
   {
      matgen(K, N, B, ldb, N*2238);
      TBc = AtlasNoTrans;
   }
   else
   {
      matgen(N, K, B, ldb, N*2238);
      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;
      else TBc = AtlasTrans;
   }
   matgen(M, N, C, ldc, M*N);

#ifdef DEBUG
   printmat("A0", M, K, A, lda);
   printmat("B0", K, N, B, ldb);
   printmat("C0", M, N, C, ldc);
#endif

/*
   if (L2)
   {
      for (i=0; i != nL2; i++) L2[i] = 0.0;
      for (i=0; i != nL2; i++) j += L2[i];
   }*/

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   /* Reference run. */
   t0 = time00();
   trusted_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   t1 = time00() - t0;
   if (t1 <= 0.0) mflop = t1 = 0.0;
   else   /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */
   #ifdef TCPLX
      mflop = ( ((8.0*M)*N)*K ) / (t1*1000000.0);
   #else
      mflop = ( ((2.0*M)*N)*K ) / (t1*1000000.0);
   #endif
   printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---");

#ifdef DEBUG
   printmat("C", M, N, C, ldc);
#endif

#ifndef TIMEONLY
   /* D is regenerated with the same seed as C so both runs start equal. */
   matgen(M, N, D, ldd, M*N);

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   /* Run under test. */
   t0 = time00();
   test_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, D, ldd);
   t2 = time00() - t0;
   if (t2 <= 0.0) t2 = mflop = 0.0;
   else
   #ifdef TCPLX
      mflop = ( ((8.0*M)*N)*K ) / (t2*1000000.0);
   #else
      mflop = ( ((2.0*M)*N)*K ) / (t2*1000000.0);
   #endif

#ifdef DEBUG
   printmat("D", M, N, D, ldd);
#endif
   if (TEST)
   {
      if (feps == 0.0)
      {
#if 0
         f1 = feps = 0.5;
         do
         {
            feps = f1;
            f1 *= 0.5;
            maxval = 1.0 + f1;
         }
         while (maxval != 1.0);
         printf("feps=%e\n",feps);
#else
         feps = EPS;
#endif
#ifdef DEBUG
         printf("feps=%e\n",feps);
#endif
      }
      /* Error bound grows with K and the magnitudes of alpha and beta. */
#ifdef TREAL
      ferr = 2.0 * (Mabs(alpha) * 2.0*K*feps + Mabs(beta) * feps) + feps;
#else
      f1 = Mabs(*alpha) + Mabs(alpha[1]);
      maxval = Mabs(*beta) + Mabs(beta[1]);
      ferr = 2.0 * (f1*8.0*K*feps + maxval*feps) + feps;
#endif
      PASSED = 1;
      maxval = 0.0;
      pc = "YES";
      nerrs = ii = jj = 0;
      for (j=0; j != N; j++)
      {
         for (i=0; i != M SHIFT; i++)
         {
            f1 = D[i] - C[i];
            if (f1 < 0.0) f1 = -f1;
            if (f1 > ferr)
            {
               nerrs++;
               PASSED = 0;
               pc = "NO!";
               /* Remember the 1-based position of the worst offender. */
               if (f1 > maxval)
               {
                  maxval=f1;
                  ii = i+1;
                  jj = j+1;
               }
            }
         }
         D += ldd SHIFT;
         C += ldc SHIFT;
      }
      if (maxval != 0.0)
         fprintf(stderr, "ERROR: nerr=%d, i=%d, j=%d, maxval=%e\n",
                 nerrs, ii,jj, maxval);
   }
   else pc = "---";
   /* Speedup of the tested routine relative to the trusted one. */
   if (t1 == t2) t3 = 1.0;
   else if (t2 != 0.0) t3 = t1/t2;
   else t3 = 0.0;
   printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc);
#else
   itst++;
   PASSED = 1;
#endif
   /*free(L2);*/
   /* NOTE(review): ATL_flushcache(0) presumably releases the flush area - confirm. */
   l2ret = ATL_flushcache( 0 );
   return(PASSED);
}
/*
 * clinpack driver using the TimerOn/TimerOff/TimerElapsed and UserTimer*
 * facilities.
 *
 * Solves a system of order n = 100 several times with two leading dimensions
 * (lda = 201 for a, ldaa = 200 for aa), prints a table of timings and finishes
 * with a Kflops figure.  Results are kept in the file-scope table st[][]:
 *   row 0 = dgefa time, row 1 = dgesl time, row 2 = total,
 *   row 3 = ops/(1.0e3*total) (printed under "kflops"),
 *   row 4 = 2.0e3/row 3, row 5 = total/cray;
 *   columns 0-3 use a/lda, columns 4-7 use aa/ldaa; columns 3 and 7 are
 *   averages over NTIMES repetitions.
 * Each measured time is also handed to Report() under a "clinpack(...)" label.
 * The function does not return: it ends with exit( EXIT_SUCCESS ).
 */
void main ()
{
	static REAL aa[200][200],a[200][201],b[200],x[200];
	REAL cray,ops,total,norma,normx;
	REAL resid,residn,eps;
	REAL epslon(),kf;
#if 0
	double t1;
	double tm;
#endif
	double tm2;
	double dtime();
	static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
	static user_timer second_timer;

	lda = 201;
	ldaa = 200;
	cray = .056;
	n = 100;

	printf(ROLLING); printf(PREC); printf("Precision Linpack\n\n");

	/* Operation count used to turn a time into a rate. */
	ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

	/* Column 0: first single factor + solve with leading dimension lda. */
	matgen(a,lda,n,b,&norma);
	TimerOn();
	dgefa(a,lda,n,ipvt,&info);
	TimerOff();
	st[0][0] = TimerElapsed();
	Report( "clinpack(dgefa#1)", st[0][0] );

	TimerOn();
	dgesl(a,lda,n,ipvt,b,0);
	TimerOff();
	st[1][0] = TimerElapsed();
	Report( "clinpack(dgesl#1)", st[1][0] );

	total = st[0][0] + st[1][0];

/*     compute a residual to verify results.  */

	/* Save the solution, regenerate the system and form b = A*x - b. */
	for (i = 0; i < n; i++) {
		x[i] = b[i];
	}
	matgen(a,lda,n,b,&norma);
	for (i = 0; i < n; i++) {
		b[i] = -b[i];
	}
	dmxpy(n,b,n,lda,x,a);
	/* Max-norms of the residual and of the solution. */
	resid = 0.0;
	normx = 0.0;
	for (i = 0; i < n; i++) {
		resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
		normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
	}
	eps = epslon((REAL)ONE);
	residn = resid/( n*norma*normx*eps );

	printf(" norm. resid resid machep");
	printf(" x[0]-1 x[n-1]-1\n");
	printf("%8.1f %16.8e%16.8e%16.8e%16.8e\n",
	       (double)residn, (double)resid, (double)eps,
	       (double)x[0]-1, (double)x[n-1]-1);

	printf(" times are reported for matrices of order %5d\n",n);
	printf(" dgefa dgesl total kflops unit");
	printf(" ratio\n");

	/* NOTE(review): no guard against total == 0 before the divisions below. */
	st[2][0] = total;
	st[3][0] = ops/(1.0e3*total);
	st[4][0] = 2.0e3/st[3][0];
	st[5][0] = total/cray;

	printf(" times for array with leading dimension of%5d\n",lda);
	print_time(0);

	/* Column 1: second single pass with lda. */
	matgen(a,lda,n,b,&norma);
	TimerOn();
	dgefa(a,lda,n,ipvt,&info);
	TimerOff();
	st[0][1] = TimerElapsed();
	Report( "clinpack(dgefa#2)", st[0][1] );

	TimerOn();
	dgesl(a,lda,n,ipvt,b,0);
	TimerOff();
	st[1][1] = TimerElapsed();
	Report( "clinpack(dgesl#2)", st[1][1] );

	total = st[0][1] + st[1][1];
	st[2][1] = total;
	st[3][1] = ops/(1.0e3*total);
	st[4][1] = 2.0e3/st[3][1];
	st[5][1] = total/cray;

	/* Column 2: third single pass with lda. */
	matgen(a,lda,n,b,&norma);
	TimerOn();
	dgefa(a,lda,n,ipvt,&info);
	TimerOff();
	st[0][2] = TimerElapsed();
	Report( "clinpack(dgefa#3)", st[0][2] );

	TimerOn();
	dgesl(a,lda,n,ipvt,b,0);
	TimerOff();
	st[1][2] = TimerElapsed();
	Report( "clinpack(dgesl#3)", st[1][2] );

	total = st[0][2] + st[1][2];
	st[2][2] = total;
	st[3][2] = ops/(1.0e3*total);
	st[4][2] = 2.0e3/st[3][2];
	st[5][2] = total/cray;

	/*
	 * Column 3: average over NTIMES repetitions with lda.  The outer user
	 * timer covers matgen + dgefa; the matgen share is accumulated in tm2
	 * with the inner timer and subtracted afterwards.
	 */
	ntimes = NTIMES;
	tm2 = 0.0;
	UserTimerOn( &second_timer );

	for (i = 0; i < ntimes; i++) {
		TimerOn();
		matgen(a,lda,n,b,&norma);
		TimerOff();
		tm2 = tm2 + TimerElapsed();
		dgefa(a,lda,n,ipvt,&info);
	}

	UserTimerOff( &second_timer );
	st[0][3] = ( UserTimerElapsed( &second_timer ) - tm2)/ntimes;
	Report( "clinpack(dgefa#4)", st[0][3] );

	TimerOn();
	for (i = 0; i < ntimes; i++) {
		dgesl(a,lda,n,ipvt,b,0);
	}
	TimerOff();
	st[1][3] = TimerElapsed()/ntimes;
	Report( "clinpack(dgesl#4)", st[1][3] );

	total = st[0][3] + st[1][3];
	st[2][3] = total;
	st[3][3] = ops/(1.0e3*total);
	st[4][3] = 2.0e3/st[3][3];
	st[5][3] = total/cray;

	print_time(1);
	print_time(2);
	print_time(3);

	/* Column 4: first single pass with leading dimension ldaa. */
	matgen(aa,ldaa,n,b,&norma);
	TimerOn();
	dgefa(aa,ldaa,n,ipvt,&info);
	TimerOff();
	st[0][4] = TimerElapsed();
	Report( "clinpack(dgefa#5)", st[0][4] );

	TimerOn();
	dgesl(aa,ldaa,n,ipvt,b,0);
	TimerOff();
	st[1][4] = TimerElapsed();
	Report( "clinpack(dgesl#5)", st[1][4] );

	total = st[0][4] + st[1][4];
	st[2][4] = total;
	st[3][4] = ops/(1.0e3*total);
	st[4][4] = 2.0e3/st[3][4];
	st[5][4] = total/cray;

	/* Column 5: second single pass with ldaa. */
	matgen(aa,ldaa,n,b,&norma);
	TimerOn();
	dgefa(aa,ldaa,n,ipvt,&info);
	TimerOff();
	st[0][5] = TimerElapsed();
	Report( "clinpack(dgefa#6)", st[0][5] );

	TimerOn();
	dgesl(aa,ldaa,n,ipvt,b,0);
	TimerOff();
	st[1][5] = TimerElapsed();
	Report( "clinpack(dgesl#6)", st[1][5] );

	total = st[0][5] + st[1][5];
	st[2][5] = total;
	st[3][5] = ops/(1.0e3*total);
	st[4][5] = 2.0e3/st[3][5];
	st[5][5] = total/cray;

	/* Column 6: third single pass with ldaa. */
	matgen(aa,ldaa,n,b,&norma);
	TimerOn();
	dgefa(aa,ldaa,n,ipvt,&info);
	TimerOff();
	st[0][6] = TimerElapsed();
	Report( "clinpack(dgefa#7)", st[0][6] );

	TimerOn();
	dgesl(aa,ldaa,n,ipvt,b,0);
	TimerOff();
	st[1][6] = TimerElapsed();
	Report( "clinpack(dgesl#7)", st[1][6] );

	total = st[0][6] + st[1][6];
	st[2][6] = total;
	st[3][6] = ops/(1.0e3*total);
	st[4][6] = 2.0e3/st[3][6];
	st[5][6] = total/cray;

	/* Column 7: average over NTIMES repetitions with ldaa (same scheme as column 3). */
	ntimes = NTIMES;
	tm2 = 0;
	UserTimerOn( &second_timer );

	for (i = 0; i < ntimes; i++) {
		TimerOn();
		matgen(aa,ldaa,n,b,&norma);
		TimerOff();
		tm2 = tm2 + TimerElapsed();
		dgefa(aa,ldaa,n,ipvt,&info);
	}

	UserTimerOff( &second_timer );
	st[0][7] = ( UserTimerElapsed( &second_timer ) - tm2 ) / ntimes;
	Report( "clinpack(dgefa#8)", st[0][7] );

	TimerOn();
	for (i = 0; i < ntimes; i++) {
		dgesl(aa,ldaa,n,ipvt,b,0);
	}
	TimerOff();
	st[1][7] = TimerElapsed()/ntimes;
	Report( "clinpack(dgesl#8)", st[1][7] );

	total = st[0][7] + st[1][7];
	st[2][7] = total;
	st[3][7] = ops/(1.0e3*total);
	st[4][7] = 2.0e3/st[3][7];
	st[5][7] = total/cray;

	/* the following code sequence implements the semantics of
	   the Fortran intrinsics "nint(min(st[3][3],st[3][7]))"  */

	/* Earlier implementation, kept for reference:
	kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
	kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
	if (fabs((double)kf) < ONE)
		kflops = 0;
	else {
		kflops = floor(fabs((double)kf));
		if (kf < ZERO) kflops = -kflops;
	}
	*/

	/* Clamp negative rates to zero, take the smaller average and round to nearest. */
	if ( st[3][3] < ZERO ) st[3][3] = ZERO;
	if ( st[3][7] < ZERO ) st[3][7] = ZERO;
	kf = st[3][3];
	if ( st[3][7] < st[3][3] ) kf = st[3][7];
	kflops = (int)(kf + 0.5);

	printf(" times for array with leading dimension of%4d\n",ldaa);
	print_time(4);
	print_time(5);
	print_time(6);
	print_time(7);
	printf(ROLLING); printf(PREC);
	printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);

	exit( EXIT_SUCCESS );
}
/*
 * JNI entry point: runs the Linpack benchmark (order n = 100) and returns a
 * freshly constructed instance of resultClass with its double fields
 * "mflops", "nres" and "precision" filled in.
 *
 * env          JNI environment of the calling thread.
 * thiz         the calling Java object (unused).
 * resultClass  class of the result object; it must have a no-argument
 *              constructor and the three double fields named above.
 *
 * The reported "mflops" value is the smaller of the best pass measured with
 * leading dimension lda = 201 and the best pass measured with ldaa = 200.
 *
 * Returns the result object, or NULL if the constructor, the object or one
 * of the fields could not be obtained.  In that case the failure is logged
 * and any exception raised by the JNI lookup is left pending for the Java
 * caller.  Previously a NULL constructor ID was logged but still passed to
 * NewObject, and the remaining results were not checked at all.
 *
 * Timing uses the file-scope start_time()/end_time()/secs facility and the
 * atime[][] table (row 3 = Mflops; columns 1-5 = passes with lda,
 * columns 7-11 = passes with ldaa, columns 6 and 12 = averages).
 */
jobject Java_rs_pedjaapps_Linpack_MainActivity_runLinpack (JNIEnv* env, jobject thiz, jclass resultClass)
{
    __android_log_write (ANDROID_LOG_DEBUG, "linpack-jni.c", "running neon linpack");
    static REAL aa[200*200],a[200*201],b[200],x[200];
    REAL cray,ops,total,norma,normx;
    REAL resid,residn,eps,tm2,epsn,x1,x2;
    REAL mflops;
    static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
    int pass, loop;
    REAL overhead1, overhead2, time2;
    REAL max1, max2;

    lda = 201;
    ldaa = 200;
    cray = .056;
    n = 100;

    /* Operation count used to convert seconds into Mflops. */
    ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

    /* Single timed factor + solve (column 0 of atime). */
    matgen(a,lda,n,b,&norma);
    start_time();
    dgefa(a,lda,n,ipvt,&info);
    end_time();
    atime[0][0] = secs;
    start_time();
    dgesl(a,lda,n,ipvt,b,0);
    end_time();
    atime[1][0] = secs;
    total = atime[0][0] + atime[1][0];

    // compute a residual to verify results.

    for (i = 0; i < n; i++)
    {
        x[i] = b[i];
    }
    matgen(a,lda,n,b,&norma);
    for (i = 0; i < n; i++)
    {
        b[i] = -b[i];
    }
    dmxpy(n,b,n,lda,x,a);
    resid = 0.0;
    normx = 0.0;
    for (i = 0; i < n; i++)
    {
        resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
        normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
    }
    eps = epslon(ONE);
    residn = resid/( n*norma*normx*eps );
    epsn = eps;
    x1 = x[0] - 1;
    x2 = x[n-1] - 1;

    atime[2][0] = total;
    if (total > 0.0)
    {
        atime[3][0] = ops/(1.0e6*total);
        atime[4][0] = 2.0/atime[3][0];
    }
    else
    {
        atime[3][0] = 0.0;
        atime[4][0] = 0.0;
    }
    atime[5][0] = total/cray;

    // ************************************************************************
    // *       Calculate overhead of executing matgen procedure              *
    // ************************************************************************

    pass = -20;
    loop = NTIMES;
    do
    {
        start_time();
        pass = pass + 1;
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(a,lda,n,b,&norma);
        }
        end_time();
        overhead1 = secs;
        // Stop once a run exceeds runSecs (or after 20 attempts).
        if (overhead1 > runSecs)
        {
            pass = 0;
        }
        if (pass < 0)
        {
            if (overhead1 < 0.1)
            {
                loop = loop * 10;
            }
            else
            {
                loop = loop * 2;
            }
        }
    }
    while (pass < 0);

    overhead1 = overhead1 / (double)loop;

    // ************************************************************************
    // *           Calculate matgen/dgefa passes for runSecs seconds          *
    // ************************************************************************

    pass = -20;
    ntimes = NTIMES;
    do
    {
        start_time();
        pass = pass + 1;
        for ( i = 0 ; i < ntimes ; i++)
        {
            matgen(a,lda,n,b,&norma);
            dgefa(a,lda,n,ipvt,&info );
        }
        end_time();
        time2 = secs;
        if (time2 > runSecs)
        {
            pass = 0;
        }
        if (pass < 0)
        {
            if (time2 < 0.1)
            {
                ntimes = ntimes * 10;
            }
            else
            {
                ntimes = ntimes * 2;
            }
        }
    }
    while (pass < 0);

    // Scale the repetition count so one timed batch lasts about runSecs.
    ntimes = (int)(runSecs * (double)ntimes / time2);
    if (ntimes == 0) ntimes = 1;

    // ************************************************************************
    // *                              Execute 5 passes                        *
    // ************************************************************************

    // matgen time to subtract from each matgen+dgefa batch.
    tm2 = ntimes * overhead1;
    atime[3][6] = 0;

    for (j=1 ; j<6 ; j++)
    {
        start_time();
        for (i = 0; i < ntimes; i++)
        {
            matgen(a,lda,n,b,&norma);
            dgefa(a,lda,n,ipvt,&info );
        }
        end_time();
        atime[0][j] = (secs - tm2)/ntimes;

        start_time();
        for (i = 0; i < ntimes; i++)
        {
            dgesl(a,lda,n,ipvt,b,0);
        }
        end_time();

        atime[1][j] = secs/ntimes;
        total = atime[0][j] + atime[1][j];
        atime[2][j] = total;
        atime[3][j] = ops/(1.0e6*total);
        atime[4][j] = 2.0/atime[3][j];
        atime[5][j] = total/cray;
        atime[3][6] = atime[3][6] + atime[3][j];
    }
    atime[3][6] = atime[3][6] / 5.0;

    // ************************************************************************
    // *       Calculate overhead of executing matgen procedure              *
    // ************************************************************************

    start_time();
    for ( i = 0 ; i < loop ; i++)
    {
        matgen(aa,ldaa,n,b,&norma);
    }
    end_time();
    overhead2 = secs;
    overhead2 = overhead2 / (double)loop;

    // ************************************************************************
    // *                              Execute 5 passes                        *
    // ************************************************************************

    tm2 = ntimes * overhead2;
    atime[3][12] = 0;

    for (j=7 ; j<12 ; j++)
    {
        start_time();
        for (i = 0; i < ntimes; i++)
        {
            matgen(aa,ldaa,n,b,&norma);
            dgefa(aa,ldaa,n,ipvt,&info );
        }
        end_time();
        atime[0][j] = (secs - tm2)/ntimes;

        start_time();
        for (i = 0; i < ntimes; i++)
        {
            dgesl(aa,ldaa,n,ipvt,b,0);
        }
        end_time();
        atime[1][j] = secs/ntimes;
        total = atime[0][j] + atime[1][j];
        atime[2][j] = total;
        atime[3][j] = ops/(1.0e6*total);
        atime[4][j] = 2.0/atime[3][j];
        atime[5][j] = total/cray;
        atime[3][12] = atime[3][12] + atime[3][j];
    }
    atime[3][12] = atime[3][12] / 5.0;

    // ************************************************************************
    // *           Use minimum average as overall Mflops rating               *
    // ************************************************************************

    // Computed as in the original program; not part of the returned object.
    mflops = atime[3][6];
    if (atime[3][12] < mflops) mflops = atime[3][12];

    // ************************************************************************
    // *     Pick the value to report: best pass of each run, then the        *
    // *     smaller of the two                                               *
    // ************************************************************************

    max1 = 0;
    for (i=1 ; i<6 ; i++)
    {
        if (atime[3][i] > max1) max1 = atime[3][i];
    }

    max2 = 0;
    for (i=7 ; i<12 ; i++)
    {
        if (atime[3][i] > max2) max2 = atime[3][i];
    }
    if (max1 < max2) max2 = max1;

    // ************************************************************************
    // *              Build the Java result object                            *
    // ************************************************************************

    jmethodID jConstructor = (*env)->GetMethodID (env, resultClass, "<init>", "()V");
    if (jConstructor == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jConstructor is NULL");
        return NULL;
    }

    jobject resultObject = (*env)->NewObject (env, resultClass, jConstructor);
    if (resultObject == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "resultObject is NULL");
        return NULL;
    }

    /*mFlops, residn, resid, epsn, x1, x2;*/
    // Stop at the first failed lookup: a pending exception forbids further JNI calls.
    jfieldID jMFlops = (*env)->GetFieldID (env, resultClass, "mflops", "D");
    if (jMFlops == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "field mflops not found");
        return NULL;
    }
    jfieldID jResidn = (*env)->GetFieldID (env, resultClass, "nres", "D");
    if (jResidn == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "field nres not found");
        return NULL;
    }
    jfieldID jEpsn = (*env)->GetFieldID (env, resultClass, "precision", "D");
    if (jEpsn == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "field precision not found");
        return NULL;
    }

    (*env)->SetDoubleField (env, resultObject, jMFlops, max2);
    (*env)->SetDoubleField (env, resultObject, jResidn, (double) residn);
    (*env)->SetDoubleField (env, resultObject, jEpsn, (double) epsn);

    return resultObject;
}
int clinpack_kflops ( int ntimes ) { static REAL aa[200][200],a[200][201],b[200],x[200]; REAL cray,ops,total,norma,normx; REAL resid,residn,eps; REAL kf; double t1,tm,tm2; static int ipvt[200],n,i,info,lda,ldaa,kflops; #if defined(WIN32) static float one_tick = .0001; #else static long clock_tick = -1; static float one_tick; if ( clock_tick < 1 || clock_tick > 1000) { clock_tick = sysconf( _SC_CLK_TCK ); /* clock_tick is the number of ticks per second */ one_tick = (float) 1 / clock_tick; /* one_tick is the length of time for one tick */ } #endif lda = 201; ldaa = 200; cray = .056; n = 100; ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n); matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][0] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][0] = dtime() - t1; total = st[0][0] + st[1][0]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; /* compute a residual to verify results. */ for (i = 0; i < n; i++) { x[i] = b[i]; } matgen((double *)a,lda,n,b,&norma); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,(double *)a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]); normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]); } eps = epslon((REAL)ONE); residn = resid/( n*norma*normx*eps ); st[2][0] = total; st[3][0] = ops/(1.0e3*total); st[4][0] = 2.0e3/st[3][0]; st[5][0] = total/cray; matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][1] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][1] = dtime() - t1; total = st[0][1] + st[1][1]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. 
In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][1] = total; st[3][1] = ops/(1.0e3*total); st[4][1] = 2.0e3/st[3][1]; st[5][1] = total/cray; matgen((double *)a,lda,n,b,&norma); t1 = dtime(); dgefa((double *)a,lda,n,ipvt,&info); st[0][2] = dtime() - t1; t1 = dtime(); dgesl((double *)a,lda,n,ipvt,b,0); st[1][2] = dtime() - t1; total = st[0][2] + st[1][2]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][2] = total; st[3][2] = ops/(1.0e3*total); st[4][2] = 2.0e3/st[3][2]; st[5][2] = total/cray; tm2 = 0.0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen((double *)a,lda,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa((double *)a,lda,n,ipvt,&info); } st[0][3] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl((double *)a,lda,n,ipvt,b,0); } st[1][3] = (dtime() - t1)/ntimes; total = st[0][3] + st[1][3]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][3] = total; st[3][3] = ops/(1.0e3*total); st[4][3] = 2.0e3/st[3][3]; st[5][3] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][4] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][4] = dtime() - t1; total = st[0][4] + st[1][4]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. 
Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][4] = total; st[3][4] = ops/(1.0e3*total); st[4][4] = 2.0e3/st[3][4]; st[5][4] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][5] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][5] = dtime() - t1; total = st[0][5] + st[1][5]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][5] = total; st[3][5] = ops/(1.0e3*total); st[4][5] = 2.0e3/st[3][5]; st[5][5] = total/cray; matgen((double *)aa,ldaa,n,b,&norma); t1 = dtime(); dgefa((double *)aa,ldaa,n,ipvt,&info); st[0][6] = dtime() - t1; t1 = dtime(); dgesl((double *)aa,ldaa,n,ipvt,b,0); st[1][6] = dtime() - t1; total = st[0][6] + st[1][6]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][6] = total; st[3][6] = ops/(1.0e3*total); st[4][6] = 2.0e3/st[3][6]; st[5][6] = total/cray; tm2 = 0; t1 = dtime(); for (i = 0; i < ntimes; i++) { tm = dtime(); matgen((double *)aa,ldaa,n,b,&norma); tm2 = tm2 + dtime() - tm; dgefa((double *)aa,ldaa,n,ipvt,&info); } st[0][7] = (dtime() - t1 - tm2)/ntimes; t1 = dtime(); for (i = 0; i < ntimes; i++) { dgesl((double *)aa,ldaa,n,ipvt,b,0); } st[1][7] = (dtime() - t1)/ntimes; total = st[0][7] + st[1][7]; /* On extremely fast machines, the total time between checks can be less than the resolution of the clock. In this case, total will be 0. Set it to the time 1 clock tick takes as a way to avoid dividing by 0. 
Derek Wright, 9/4/97 */ if( total == 0 ) total = one_tick; st[2][7] = total; st[3][7] = ops/(1.0e3*total); st[4][7] = 2.0e3/st[3][7]; st[5][7] = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics "nint(min(st[3][3],st[3][7]))" */ /* kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7]; kf = (kf > ZERO) ? (kf + .5) : (kf - .5); if (fabs((double)kf) < ONE) kflops = 0; else { kflops = floor(fabs((double)kf)); if (kf < ZERO) kflops = -kflops; } */ if ( st[3][3] < ZERO ) st[3][3] = ZERO; if ( st[3][7] < ZERO ) st[3][7] = ZERO; kf = st[3][3]; if ( st[3][7] < st[3][3] ) kf = st[3][7]; kflops = (int)(kf + 0.5); return kflops; }
/*
 * Run 2*nreps factor/solve cycles on a matrix of order arsize/2 carved out
 * of the file-scope mempool, and report the timing through the JNI callbacks.
 *
 * nreps   number of repetitions of each of the two timing loops.
 * arsize  leading dimension of the matrix; the solved system has order
 *         arsize/2.
 *
 * Memory layout inside mempool: arsize*arsize REALs for the matrix, followed
 * by the right-hand side b, with the pivot vector starting at b[arsize].
 *
 * Returns the total elapsed time in the units of second(), or 0 when the run
 * was too short to be meaningful (total < 0.5 or dgefa+dgesl < 0.2); in that
 * case nothing is reported.  When the total exceeds 10, the Mflops figure is
 * additionally passed to pass_result().
 */
static REAL linpack(long nreps,int arsize)
{
    REAL *a,*b;
    REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int *ipvt,n,info,lda;
    long i,arsize2d;
    char buf[80];

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();
    /* First loop: dgefa/dgesl called with a final argument of 1. */
    for (i=0;i<nreps;i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }
    /* Second loop: same work with a final argument of 0. */
    for (i=0;i<nreps;i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;
    /*
     * The field widths in the format are minimums, so large values could
     * overrun buf with sprintf; snprintf truncates instead.
     */
    snprintf(buf,sizeof buf,"%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
             nreps,totalt,100.*tdgefa/totalt,
             100.*tdgesl/totalt,100.*toverhead/totalt,
             kflops/1000.0);
    /* Callback to Java to print buf */
    pass_jni_msg(buf);
    if(totalt > 10.) /* only send data to server when bench is done */
        pass_result("mflops", (float) (kflops/1000.0));
    return(totalt);
}
void test03 ( int n )

/******************************************************************************/
/*
  Purpose:

    TEST03 runs the revised version of SGEFA in sequential mode.

  Discussion:

    A linear system A * x = b of order N is generated, factored with
    MSGEFA2 and solved with SGESL; the sum of the absolute differences
    between the known and the computed solution is printed.

    The program exits with status 1 if memory cannot be allocated or if
    MSGEFA2 reports a singular matrix.

  Modified:

    07 April 2008

  Author:

    John Burkardt

  Parameters:

    Input, int N, the order of the linear system.
*/
{
  float *a;
  float *b;
  float err;
  int i;
  int info;
  int *ipvt;
  int job;
  int lda;
  double wtime;
  float *x;
/*
  Allocate all work space up front.  The matrix size is computed in size_t
  so that lda * n cannot overflow int for large N.
*/
  lda = n;
  a = ( float * ) malloc ( ( size_t ) lda * ( size_t ) n * sizeof ( float ) );
  b = ( float * ) malloc ( ( size_t ) n * sizeof ( float ) );
  x = ( float * ) malloc ( ( size_t ) n * sizeof ( float ) );
  ipvt = ( int * ) malloc ( ( size_t ) n * sizeof ( int ) );

  if ( 0 < n && ( a == NULL || b == NULL || x == NULL || ipvt == NULL ) )
  {
    printf ( "\n" );
    printf ( "TEST03 - Fatal error!\n" );
    printf ( " Unable to allocate memory.\n" );
    free ( a );
    free ( b );
    free ( ipvt );
    free ( x );
    exit ( 1 );
  }
/*
  Generate the linear system A * x = b.
*/
  matgen ( lda, n, a, x, b );
/*
  Factor the linear system.
*/
  wtime = omp_get_wtime ( );
  info = msgefa2 ( a, lda, n, ipvt );
  wtime = omp_get_wtime ( ) - wtime;

  if ( info != 0 )
  {
    printf ( "\n" );
    printf ( "TEST03 - Fatal error!\n" );
    printf ( " MSGEFA2 reports the matrix is singular.\n" );
    exit ( 1 );
  }
/*
  Solve the linear system.
*/
  job = 0;
  sgesl ( a, lda, n, ipvt, b, job );
/*
  Accumulate the error between the known solution X and the computed one,
  which SGESL leaves in B.
*/
  err = 0.0;
  for ( i = 0; i < n; i++ )
  {
    err = err + fabs ( x[i] - b[i] );
  }

  printf ( " Revised Sequential %8d %10.4e \n", n, err );

  free ( a );
  free ( b );
  free ( ipvt );
  free ( x );

  return;
}