// Validates the solution by checking its normalized residual.
//
// Steps, as performed below:
//   1. x receives a copy of b (b is expected to hold the computed solution
//      on entry -- TODO confirm against the caller).
//   2. fillArray() regenerates a and b; its return value is used as the
//      norm of a.
//   3. b is negated and dmxpy() is applied to it with x and a (per the
//      original comment this adds a*x to b; dmxpy is defined elsewhere).
//   4. The largest magnitudes found in b and x are combined into
//        residn = max|b| / (n * biggestA * max|x| * eps)
//      and the function asserts residn < CHECK_VALUE.
//
// Parameters:
//   a - matrix, overwritten by fillArray()
//   b - vector of length n, overwritten (ends up holding the residual)
//   x - buffer of length n, receives the copy of b
//   n - number of elements processed
//
// Note: the check is an assert(), so it disappears when NDEBUG is defined.
void validate(double **a, double *b, double *x, int n) {
  // IEEE-754 double-precision machine epsilon (2^-52).
  const double machineEps = 2.2204460492503131e-016;

  // copy b into x
  for (int i = 0; i < n; ++i) {
    x[i] = b[i];
  }

  // reset A and B arrays to original rand values
  double biggestA = fillArray(a, n, b);

  for (int i = 0; i < n; ++i) {
    b[i] = -b[i];
  }

  // multiply a*x, add to b
  dmxpy(n, b, n, x, a);

  // Track the largest magnitude in b and in x.  The magnitude is computed
  // explicitly instead of calling abs(): on a double argument abs() can
  // resolve to the integer version, which truncates values such as 1e-14
  // to 0 and would make the check below pass unconditionally.
  double biggestB = 0.0;
  double biggestX = 0.0;
  for (int i = 0; i < n; ++i) {
    const double magB = (b[i] < 0.0) ? -b[i] : b[i];
    const double magX = (x[i] < 0.0) ? -x[i] : x[i];
    if (magB > biggestB) {
      biggestB = magB;
    }
    if (magX > biggestX) {
      biggestX = magX;
    }
  }

  double residn = biggestB / (n * biggestA * biggestX * machineEps);
  assert(residn < CHECK_VALUE);
}
/*
 * Linpack benchmark driver (matrix order n = 100).
 *
 * Sequence performed below:
 *   1. One timed dgefa/dgesl pass on `a` (leading dimension 201), followed
 *      by a residual check whose figures are printed to stdout.
 *   2. Calibration of the matgen overhead and of the repetition count
 *      `ntimes`, both driven by `runSecs`.
 *   3. Five timed passes on `a` (second index 1..5 of atime, average in
 *      index 6) and five on `aa` with leading dimension 200 (indices
 *      7..11, average in index 12).
 *   4. The smaller of the two averages is printed as the Mflops rating.
 *
 * Timing goes through start_time()/end_time(), with the elapsed value read
 * from `secs`.  Those, together with atime, runSecs, options, opt, ROLLING,
 * PREC, NTIMES, ONE and the REAL type, are defined outside this function.
 *
 * argc and argv are not referenced in this function.
 */
int main (int argc, char *argv[])
{
        static REAL aa[200*200],a[200*201],b[200],x[200];       
        REAL cray,ops,total,norma,normx;
        REAL resid,residn,eps,tm2,epsn,x1,x2;
        REAL mflops;
        static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
        int endit, pass, loop;
        REAL overhead1, overhead2, time2;
        REAL max1, max2;
        char was[5][20];
        char expect[5][20];
        char title[5][20];
        int errors;
        
 
        printf("\n");
         
        printf("##########################################\n"); 


    
        /* `a` is passed with leading dimension lda, `aa` with ldaa.
           cray is the divisor used for the atime[5][*] entries. */
        lda = 201;
        ldaa = 200;
        cray = .056; 
        n = 100;

        fprintf(stdout, "%s ", ROLLING);
        fprintf(stdout, "%s ", PREC);
        fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n");

        fprintf(stdout,"Optimisation %s\n\n",options);

        /* operation count used for every Mflops figure: 2n^3/3 + 2n^2 */
        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        /* single timed pass: dgefa time in atime[0][0], dgesl in atime[1][0] */
        matgen(a,lda,n,b,&norma);
        start_time();
        dgefa(a,lda,n,ipvt,&info);
        end_time();
        atime[0][0] = secs;
        start_time();
        dgesl(a,lda,n,ipvt,b,0);
        end_time();
        atime[1][0] = secs;
        total = atime[0][0] + atime[1][0];

/*     compute a residual to verify results.  */ 

        /* keep the solution in x, regenerate a and b, then let dmxpy
           combine -b with x and a; the largest magnitudes left in b and x
           become resid and normx */
        for (i = 0; i < n; i++) {
                x[i] = b[i];
        }
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
                b[i] = -b[i];
        }
        dmxpy(n,b,n,lda,x,a);
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
                resid = (resid > fabs((double)b[i])) 
                        ? resid : fabs((double)b[i]);
                normx = (normx > fabs((double)x[i])) 
                        ? normx : fabs((double)x[i]);
        }
        eps = epslon(ONE);
        residn = resid/( n*norma*normx*eps );
        epsn = eps;
        x1 = x[0] - 1;
        x2 = x[n-1] - 1;
        
        printf("norm resid      resid           machep");
        printf("         x[0]-1          x[n-1]-1\n");
        printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
               (double)residn, (double)resid, (double)epsn, 
               (double)x1, (double)x2);

        fprintf(stderr,"Times are reported for matrices of order        %5d\n",n);
        fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

        /* derived figures for the single pass; the total > 0 test avoids
           dividing by a zero elapsed time */
        atime[2][0] = total;
        if (total > 0.0)
        {
            atime[3][0] = ops/(1.0e6*total);
            atime[4][0] = 2.0/atime[3][0];
        }
        else
        {
            atime[3][0] = 0.0;
            atime[4][0] = 0.0;
        }
        atime[5][0] = total/cray;
       
        print_time(0);

/************************************************************************
 *       Calculate overhead of executing matgen procedure              *
 ************************************************************************/
       
        /* pass starts at -20 and is incremented every iteration, so the
           loop runs at most 20 times; it ends early as soon as one batch
           of `loop` matgen calls takes longer than runSecs.  Otherwise
           `loop` grows x10 while a batch takes under 0.1 s, else x2. */
        fprintf (stderr,"\nCalculating matgen overhead\n");
        pass = -20;
        loop = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;        
            for ( i = 0 ; i < loop ; i++)
            {
                 matgen(a,lda,n,b,&norma);
            }
            end_time();
            overhead1 = secs;
            fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1);
            if (overhead1 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (overhead1 < 0.1)
                {
                    loop = loop * 10;
                }
                else
                {
                    loop = loop * 2;
                }
            }
        }
        while (pass < 0);
        
        /* convert the batch time into the time of a single matgen call */
        overhead1 = overhead1 / (double)loop;

        fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1);

/************************************************************************
 *           Calculate matgen/dgefa passes for runSecs seconds                *
 ************************************************************************/
       
        /* same growth scheme as above, applied to matgen+dgefa batches */
        fprintf (stderr,"Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs);
        pass = -20;
        ntimes = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;        
            for ( i = 0 ; i < ntimes ; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            time2 = secs;
            fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2);
            if (time2 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (time2 < 0.1)
                {
                    ntimes = ntimes * 10;
                }
                else
                {
                    ntimes = ntimes * 2;
                }
            }
        }
        while (pass < 0);
        
        /* rescale ntimes by runSecs/time2, the ratio of the target time
           to the last measured batch time; never let it drop to zero */
        ntimes =  (int)(runSecs * (double)ntimes / time2);
        if (ntimes == 0) ntimes = 1;

        fprintf(stderr,"Passes used %10d \n\n", ntimes);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");        

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
      
        /* tm2 is the estimated matgen time for ntimes calls; it is
           subtracted from each matgen+dgefa batch so that atime[0][j]
           holds the per-call dgefa time.  atime[3][6] accumulates the
           Mflops of passes 1..5.
           NOTE(review): unlike the single pass above, total is not
           checked for zero before dividing here. */
        tm2 = ntimes * overhead1;
        atime[3][6] = 0;

        for (j=1 ; j<6 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;

            start_time();              
            for (i = 0; i < ntimes; i++)
            {
                dgesl(a,lda,n,ipvt,b,0);
            }
            end_time();

            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][6] = atime[3][6] + atime[3][j];
            
            print_time(j);
        }
        atime[3][6] = atime[3][6] / 5.0;
        fprintf (stderr,"Average                          %11.2f\n",
                                               (double)atime[3][6]);        
        
        fprintf (stderr,"\nCalculating matgen2 overhead\n");

/************************************************************************
 *             Calculate overhead of executing matgen procedure         *
 ************************************************************************/

        /* one batch only: reuses the `loop` count found by the first
           calibration, this time generating into aa with ldaa */
        start_time();        
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(aa,ldaa,n,b,&norma);    
        }
        end_time();
        overhead2 = secs;
        overhead2 = overhead2 / (double)loop;
        
        fprintf(stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
              
        /* same measurement as the first five passes, on aa/ldaa; results
           go to indices 7..11 and the Mflops sum/average to index 12 */
        tm2 = ntimes * overhead2;
        atime[3][12] = 0;

        for (j=7 ; j<12 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(aa,ldaa,n,b,&norma);
                dgefa(aa,ldaa,n,ipvt,&info  );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;
            
            start_time();      
            for (i = 0; i < ntimes; i++)
            {
                dgesl(aa,ldaa,n,ipvt,b,0);
            }
            end_time();
            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][12] = atime[3][12] + atime[3][j];

            print_time(j);
        }
        atime[3][12] = atime[3][12] / 5.0; 
        fprintf (stderr,"Average                          %11.2f\n",
                                              (double)atime[3][12]);  

/************************************************************************
 *           Use minimum average as overall Mflops rating               *
 ************************************************************************/
      
        mflops = atime[3][6];
        if (atime[3][12] < mflops) mflops = atime[3][12];
       
        fprintf(stderr,"\n");
        fprintf(stderr, "%s ", ROLLING);
        fprintf(stderr, "%s ", PREC);
        fprintf(stderr," Precision %11.2f Mflops \n\n",mflops);


    /* best single-pass Mflops of each group; max2 ends up as the smaller
       of the two maxima.  Neither value is read again in this function. */
    max1 = 0;
    for (i=1 ; i<6 ; i++)
    {
        if (atime[3][i] > max1) max1 = atime[3][i];                 
    }

    max2 = 0;
    for (i=7 ; i<12 ; i++)
    {                 
        if (atime[3][i] > max2) max2 = atime[3][i];                 
    }
    if (max1 < max2) max2 = max1;
   
    /* textual form of the measured numeric results */
    sprintf(was[0], "%16.1f",(double)residn);
    sprintf(was[1], "%16.8e",(double)resid);
    sprintf(was[2], "%16.8e",(double)epsn);
    sprintf(was[3], "%16.8e",(double)x1);
    sprintf(was[4], "%16.8e",(double)x2);

/*
    //  Values for Watcom

    sprintf(expect[0], "             0.4");
    sprintf(expect[1], " 7.41628980e-014");
    sprintf(expect[2], " 1.00000000e-015");
    sprintf(expect[3], "-1.49880108e-014");
    sprintf(expect[4], "-1.89848137e-014");
    // Values for Visual C++

    sprintf(expect[0], "             1.7");
    sprintf(expect[1], " 7.41628980e-014");
    sprintf(expect[2], " 2.22044605e-016");
    sprintf(expect[3], "-1.49880108e-014");
    sprintf(expect[4], "-1.89848137e-014");

    // Values for Ubuntu GCC 32 Bit

    sprintf(expect[0], "             1.9");
    sprintf(expect[1], "  8.39915160e-14");
    sprintf(expect[2], "  2.22044605e-16");
    sprintf(expect[3], " -6.22835117e-14");
    sprintf(expect[4], " -4.16333634e-14");
*/

     // Values for Ubuntu GCC 32 Bit
     // NOTE(review): despite the label, these figures equal the
     // "Visual C++" set in the commented-out block above (written with
     // two-digit exponents), not the "Ubuntu GCC 32 Bit" set there.

    sprintf(expect[0], "             1.7");
    sprintf(expect[1], "  7.41628980e-14");
    sprintf(expect[2], "  2.22044605e-16");
    sprintf(expect[3], " -1.49880108e-14");
    sprintf(expect[4], " -1.89848137e-14");

    sprintf(title[0], "norm. resid");
    sprintf(title[1], "resid      ");
    sprintf(title[2], "machep     ");
    sprintf(title[3], "x[0]-1     ");
    sprintf(title[4], "x[n-1]-1   ");

    /* when opt parses to 0 a different machep string is expected */
    if (strtol(opt, NULL, 10) == 0)
    {
        sprintf(expect[2], " 8.88178420e-016");
    }
    /* NOTE(review): was/expect/title are filled in but never compared or
       printed in this function, and errors is only reset here. */
    errors = 0;

    printf ("\n");
}
Example #3
0
/*
 * Linpack benchmark driver reporting Kflops.
 *
 * Timings are taken with dtime() and stored in st[row][col]:
 *   cols 0..2  single passes on `a`  (leading dimension lda)
 *   col  3     average over NTIMES passes on `a`
 *   cols 4..6  single passes on `aa` (leading dimension ldaa)
 *   col  7     average over NTIMES passes on `aa`
 * with rows 0 = dgefa time, 1 = dgesl time, 2 = total,
 * 3 = ops/(1e3*total), 4 = 2e3/row 3, 5 = total/cray.
 * The final rating is the smaller of st[3][3] and st[3][7], rounded.
 *
 * st, bf, ROLLING, PREC, NTIMES, ORDER, ORDER2, ORDER2P1, ZERO, ONE and the
 * REAL type are defined outside this function.
 *
 * NOTE(review): the #endif right after the `main ()` line closes a
 * conditional whose opening directive is not visible in this excerpt, and
 * the OMPC section below uses argc/argv, which this `main ()` form does not
 * declare -- confirm against the full file.
 */
main ()
#endif
{
   static REAL aa[ORDER2][ORDER2],a[ORDER2][ORDER2P1],b[ORDER2],x[ORDER2];
   REAL cray,ops,total,norma,normx;
   REAL resid,residn,eps;
   REAL kf;
   double t1,tm,tm2,dtime();
   static int ipvt[ORDER2],n,i,ntimes,info,lda,ldaa,kflops;

   lda = ORDER2P1;
   ldaa = ORDER2;
   cray = .056; 
   n = ORDER;

#ifdef OMPC
   /* parse "-b <value>" into bf, then report the OpenMP thread count */
   {
     int		c;
     extern char	*optarg;

     while ((c = getopt (argc, argv, "b:")) != EOF) {
       switch (c) {
       case 'b':
	 bf = atoi (optarg);
	 break;
       }
     }
   }

   if (omp_get_max_threads () != 1) {
     printf ("OpenMP(%d threads)\n", omp_get_max_threads ());
   } else {
     printf ("OpenMP(1 thread)\n");
   }
#endif
   /* NOTE(review): ROLLING and PREC are used directly as format strings */
   printf(ROLLING); printf(PREC);
   printf("Precision Linpack\n\n");

       /* operation count used for every kflops figure: 2n^3/3 + 2n^2 */
       ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

       /* first single pass on a (column 0) */
       matgen(a,lda,n,b,&norma);
       t1 = dtime();
       dgefa(a,lda,n,ipvt,&info);
       st[0][0] = dtime() - t1;
       
       t1 = dtime();
       dgesl(a,lda,n,ipvt,b,0);
       st[1][0] = dtime() - t1;
       total = st[0][0] + st[1][0];

/*     compute a residual to verify results.  */ 

       /* keep the solution in x, regenerate a and b, then let dmxpy
          combine -b with x and a; the largest magnitudes left in b and x
          become resid and normx */
       for (i = 0; i < n; i++)
	  {
		x[i] = b[i];
	  }
       matgen(a,lda,n,b,&norma);
       for (i = 0; i < n; i++) 
	  {
		b[i] = -b[i];
	  }
       dmxpy(n,b,n,lda,x,a);
       resid = 0.0;
       normx = 0.0;
       for (i = 0; i < n; i++)
	{
		resid = (resid > fabs((double)b[i])) 
	? resid : fabs((double)b[i]);
		normx = (normx > fabs((double)x[i])) 
	? normx : fabs((double)x[i]);
	}
       eps = epslon((REAL)ONE);
       residn = resid/( n*norma*normx*eps );
   
   printf("   norm. resid      resid           machep");
   printf("         x[0]-1        x[n-1]-1\n");
   printf("%8.1f      %16.8e%16.8e%16.8e%16.8e\n",
	 (double)residn, (double)resid, (double)eps, 
		(double)x[0]-1, (double)x[n-1]-1);

printf(" times are reported for matrices of order %5d\n",n);
printf("      dgefa      dgesl      total       kflops     unit");
printf("      ratio\n");

       /* NOTE(review): total is not checked for zero before the
          divisions here or in the later columns */
       st[2][0] = total;
       st[3][0] = ops/(1.0e3*total);
       st[4][0] = 2.0e3/st[3][0];
       st[5][0] = total/cray;

   printf(" times for array with leading dimension of%5d\n",lda);
   print_time(0);

       /* second single pass on a (column 1) */
       matgen(a,lda,n,b,&norma);
       t1 = dtime();
       dgefa(a,lda,n,ipvt,&info);
       st[0][1] = dtime() - t1;
       
       t1 = dtime();
       dgesl(a,lda,n,ipvt,b,0);
       st[1][1] = dtime() - t1;
       total = st[0][1] + st[1][1];
       
       st[2][1] = total;
       st[3][1] = ops/(1.0e3*total);
       st[4][1] = 2.0e3/st[3][1];
       st[5][1] = total/cray;

       /* third single pass on a (column 2) */
       matgen(a,lda,n,b,&norma);
       
       t1 = dtime();
       dgefa(a,lda,n,ipvt,&info);
       st[0][2] = dtime() - t1;
       
       t1 = dtime();
       dgesl(a,lda,n,ipvt,b,0);
       st[1][2] = dtime() - t1;
       
       total = st[0][2] + st[1][2];
       st[2][2] = total;
       st[3][2] = ops/(1.0e3*total);
       st[4][2] = 2.0e3/st[3][2];
       st[5][2] = total/cray;

       /* column 3: NTIMES repetitions.  tm2 accumulates the time spent in
          matgen so it can be removed from the dgefa measurement. */
       ntimes = NTIMES;
       tm2 = 0.0;
       t1 = dtime();

   for (i = 0; i < ntimes; i++) {
		tm = dtime();
       matgen(a,lda,n,b,&norma);
       tm2 = tm2 + dtime() - tm;
       dgefa(a,lda,n,ipvt,&info);
       }

       st[0][3] = (dtime() - t1 - tm2)/ntimes;
       t1 = dtime();

   for (i = 0; i < ntimes; i++) {
		dgesl(a,lda,n,ipvt,b,0);
       }

       st[1][3] = (dtime() - t1)/ntimes;
       total = st[0][3] + st[1][3];
       st[2][3] = total;
       st[3][3] = ops/(1.0e3*total);
       st[4][3] = 2.0e3/st[3][3];
       st[5][3] = total/cray;

   print_time(1);
   print_time(2);
   print_time(3);

       /* same four measurements on aa with leading dimension ldaa:
          single passes in columns 4, 5 and 6 */
       matgen(aa,ldaa,n,b,&norma);
       t1 = dtime();
       dgefa(aa,ldaa,n,ipvt,&info);
       st[0][4] = dtime() - t1;
       
       t1 = dtime();
       dgesl(aa,ldaa,n,ipvt,b,0);
       st[1][4] = dtime() - t1;

       total = st[0][4] + st[1][4];
       st[2][4] = total;
       st[3][4] = ops/(1.0e3*total);
       st[4][4] = 2.0e3/st[3][4];
       st[5][4] = total/cray;

       matgen(aa,ldaa,n,b,&norma);
       t1 = dtime();
       dgefa(aa,ldaa,n,ipvt,&info);
       st[0][5] = dtime() - t1;

       t1 = dtime();
       dgesl(aa,ldaa,n,ipvt,b,0);
       st[1][5] = dtime() - t1;

       total = st[0][5] + st[1][5];
       st[2][5] = total;
       st[3][5] = ops/(1.0e3*total);
       st[4][5] = 2.0e3/st[3][5];
       st[5][5] = total/cray;

   matgen(aa,ldaa,n,b,&norma);
   t1 = dtime();
   dgefa(aa,ldaa,n,ipvt,&info);
   st[0][6] = dtime() - t1;

   t1 = dtime();
   dgesl(aa,ldaa,n,ipvt,b,0);
   st[1][6] = dtime() - t1;

   total = st[0][6] + st[1][6];
   st[2][6] = total;
   st[3][6] = ops/(1.0e3*total);
   st[4][6] = 2.0e3/st[3][6];
   st[5][6] = total/cray;

   /* column 7: NTIMES repetitions on aa, matgen time removed via tm2 */
   ntimes = NTIMES;
   tm2 = 0;
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
       tm = dtime();
       matgen(aa,ldaa,n,b,&norma);
       tm2 = tm2 + dtime() - tm;
       dgefa(aa,ldaa,n,ipvt,&info);
       }

   st[0][7] = (dtime() - t1 - tm2)/ntimes;
   
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
       dgesl(aa,ldaa,n,ipvt,b,0);
       }

   st[1][7] = (dtime() - t1)/ntimes;
   total = st[0][7] + st[1][7];
   st[2][7] = total;
   st[3][7] = ops/(1.0e3*total);
   st[4][7] = 2.0e3/st[3][7];
   st[5][7] = total/cray;

   /* the following code sequence implements the semantics of
       the Fortran intrinsics "nint(min(st[3][3],st[3][7]))"   */
/*
   kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
   kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
   if (fabs((double)kf) < ONE) 
       kflops = 0;
   else {
       kflops = floor(fabs((double)kf));
       if (kf < ZERO) kflops = -kflops;
   }
*/
   /* active replacement for the block above: clamp negative rates to
      ZERO, take the smaller of the two averaged rates and round it to
      the nearest integer by adding 0.5 and truncating */
   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
   kf = st[3][3];
   if ( st[3][7] < st[3][3] ) kf = st[3][7];
   kflops = (int)(kf + 0.5);

   printf(" times for array with leading dimension of%5d\n",ldaa);
   print_time(4);
   print_time(5);
   print_time(6);
   print_time(7);
   printf(ROLLING); printf(PREC);
   printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
}
Example #4
0
/*
 * Prints `prompt`, then reads one line of user input into `dest`.
 *
 * At most size-1 characters are stored and the result is NUL-terminated;
 * anything beyond that on the same line is discarded.  The terminating
 * newline is always consumed, so the next prompt starts on a fresh line
 * without relying on fflush(stdin), which is undefined for input streams.
 * An empty line (or end of input) leaves `dest` untouched, matching what
 * the former scanf("%[^\n]") did when nothing matched.
 */
static void read_answer (const char *prompt, char *dest, int size)
{
    int ch;
    int used = 0;

    printf ("%s", prompt);
    while ((ch = getchar ()) != EOF && ch != '\n')
    {
        if (used < size - 1)
        {
            dest[used++] = (char)ch;
        }
    }
    if (used > 0)
    {
        dest[used] = '\0';
    }
}

/*
 * Linpack benchmark driver (matrix order n = 100) that appends its results
 * to the file Linpack.txt.
 *
 * Sequence performed below:
 *   1. Open Linpack.txt for appending; give up if that fails.
 *   2. One timed dgefa/dgesl pass on `a` (leading dimension 201), followed
 *      by a residual check whose figures are printed to stdout.
 *   3. Calibration of the matgen overhead and of the repetition count
 *      `ntimes`, both aiming at 5 seconds per batch.
 *   4. Five timed passes on `a` (second index 1..5 of atime, average in
 *      index 6) and five on `aa` with leading dimension 200 (indices
 *      7..11, average in index 12); the smaller average is the rating.
 *   5. Prompt the user for a description of the machine and write
 *      everything to the results file.
 *
 * second(), what_date(), matgen/dgefa/dgesl/dmxpy/epslon, print_time,
 * atime, this_month, this_year, ROLLING, PREC, NTIMES, ONE and the REAL
 * type are defined outside this function.
 *
 * Returns 0 on completion.
 */
int main (void)
{
        static REAL aa[200*200],a[200*201],b[200],x[200];       
        REAL cray,ops,total,norma,normx;
        REAL resid,residn,eps,t1,tm2,epsn,x1,x2;
        REAL mflops;
        static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
        int Endit, pass, loop;
        REAL overhead1, overhead2, time1, time2;
        FILE    *outfile;
        const char *compiler, *options;
        char general[9][80] = {" "}; 
         
        outfile = fopen("Linpack.txt","a+");
        if (outfile == NULL)
        {
            printf ("Cannot open results file \n\n");
            printf("Press any key\n");
            #ifdef DOS
            Endit = getch();
            #endif
            exit (0);
        }

/************************************************************************
 *           Enter details of compiler and options used                 *
 ************************************************************************/
                  /*----------------- --------- --------- ---------*/
        compiler = "INSERT COMPILER NAME HERE";
        options  = "INSERT OPTIMISATION OPTIONS HERE";
                  /* Include -dDP or -dSP and -dROLL or -dUNROLL */
    
        /* `a` is passed with leading dimension lda, `aa` with ldaa.
           cray is the divisor used for the atime[5][*] entries. */
        lda = 201;
        ldaa = 200;
        cray = .056; 
        n = 100;

        /* ROLLING and PREC are printed through "%s" rather than being
           used as format strings themselves */
        fprintf(stdout,"%s",ROLLING);fprintf(stdout,"%s",PREC);
        fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n");
        fprintf(stdout,"Compiler     %s\n",compiler);
        fprintf(stdout,"Optimisation %s\n\n",options);

        /* operation count used for every Mflops figure: 2n^3/3 + 2n^2 */
        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        /* single timed pass: dgefa time in atime[0][0], dgesl in atime[1][0] */
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info);
        atime[0][0] = second() - t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0);
        atime[1][0] = second() - t1;
        total = atime[0][0] + atime[1][0];

/*     compute a residual to verify results.  */ 

        for (i = 0; i < n; i++) {
                x[i] = b[i];
        }
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
                b[i] = -b[i];
        }
        dmxpy(n,b,n,lda,x,a);
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
                resid = (resid > fabs((double)b[i])) 
                        ? resid : fabs((double)b[i]);
                normx = (normx > fabs((double)x[i])) 
                        ? normx : fabs((double)x[i]);
        }
        eps = epslon(ONE);
        residn = resid/( n*norma*normx*eps );
        epsn = eps;
        x1 = x[0] - 1;
        x2 = x[n-1] - 1;
        
        printf("norm resid      resid           machep");
        printf("         x[0]-1          x[n-1]-1\n");
        printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
               (double)residn, (double)resid, (double)epsn, 
               (double)x1, (double)x2);

        fprintf(stderr,"Times are reported for matrices of order        %5d\n",n);
        fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

        /* derived figures for the single pass; the total > 0 test avoids
           dividing by a zero elapsed time */
        atime[2][0] = total;
        if (total > 0.0)
        {
            atime[3][0] = ops/(1.0e6*total);
            atime[4][0] = 2.0/atime[3][0];
        }
        else
        {
            atime[3][0] = 0.0;
            atime[4][0] = 0.0;
        }
        atime[5][0] = total/cray;
       
        print_time(0);

/************************************************************************
 *       Calculate overhead of executing matgen procedure              *
 ************************************************************************/
       
        /* pass starts at -20 and is incremented every iteration, so the
           loop runs at most 20 times; it ends early as soon as one batch
           of `loop` matgen calls takes longer than 5 seconds.  Otherwise
           `loop` grows x10 while a batch takes under 0.1 s, else x2. */
        fprintf (stderr,"\nCalculating matgen overhead\n");
        pass = -20;
        loop = NTIMES;
        do
        {
            time1 = second();
            pass = pass + 1;        
            for ( i = 0 ; i < loop ; i++)
            {
                 matgen(a,lda,n,b,&norma);
            }
            time2 = second();
            overhead1 = (time2 - time1);
            fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1);
            if (overhead1 > 5.0)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (overhead1 < 0.1)
                {
                    loop = loop * 10;
                }
                else
                {
                    loop = loop * 2;
                }
            }
        }
        while (pass < 0);
        
        /* convert the batch time into the time of a single matgen call */
        overhead1 = overhead1 / (double)loop;

        fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1);

/************************************************************************
 *           Calculate matgen/dgefa passes for 5 seconds                *
 ************************************************************************/
       
        fprintf (stderr,"Calculating matgen/dgefa passes for 5 seconds\n");
        pass = -20;
        ntimes = NTIMES;
        do
        {
            time1 = second();
            pass = pass + 1;        
            for ( i = 0 ; i < ntimes ; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            time2 = second() - time1;
            fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2);
            if (time2 > 5.0)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (time2 < 0.1)
                {
                    ntimes = ntimes * 10;
                }
                else
                {
                    ntimes = ntimes * 2;
                }
            }
        }
        while (pass < 0);
        
        /* rescale ntimes by 5.0/time2, the ratio of the target time to the
           last measured batch time; never let it drop to zero */
        ntimes =  (int)(5.0 * (double)ntimes / time2);
        if (ntimes == 0) ntimes = 1;

        fprintf (stderr,"Passes used %10d \n\n", ntimes);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");        

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
      
        /* tm2 is the estimated matgen time for ntimes calls; it is
           subtracted from each matgen+dgefa batch so that atime[0][j]
           holds the per-call dgefa time */
        tm2 = ntimes * overhead1;
        atime[3][6] = 0;

        for (j=1 ; j<6 ; j++)
        {
        
            t1 = second();

            for (i = 0; i < ntimes; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }

            atime[0][j] = (second() - t1 - tm2)/ntimes;

            t1 = second();      
        
            for (i = 0; i < ntimes; i++)
            {
                dgesl(a,lda,n,ipvt,b,0);
            }

            atime[1][j] = (second() - t1)/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][6] = atime[3][6] + atime[3][j];
            
            print_time(j);
        }
        atime[3][6] = atime[3][6] / 5.0;
        fprintf (stderr,"Average                          %11.2f\n",
                                               (double)atime[3][6]);        
        
        fprintf (stderr,"\nCalculating matgen2 overhead\n");

/************************************************************************
 *             Calculate overhead of executing matgen procedure         *
 ************************************************************************/

        /* one batch only: reuses the `loop` count found by the first
           calibration, this time generating into aa with ldaa */
        time1 = second();        
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(aa,ldaa,n,b,&norma);    
        }
        time2 = second();
        overhead2 = (time2 - time1);
        overhead2 = overhead2 / (double)loop;
        
        fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
              
        tm2 = ntimes * overhead2;
        atime[3][12] = 0;

        for (j=7 ; j<12 ; j++)
        {
        
            t1 = second();

            for (i = 0; i < ntimes; i++)
            {
                matgen(aa,ldaa,n,b,&norma);
                dgefa(aa,ldaa,n,ipvt,&info  );
            }

            atime[0][j] = (second() - t1 - tm2)/ntimes;

            t1 = second();      
        
            for (i = 0; i < ntimes; i++)
            {
                dgesl(aa,ldaa,n,ipvt,b,0);
            }

            atime[1][j] = (second() - t1)/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][12] = atime[3][12] + atime[3][j];

            print_time(j);
        }
        atime[3][12] = atime[3][12] / 5.0; 
        fprintf (stderr,"Average                          %11.2f\n",
                                              (double)atime[3][12]);  

/************************************************************************
 *           Use minimum average as overall Mflops rating               *
 ************************************************************************/
      
        mflops = atime[3][6];
        if (atime[3][12] < mflops) mflops = atime[3][12];
       
        fprintf(stderr,"\n");
        fprintf(stderr,"%s",ROLLING);fprintf(stderr,"%s",PREC);
        fprintf(stderr," Precision %11.2f Mflops \n\n",mflops);

        what_date();

/************************************************************************
 *             Type details of hardware, software etc.                  *
 ************************************************************************/

    /* Each answer is read with a length limit equal to the size of its
       80-byte slot in general[]; the mail address is asked last but is
       stored in slot 0. */
    printf ("Enter the following data which will be "
                                "appended to file Linpack.txt \n\n");
    read_answer ("PC Supplier/model ?\n                    ",
                 general[1], (int)sizeof general[1]);
    read_answer ("CPU               ?\n                    ",
                 general[2], (int)sizeof general[2]);
    read_answer ("Clock MHz         ?\n                    ",
                 general[3], (int)sizeof general[3]);
    read_answer ("Cache             ?\n                    ",
                 general[4], (int)sizeof general[4]);
    read_answer ("Chipset/options   ?\n                    ",
                 general[5], (int)sizeof general[5]);
    read_answer ("OS/DOS version    ?\n                    ",
                 general[6], (int)sizeof general[6]);
    read_answer ("Your name         ?\n                    ",
                 general[7], (int)sizeof general[7]);
    read_answer ("Where from        ?\n                    ",
                 general[8], (int)sizeof general[8]);
    read_answer ("Mail address      ?\n                    ",
                 general[0], (int)sizeof general[0]);

/************************************************************************
 *              Add results to output file Linpack.txt                  *
 ************************************************************************/
            
    fprintf (outfile, "----------------- ----------------- --------- "
                      "--------- ---------\n");
    fprintf (outfile, "LINPACK BENCHMARK FOR PCs 'C/C++'    n @ 100\n\n");
    fprintf (outfile, "Month run         %d/%d\n", this_month, this_year);
    fprintf (outfile, "PC model          %s\n", general[1]);
    fprintf (outfile, "CPU               %s\n", general[2]);
    fprintf (outfile, "Clock MHz         %s\n", general[3]);
    fprintf (outfile, "Cache             %s\n", general[4]);
    fprintf (outfile, "Options           %s\n", general[5]);
    fprintf (outfile, "OS/DOS            %s\n", general[6]);
    fprintf (outfile, "Compiler          %s\n", compiler);
    fprintf (outfile, "OptLevel          %s\n", options);
    fprintf (outfile, "Run by            %s\n", general[7]);
    fprintf (outfile, "From              %s\n", general[8]);
    fprintf (outfile, "Mail              %s\n\n", general[0]);
    
    fprintf(outfile, "Rolling            %s\n",ROLLING);
    fprintf(outfile, "Precision          %s\n",PREC); 
    fprintf(outfile, "norm. resid        %16.1f\n",(double)residn);
    fprintf(outfile, "resid              %16.8e\n",(double)resid);
    fprintf(outfile, "machep             %16.8e\n",(double)epsn);
    fprintf(outfile, "x[0]-1             %16.8e\n",(double)x1);
    fprintf(outfile, "x[n-1]-1           %16.8e\n",(double)x2);
    fprintf(outfile, "matgen 1 seconds   %16.5f\n",overhead1);
    fprintf(outfile, "matgen 2 seconds   %16.5f\n",overhead2); 
    fprintf(outfile, "Repetitions        %16d\n",ntimes);
    fprintf(outfile, "Leading dimension  %16d\n",lda);  
    fprintf(outfile, "                              dgefa     dgesl "
                     "    total    Mflops\n");
    fprintf(outfile, "1 pass seconds     %16.5f %9.5f %9.5f\n",
                      atime[0][0], atime[1][0], atime[2][0]);
                      
    for (i=1 ; i<6 ; i++)
    {                 
        fprintf(outfile, "Repeat seconds     %16.5f %9.5f %9.5f %9.2f\n",                
                       atime[0][i], atime[1][i], atime[2][i], atime[3][i]);
    }
    fprintf(outfile, "Average            %46.2f\n",atime[3][6]);
    
    fprintf(outfile, "Leading dimension  %16d\n",ldaa);
     
    for (i=7 ; i<12 ; i++)
    {                 
        fprintf(outfile, "Repeat seconds     %16.5f %9.5f %9.5f %9.2f\n",                
                       atime[0][i], atime[1][i], atime[2][i], atime[3][i]);
    }
    fprintf(outfile, "Average            %46.2f\n\n",atime[3][12]); 
    
    fclose (outfile);
    
    printf("\nPress any key\n");
    #ifdef DOS
    Endit = getch();
    #endif

    return 0;
}
Example #5
0
/*
 * Fill column `col` of the file-scope st[][] table from one measured total:
 *   row 2 = total, row 3 = ops/(1.0e3*total), row 4 = 2.0e3/row 3,
 *   row 5 = total/cray.
 * When total is not positive (the clock could not resolve the run, or the
 * overhead subtraction went negative) rows 3 and 4 are set to 0 instead of
 * dividing by zero and producing inf/NaN.
 */
static void st_store_rates (int col, REAL total, REAL ops, REAL cray)
{
   st[2][col] = total;
   if (total > 0.0) {
      st[3][col] = ops/(1.0e3*total);
      st[4][col] = 2.0e3/st[3][col];
   } else {
      st[3][col] = 0.0;
      st[4][col] = 0.0;
   }
   st[5][col] = total/cray;
}

/*
 * Linpack driver for an order-100 system.
 *
 * Columns 0-3 of st[][] are measured on `a` (leading dimension 201) and
 * columns 4-7 on `aa` (leading dimension 200).  In each group the first
 * three columns time a single dgefa + dgesl, and the last one averages
 * NTIMES repetitions with the matgen time subtracted from the dgefa figure.
 * Column 0's solution is also used to print a normalised residual.
 * The reported Kflops value is the smaller of st[3][3] and st[3][7],
 * rounded to the nearest integer.
 *
 * Returns EXIT_SUCCESS.  (Declared int: `void main` is not a standard
 * signature for a hosted program.)
 */
int main (void)
{
   static REAL aa[200][200],a[200][201],b[200],x[200];
   REAL cray,ops,total,norma,normx;
   REAL resid,residn,eps;
   REAL epslon(),kf;
   double tm2;
   static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
   static user_timer second_timer;

   lda = 201;
   ldaa = 200;
   cray = .056;
   n = 100;

   /* Print the macros through "%s" so they are never parsed as formats. */
   printf("%s", ROLLING); printf("%s", PREC);
   printf("Precision Linpack\n\n");

   ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

   /* ---- column 0: single factor + solve on a ---- */
   matgen(a,lda,n,b,&norma);
   TimerOn();
   dgefa(a,lda,n,ipvt,&info);
   TimerOff();
   st[0][0] = TimerElapsed();
   Report( "clinpack(dgefa#1)", st[0][0] );

   TimerOn();
   dgesl(a,lda,n,ipvt,b,0);
   TimerOff();
   st[1][0] = TimerElapsed();
   Report( "clinpack(dgesl#1)", st[1][0] );
   total = st[0][0] + st[1][0];

   /* compute a residual to verify results: x keeps the solution, a and b
      are regenerated, then b := a*x - b via dmxpy on the negated b. */
   for (i = 0; i < n; i++) {
      x[i] = b[i];
   }
   matgen(a,lda,n,b,&norma);
   for (i = 0; i < n; i++) {
      b[i] = -b[i];
   }
   dmxpy(n,b,n,lda,x,a);
   resid = 0.0;
   normx = 0.0;
   for (i = 0; i < n; i++) {
      resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
      normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
   }
   eps = epslon((REAL)ONE);
   residn = resid/( n*norma*normx*eps );

   printf("   norm. resid      resid           machep");
   printf("         x[0]-1        x[n-1]-1\n");
   printf("%8.1f      %16.8e%16.8e%16.8e%16.8e\n",
          (double)residn, (double)resid, (double)eps,
          (double)x[0]-1, (double)x[n-1]-1);

   printf(" times are reported for matrices of order %5d\n",n);
   printf("      dgefa      dgesl      total       kflops     unit");
   printf("      ratio\n");

   st_store_rates(0, total, ops, cray);

   printf(" times for array with leading dimension of%5d\n",lda);
   print_time(0);

   /* ---- column 1 ---- */
   matgen(a,lda,n,b,&norma);
   TimerOn();
   dgefa(a,lda,n,ipvt,&info);
   TimerOff();
   st[0][1] = TimerElapsed();
   Report( "clinpack(dgefa#2)", st[0][1] );

   TimerOn();
   dgesl(a,lda,n,ipvt,b,0);
   TimerOff();
   st[1][1] = TimerElapsed();
   Report( "clinpack(dgesl#2)", st[1][1] );
   total = st[0][1] + st[1][1];
   st_store_rates(1, total, ops, cray);

   /* ---- column 2 ---- */
   matgen(a,lda,n,b,&norma);
   TimerOn();
   dgefa(a,lda,n,ipvt,&info);
   TimerOff();
   st[0][2] = TimerElapsed();
   Report( "clinpack(dgefa#3)", st[0][2] );

   TimerOn();
   dgesl(a,lda,n,ipvt,b,0);
   TimerOff();
   st[1][2] = TimerElapsed();
   Report( "clinpack(dgesl#3)", st[1][2] );
   total = st[0][2] + st[1][2];
   st_store_rates(2, total, ops, cray);

   /* ---- column 3: NTIMES repetitions; tm2 accumulates the matgen time so
      it can be subtracted from the factorisation figure ---- */
   ntimes = NTIMES;
   tm2 = 0.0;
   UserTimerOn( &second_timer );
   for (i = 0; i < ntimes; i++) {
      TimerOn();
      matgen(a,lda,n,b,&norma);
      TimerOff();
      tm2 = tm2 + TimerElapsed();
      dgefa(a,lda,n,ipvt,&info);
   }
   UserTimerOff( &second_timer );
   st[0][3] = ( UserTimerElapsed( &second_timer ) - tm2)/ntimes;
   Report( "clinpack(dgefa#4)", st[0][3] );

   TimerOn();
   for (i = 0; i < ntimes; i++) {
      dgesl(a,lda,n,ipvt,b,0);
   }
   TimerOff();
   st[1][3] = TimerElapsed()/ntimes;
   Report( "clinpack(dgesl#4)", st[1][3] );
   total = st[0][3] + st[1][3];
   st_store_rates(3, total, ops, cray);

   print_time(1);
   print_time(2);
   print_time(3);

   /* ---- column 4: same sequence on aa (leading dimension ldaa) ---- */
   matgen(aa,ldaa,n,b,&norma);
   TimerOn();
   dgefa(aa,ldaa,n,ipvt,&info);
   TimerOff();
   st[0][4] = TimerElapsed();
   Report( "clinpack(dgefa#5)", st[0][4] );

   TimerOn();
   dgesl(aa,ldaa,n,ipvt,b,0);
   TimerOff();
   st[1][4] = TimerElapsed();
   Report( "clinpack(dgesl#5)", st[1][4] );
   total = st[0][4] + st[1][4];
   st_store_rates(4, total, ops, cray);

   /* ---- column 5 ---- */
   matgen(aa,ldaa,n,b,&norma);
   TimerOn();
   dgefa(aa,ldaa,n,ipvt,&info);
   TimerOff();
   st[0][5] = TimerElapsed();
   Report( "clinpack(dgefa#6)", st[0][5] );

   TimerOn();
   dgesl(aa,ldaa,n,ipvt,b,0);
   TimerOff();
   st[1][5] = TimerElapsed();
   Report( "clinpack(dgesl#6)", st[1][5] );
   total = st[0][5] + st[1][5];
   st_store_rates(5, total, ops, cray);

   /* ---- column 6 ---- */
   matgen(aa,ldaa,n,b,&norma);
   TimerOn();
   dgefa(aa,ldaa,n,ipvt,&info);
   TimerOff();
   st[0][6] = TimerElapsed();
   Report( "clinpack(dgefa#7)", st[0][6] );

   TimerOn();
   dgesl(aa,ldaa,n,ipvt,b,0);
   TimerOff();
   st[1][6] = TimerElapsed();
   Report( "clinpack(dgesl#7)", st[1][6] );
   total = st[0][6] + st[1][6];
   st_store_rates(6, total, ops, cray);

   /* ---- column 7: NTIMES repetitions on aa ---- */
   ntimes = NTIMES;
   tm2 = 0;
   UserTimerOn( &second_timer );
   for (i = 0; i < ntimes; i++) {
      TimerOn();
      matgen(aa,ldaa,n,b,&norma);
      TimerOff();
      tm2 = tm2 + TimerElapsed();
      dgefa(aa,ldaa,n,ipvt,&info);
   }
   UserTimerOff( &second_timer );
   st[0][7] = ( UserTimerElapsed( &second_timer ) - tm2 ) / ntimes;
   Report( "clinpack(dgefa#8)", st[0][7] );

   TimerOn();
   for (i = 0; i < ntimes; i++) {
      dgesl(aa,ldaa,n,ipvt,b,0);
   }
   TimerOff();
   st[1][7] = TimerElapsed()/ntimes;
   Report( "clinpack(dgesl#8)", st[1][7] );
   total = st[0][7] + st[1][7];
   st_store_rates(7, total, ops, cray);

   /* Equivalent of the Fortran "nint(min(st[3][3],st[3][7]))", with
      negative rates clamped to zero first so the +0.5 rounding is valid. */
   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
   kf = st[3][3];
   if ( st[3][7] < st[3][3] ) kf = st[3][7];
   kflops = (int)(kf + 0.5);

   printf(" times for array with leading dimension of%4d\n",ldaa);
   print_time(4);
   print_time(5);
   print_time(6);
   print_time(7);
   printf("%s", ROLLING); printf("%s", PREC);
   printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
   return EXIT_SUCCESS;
}
Example #6
0
/*
 * Fill column `col` of the file-scope time[][] table from one measured
 * total: row 2 = total, row 3 = ops/(1.0e3*total), row 4 = 2.0e3/row 3,
 * row 5 = total/cray.  A non-positive total (clock too coarse, or the
 * overhead subtraction went negative) stores 0 in rows 3 and 4 rather than
 * dividing by zero.
 */
static void time_store_rates (int col, REAL total, REAL ops, REAL cray)
{
	time[2][col] = total;
	if (total > 0.0) {
		time[3][col] = ops/(1.0e3*total);
		time[4][col] = 2.0e3/time[3][col];
	} else {
		time[3][col] = 0.0;
		time[4][col] = 0.0;
	}
	time[5][col] = total/cray;
}

/*
 * Linpack driver for an order-100 system, timed with second().
 *
 * Columns 0-3 of time[][] are measured on `a` (leading dimension 201),
 * columns 4-7 on `aa` (leading dimension 200).  Columns 3 and 7 average
 * NTIMES repetitions with the matgen time (tm2) subtracted from the dgefa
 * figure; the other columns time a single dgefa + dgesl.  The residual
 * line goes to stdout, the timing report to stderr.  The final Kflops
 * figure is the smaller of time[3][3] and time[3][7], rounded to nearest.
 *
 * Returns 0.
 */
int main (void)
{
	static REAL aa[200][200],a[200][201],b[200],x[200];
	REAL cray,ops,total,norma,normx;
	REAL resid,residn,eps,t1,tm,tm2;
	REAL epslon(),second(),kf;
	static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;

	lda = 201;
	ldaa = 200;
	cray = .056;
	n = 100;

	/* The macros are printed through "%s" so they are never parsed as
	   format strings. */
	fprintf(stdout,"%s",ROLLING);fprintf(stdout,"%s",PREC);fprintf(stdout,"Precision Linpack\n\n");
	fprintf(stderr,"%s",ROLLING);fprintf(stderr,"%s",PREC);fprintf(stderr,"Precision Linpack\n\n");

	ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

	/* ---- column 0: single factor + solve on a ---- */
	matgen(a,lda,n,b,&norma);
	t1 = second();
	dgefa(a,lda,n,ipvt,&info);
	time[0][0] = second() - t1;
	t1 = second();
	dgesl(a,lda,n,ipvt,b,0);
	time[1][0] = second() - t1;
	total = time[0][0] + time[1][0];

	/* compute a residual to verify results: x keeps the solution, a and b
	   are regenerated, then dmxpy adds a*x to the negated b. */
	for (i = 0; i < n; i++) {
		x[i] = b[i];
	}
	matgen(a,lda,n,b,&norma);
	for (i = 0; i < n; i++) {
		b[i] = -b[i];
	}
	dmxpy(n,b,n,lda,x,a);
	resid = 0.0;
	normx = 0.0;
	for (i = 0; i < n; i++) {
		resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
		normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
	}
	eps = epslon((REAL)ONE);
	residn = resid/( n*norma*normx*eps );

	printf("     norm. resid      resid           machep");
	printf("         x[0]-1        x[n-1]-1\n");
	printf("  %8.1f      %16.8e%16.8e%16.8e%16.8e\n",
	       (double)residn, (double)resid, (double)eps,
	       (double)x[0]-1, (double)x[n-1]-1);

	fprintf(stderr,"    times are reported for matrices of order %5d\n",n);
	fprintf(stderr,"      dgefa      dgesl      total       kflops     unit");
	fprintf(stderr,"      ratio\n");

	time_store_rates(0, total, ops, cray);

	fprintf(stderr," times for array with leading dimension of%5d\n",lda);
	print_time(0);

	/* ---- column 1 ---- */
	matgen(a,lda,n,b,&norma);
	t1 = second();
	dgefa(a,lda,n,ipvt,&info);
	time[0][1] = second() - t1;
	t1 = second();
	dgesl(a,lda,n,ipvt,b,0);
	time[1][1] = second() - t1;
	total = time[0][1] + time[1][1];
	time_store_rates(1, total, ops, cray);

	/* ---- column 2 ---- */
	matgen(a,lda,n,b,&norma);
	t1 = second();
	dgefa(a,lda,n,ipvt,&info);
	time[0][2] = second() - t1;
	t1 = second();
	dgesl(a,lda,n,ipvt,b,0);
	time[1][2] = second() - t1;
	total = time[0][2] + time[1][2];
	time_store_rates(2, total, ops, cray);

	/* ---- column 3: NTIMES repetitions; tm2 accumulates matgen time ---- */
	ntimes = NTIMES;
	tm2 = 0.0;
	t1 = second();
	for (i = 0; i < ntimes; i++) {
		tm = second();
		matgen(a,lda,n,b,&norma);
		tm2 = tm2 + second() - tm;
		dgefa(a,lda,n,ipvt,&info);
	}
	time[0][3] = (second() - t1 - tm2)/ntimes;

	t1 = second();
	for (i = 0; i < ntimes; i++) {
		dgesl(a,lda,n,ipvt,b,0);
	}
	time[1][3] = (second() - t1)/ntimes;
	total = time[0][3] + time[1][3];
	time_store_rates(3, total, ops, cray);

	print_time(1);
	print_time(2);
	print_time(3);

	/* ---- column 4: same sequence on aa (leading dimension ldaa) ---- */
	matgen(aa,ldaa,n,b,&norma);
	t1 = second();
	dgefa(aa,ldaa,n,ipvt,&info);
	time[0][4] = second() - t1;
	t1 = second();
	dgesl(aa,ldaa,n,ipvt,b,0);
	time[1][4] = second() - t1;
	total = time[0][4] + time[1][4];
	time_store_rates(4, total, ops, cray);

	/* ---- column 5 ---- */
	matgen(aa,ldaa,n,b,&norma);
	t1 = second();
	dgefa(aa,ldaa,n,ipvt,&info);
	time[0][5] = second() - t1;
	t1 = second();
	dgesl(aa,ldaa,n,ipvt,b,0);
	time[1][5] = second() - t1;
	total = time[0][5] + time[1][5];
	time_store_rates(5, total, ops, cray);

	/* ---- column 6 ---- */
	matgen(aa,ldaa,n,b,&norma);
	t1 = second();
	dgefa(aa,ldaa,n,ipvt,&info);
	time[0][6] = second() - t1;
	t1 = second();
	dgesl(aa,ldaa,n,ipvt,b,0);
	time[1][6] = second() - t1;
	total = time[0][6] + time[1][6];
	time_store_rates(6, total, ops, cray);

	/* ---- column 7: NTIMES repetitions on aa ---- */
	ntimes = NTIMES;
	tm2 = 0;
	t1 = second();
	for (i = 0; i < ntimes; i++) {
		tm = second();
		matgen(aa,ldaa,n,b,&norma);
		tm2 = tm2 + second() - tm;
		dgefa(aa,ldaa,n,ipvt,&info);
	}
	time[0][7] = (second() - t1 - tm2)/ntimes;

	t1 = second();
	for (i = 0; i < ntimes; i++) {
		dgesl(aa,ldaa,n,ipvt,b,0);
	}
	time[1][7] = (second() - t1)/ntimes;
	total = time[0][7] + time[1][7];
	time_store_rates(7, total, ops, cray);

	/* the following code sequence implements the semantics of
	   the Fortran intrinsics "nint(min(time[3][3],time[3][7]))"	*/
	kf = (time[3][3] < time[3][7]) ? time[3][3] : time[3][7];
	kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
	if (fabs((double)kf) < ONE)
		kflops = 0;
	else {
		kflops = floor(fabs((double)kf));
		if (kf < ZERO) kflops = -kflops;
	}

	fprintf(stderr," times for array with leading dimension of%4d\n",ldaa);
	print_time(4);
	print_time(5);
	print_time(6);
	print_time(7);
	fprintf(stderr,"%s",ROLLING);fprintf(stderr,"%s",PREC);
	fprintf(stderr," Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
	return 0;
}
Example #7
0
/*
 * Fill column `col` of the file-scope st[][] table from one measured total:
 * row 2 = total, row 3 = ops/(1.0e3*total), row 4 = 2.0e3/row 3,
 * row 5 = total/cray.
 *
 * On extremely fast machines, the total time between checks can be less
 * than the resolution of the clock.  In this case, total will be 0.  It is
 * then replaced by `tick` (the time 1 clock tick takes) as a way to avoid
 * dividing by 0.  (Derek Wright, 9/4/97)
 */
static void
clinpack_store_rates ( int col, REAL total, float tick, REAL ops, REAL cray )
{
   if( total == 0 ) total = tick;

   st[2][col] = total;
   st[3][col] = ops/(1.0e3*total);
   st[4][col] = 2.0e3/st[3][col];
   st[5][col] = total/cray;
}

/*
 * Run the order-100 Linpack kernels and return a Kflops rating.
 *
 * Columns 0-3 of st[][] are measured on `a` (leading dimension 201),
 * columns 4-7 on `aa` (leading dimension 200).  Columns 3 and 7 average
 * `ntimes` repetitions with the matgen time (tm2) subtracted from the dgefa
 * figure; the other columns time a single dgefa + dgesl.
 *
 * ntimes: repetition count for columns 3 and 7.  Values below 1 are
 *         treated as 1 so the per-repetition averages never divide by zero.
 *
 * Returns the smaller of st[3][3] and st[3][7], clamped at zero and rounded
 * to the nearest integer.
 */
int
clinpack_kflops ( int ntimes )
{
   static REAL aa[200][200],a[200][201],b[200],x[200];
   REAL cray,ops,total,norma,normx;
   REAL resid,residn,eps;
   REAL kf;
   double t1,tm,tm2;
   static int ipvt[200],n,i,info,lda,ldaa,kflops;

#if defined(WIN32)
   static float one_tick = .0001;
#else
   static long clock_tick = -1;
   static float one_tick;
   if ( clock_tick < 1 || clock_tick > 1000) {
      /* clock_tick is the number of ticks per second */
      clock_tick = sysconf( _SC_CLK_TCK );
      if ( clock_tick < 1 ) {
         /* sysconf failed: use the same tick length as the WIN32 build
            rather than a negative or infinite one. */
         one_tick = .0001;
      } else {
         /* one_tick is the length of time for one tick */
         one_tick = (float) 1 / clock_tick;
      }
   }
#endif

   /* A zero or negative count would make the averages below 0/0 or x/0,
      and the resulting NaN/inf would reach the final cast to int. */
   if ( ntimes < 1 ) ntimes = 1;

   lda = 201;
   ldaa = 200;
   cray = .056;
   n = 100;

   ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

   /* ---- column 0: single factor + solve on a ---- */
   matgen((double *)a,lda,n,b,&norma);
   t1 = dtime();
   dgefa((double *)a,lda,n,ipvt,&info);
   st[0][0] = dtime() - t1;

   t1 = dtime();
   dgesl((double *)a,lda,n,ipvt,b,0);
   st[1][0] = dtime() - t1;

   total = st[0][0] + st[1][0];
   if( total == 0 ) total = one_tick;

   /* compute a residual to verify results.  The value (residn) is computed
      but not read again in this function. */
   for (i = 0; i < n; i++) {
      x[i] = b[i];
   }
   matgen((double *)a,lda,n,b,&norma);
   for (i = 0; i < n; i++) {
      b[i] = -b[i];
   }
   dmxpy(n,b,n,lda,x,(double *)a);
   resid = 0.0;
   normx = 0.0;
   for (i = 0; i < n; i++) {
      resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
      normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
   }
   eps = epslon((REAL)ONE);
   residn = resid/( n*norma*normx*eps );

   clinpack_store_rates(0, total, one_tick, ops, cray);

   /* ---- column 1 ---- */
   matgen((double *)a,lda,n,b,&norma);
   t1 = dtime();
   dgefa((double *)a,lda,n,ipvt,&info);
   st[0][1] = dtime() - t1;

   t1 = dtime();
   dgesl((double *)a,lda,n,ipvt,b,0);
   st[1][1] = dtime() - t1;

   total = st[0][1] + st[1][1];
   clinpack_store_rates(1, total, one_tick, ops, cray);

   /* ---- column 2 ---- */
   matgen((double *)a,lda,n,b,&norma);
   t1 = dtime();
   dgefa((double *)a,lda,n,ipvt,&info);
   st[0][2] = dtime() - t1;

   t1 = dtime();
   dgesl((double *)a,lda,n,ipvt,b,0);
   st[1][2] = dtime() - t1;

   total = st[0][2] + st[1][2];
   clinpack_store_rates(2, total, one_tick, ops, cray);

   /* ---- column 3: ntimes repetitions; tm2 accumulates matgen time ---- */
   tm2 = 0.0;
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
      tm = dtime();
      matgen((double *)a,lda,n,b,&norma);
      tm2 = tm2 + dtime() - tm;
      dgefa((double *)a,lda,n,ipvt,&info);
   }
   st[0][3] = (dtime() - t1 - tm2)/ntimes;

   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
      dgesl((double *)a,lda,n,ipvt,b,0);
   }
   st[1][3] = (dtime() - t1)/ntimes;

   total = st[0][3] + st[1][3];
   clinpack_store_rates(3, total, one_tick, ops, cray);

   /* ---- column 4: same sequence on aa (leading dimension ldaa) ---- */
   matgen((double *)aa,ldaa,n,b,&norma);
   t1 = dtime();
   dgefa((double *)aa,ldaa,n,ipvt,&info);
   st[0][4] = dtime() - t1;

   t1 = dtime();
   dgesl((double *)aa,ldaa,n,ipvt,b,0);
   st[1][4] = dtime() - t1;

   total = st[0][4] + st[1][4];
   clinpack_store_rates(4, total, one_tick, ops, cray);

   /* ---- column 5 ---- */
   matgen((double *)aa,ldaa,n,b,&norma);
   t1 = dtime();
   dgefa((double *)aa,ldaa,n,ipvt,&info);
   st[0][5] = dtime() - t1;

   t1 = dtime();
   dgesl((double *)aa,ldaa,n,ipvt,b,0);
   st[1][5] = dtime() - t1;

   total = st[0][5] + st[1][5];
   clinpack_store_rates(5, total, one_tick, ops, cray);

   /* ---- column 6 ---- */
   matgen((double *)aa,ldaa,n,b,&norma);
   t1 = dtime();
   dgefa((double *)aa,ldaa,n,ipvt,&info);
   st[0][6] = dtime() - t1;

   t1 = dtime();
   dgesl((double *)aa,ldaa,n,ipvt,b,0);
   st[1][6] = dtime() - t1;

   total = st[0][6] + st[1][6];
   clinpack_store_rates(6, total, one_tick, ops, cray);

   /* ---- column 7: ntimes repetitions on aa ---- */
   tm2 = 0;
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
      tm = dtime();
      matgen((double *)aa,ldaa,n,b,&norma);
      tm2 = tm2 + dtime() - tm;
      dgefa((double *)aa,ldaa,n,ipvt,&info);
   }
   st[0][7] = (dtime() - t1 - tm2)/ntimes;

   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
      dgesl((double *)aa,ldaa,n,ipvt,b,0);
   }
   st[1][7] = (dtime() - t1)/ntimes;

   total = st[0][7] + st[1][7];
   clinpack_store_rates(7, total, one_tick, ops, cray);

   /* Equivalent of the Fortran "nint(min(st[3][3],st[3][7]))", with
      negative rates clamped to zero first so the +0.5 rounding is valid. */
   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
   kf = st[3][3];
   if ( st[3][7] < st[3][3] ) kf = st[3][7];
   kflops = (int)(kf + 0.5);

   return kflops;
}
Example #8
0
jobject Java_rs_pedjaapps_Linpack_MainActivity_runLinpack (JNIEnv* env, jobject thiz, jclass resultClass)
{
    __android_log_write (ANDROID_LOG_DEBUG, "linpack-jni.c", "running neon linpack");
        static REAL aa[200*200],a[200*201],b[200],x[200];       
        REAL cray,ops,total,norma,normx;
        REAL resid,residn,eps,tm2,epsn,x1,x2;
        REAL mflops;
        static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
        int endit, pass, loop;
        REAL overhead1, overhead2, time2;
        REAL max1, max2;
        char resultchars[1000];
        

        lda = 201;
        ldaa = 200;
        cray = .056; 
        n = 100;

        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        matgen(a,lda,n,b,&norma);
        start_time();
        dgefa(a,lda,n,ipvt,&info);
        end_time();
        atime[0][0] = secs;
        start_time();
        dgesl(a,lda,n,ipvt,b,0);
        end_time();
        atime[1][0] = secs;
        total = atime[0][0] + atime[1][0];

//     compute a residual to verify results. 

        for (i = 0; i < n; i++) {
                x[i] = b[i];
        }
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
                b[i] = -b[i];
        }
        dmxpy(n,b,n,lda,x,a);
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
                resid = (resid > fabs((double)b[i])) 
                        ? resid : fabs((double)b[i]);
                normx = (normx > fabs((double)x[i])) 
                        ? normx : fabs((double)x[i]);
        }
        eps = epslon(ONE);
        residn = resid/( n*norma*normx*eps );
        epsn = eps;
        x1 = x[0] - 1;
        x2 = x[n-1] - 1;
        

        atime[2][0] = total;
        if (total > 0.0)
        {
            atime[3][0] = ops/(1.0e6*total);
            atime[4][0] = 2.0/atime[3][0];
        }
        else
        {
            atime[3][0] = 0.0;
            atime[4][0] = 0.0;
        }
        atime[5][0] = total/cray;
       

// ************************************************************************
// *       Calculate overhead of executing matgen procedure              *
// ************************************************************************
       
        pass = -20;
        loop = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;        
            for ( i = 0 ; i < loop ; i++)
            {
                 matgen(a,lda,n,b,&norma);
            }
            end_time();
            overhead1 = secs;
            if (overhead1 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (overhead1 < 0.1)
                {
                    loop = loop * 10;
                }
                else
                {
                    loop = loop * 2;
                }
            }
        }
        while (pass < 0);
        
        overhead1 = overhead1 / (double)loop;

 
// ************************************************************************
// *           Calculate matgen/dgefa passes for runSecs seconds                *
// ************************************************************************
       
        pass = -20;
        ntimes = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;        
            for ( i = 0 ; i < ntimes ; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            time2 = secs;
            if (time2 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (time2 < 0.1)
                {
                    ntimes = ntimes * 10;
                }
                else
                {
                    ntimes = ntimes * 2;
                }
            }
        }
        while (pass < 0);
        
        ntimes =  (int)(runSecs * (double)ntimes / time2);
        if (ntimes == 0) ntimes = 1;


// ************************************************************************
// *                              Execute 5 passes                        *
// ************************************************************************
      
        tm2 = ntimes * overhead1;
        atime[3][6] = 0;

        for (j=1 ; j<6 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;

            start_time();              
            for (i = 0; i < ntimes; i++)
            {
                dgesl(a,lda,n,ipvt,b,0);
            }
            end_time();

            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][6] = atime[3][6] + atime[3][j];
            
        }
        atime[3][6] = atime[3][6] / 5.0;

// ************************************************************************
// *             Calculate overhead of executing matgen procedure         *
// ************************************************************************

        start_time();        
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(aa,ldaa,n,b,&norma);    
        }
        end_time();
        overhead2 = secs;
        overhead2 = overhead2 / (double)loop;
        

// ************************************************************************
// *                              Execute 5 passes                        *
// ************************************************************************
              
        tm2 = ntimes * overhead2;
        atime[3][12] = 0;

        for (j=7 ; j<12 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(aa,ldaa,n,b,&norma);
                dgefa(aa,ldaa,n,ipvt,&info  );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;
            
            start_time();      
            for (i = 0; i < ntimes; i++)
            {
                dgesl(aa,ldaa,n,ipvt,b,0);
            }
            end_time();
            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][12] = atime[3][12] + atime[3][j];

        }
        atime[3][12] = atime[3][12] / 5.0; 

// ************************************************************************
// *           Use minimum average as overall Mflops rating               *
// ************************************************************************
      
        mflops = atime[3][6];
        if (atime[3][12] < mflops) mflops = atime[3][12];


// ************************************************************************
// *              Add results to output file Linpack.txt                  *
// ************************************************************************

    max1 = 0;
    for (i=1 ; i<6 ; i++)
    {
        if (atime[3][i] > max1) max1 = atime[3][i];                 
    }

    max2 = 0;
    for (i=7 ; i<12 ; i++)
    {                 
        if (atime[3][i] > max2) max2 = atime[3][i];                 
    }
    if (max1 < max2) max2 = max1;

    jmethodID jConstructor = (*env)->GetMethodID (env, resultClass, "<init>", "()V");
    if (jConstructor == NULL)__android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jConstructor is NULL");
    jobject resultObject = (*env)->NewObject (env, resultClass, jConstructor);
    /*mFlops, residn, resid, epsn, x1, x2;*/
    jfieldID jMFlops = (*env)->GetFieldID (env, resultClass, "mflops", "D");
    jfieldID jResidn = (*env)->GetFieldID (env, resultClass, "nres", "D");
    jfieldID jEpsn = (*env)->GetFieldID (env, resultClass, "precision", "D");
    (*env)->SetDoubleField (env, resultObject, jMFlops, max2);
    (*env)->SetDoubleField (env, resultObject, jResidn, (double) residn);
    (*env)->SetDoubleField (env, resultObject, jEpsn, (double) epsn);
    return resultObject;
}