예제 #1
0
static REAL linpack(long nreps,int arsize)

{
    REAL  *a,*b;
    REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int   *ipvt,n,info,lda;
    long   i,arsize2d;
    
    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();
    for (i=0;i<nreps;i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }
    for (i=0;i<nreps;i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;
    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n",
           nreps,totalt,100.*tdgefa/totalt,
           100.*tdgesl/totalt,100.*toverhead/totalt,
           kflops/1000.0);
    if(totalt >= 4.0)
           call_objc_obj(kflops/1000.0);

	//printA(a,lda,arsize); //TODO ADDED BY ME FOR TESTING PURPOSES
    return(totalt);
}
예제 #2
0
static REAL linpack(long nreps,int arsize)

    {
    REAL  *a,*b;
    REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int   *ipvt,n,info,lda;
    long   i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();
    for (i=0;i<nreps;i++)
        {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
        }
    for (i=0;i<nreps;i++)
        {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
        }
    totalt=second()-totalt;
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;
    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n",
            nreps,totalt,100.*tdgefa/totalt,
            100.*tdgesl/totalt,100.*toverhead/totalt,
            kflops/1000.0);
    if(totalt > 10.){ //publish the result in the benchmark database
    	publish_linpack_result("http://modev.mine.nu:8070/benchmark/publish_result.php", "1337", "MoSync", "987123ab", "HTC%20Wildfire", "2", kflops/1000.0);
    }
    return(totalt);
    }
예제 #3
0
static REAL linpack(long nreps,int arsize)

{
    REAL  *a,*b;
    REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int   *ipvt,n,info,lda;
    long   i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();
    for (i=0; i<nreps; i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
    }
    for (i=0; i<nreps; i++)
    {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
    }
    totalt=second()-totalt;
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;
    printf("%f\n",
           kflops);
    return(totalt);
}
예제 #4
0
파일: linpack.c 프로젝트: tomari/microbench
static REAL linpack(long nreps,int arsize)

    {
    REAL  *a,*b;
    REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int   *ipvt,n,info,lda;
    long   i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=dtime();
    for (i=0;i<nreps;i++)
        {
        matgen(a,lda,n,b,&norma);
        t1 = dtime();
        dgefa(a,lda,n,ipvt,&info,roll);
        tdgefa += dtime()-t1;
        t1 = dtime();
        dgesl(a,lda,n,ipvt,b,0,roll);
        tdgesl += dtime()-t1;
        }
    totalt=dtime()-totalt;
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);
    kflops=nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;
    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n",
            nreps,totalt,100.*tdgefa/totalt,
            100.*tdgesl/totalt,100.*toverhead/totalt,
            kflops);
    return(totalt);
    }
/*
 * Driver for the order-100 LINPACK benchmark.
 *
 * Sequence performed by the visible code:
 *   1. one timed dgefa/dgesl pass plus a residual check of the solution;
 *   2. calibration of the per-call matgen cost (overhead1);
 *   3. calibration of ntimes so a matgen+dgefa batch lasts about runSecs;
 *   4. five timed batches with leading dimension lda (atime columns 1..5,
 *      average in column 6);
 *   5. five timed batches with leading dimension ldaa (atime columns 7..11,
 *      average in column 12);
 *   6. the smaller of the two averages is reported as the Mflops rating;
 *   7. the numeric results are formatted into was[], and reference values
 *      into expect[] / title[].
 *
 * Timing goes through start_time()/end_time(), which leave the interval in
 * the file-level variable secs.  atime, runSecs, options and opt are also
 * file-level names defined outside this block.
 *
 * NOTE(review): endit is never used; errors, max2, was[], expect[] and
 * title[] are written but never read in the code shown here -- presumably
 * a comparison/report step followed originally; confirm before removing.
 */
int main (int argc, char *argv[])
{
        static REAL aa[200*200],a[200*201],b[200],x[200];       
        REAL cray,ops,total,norma,normx;
        REAL resid,residn,eps,tm2,epsn,x1,x2;
        REAL mflops;
        static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
        int endit, pass, loop;
        REAL overhead1, overhead2, time2;
        REAL max1, max2;
        char was[5][20];
        char expect[5][20];
        char title[5][20];
        int errors;
        
 
        printf("\n");
         
        printf("##########################################\n"); 


    
        /* a[] is used with leading dimension 201, aa[] with 200; the
           system solved has order n = 100 in both cases. */
        lda = 201;
        ldaa = 200;
        /* cray is the divisor used for the "ratio" column (total/cray). */
        cray = .056; 
        n = 100;

        fprintf(stdout, "%s ", ROLLING);
        fprintf(stdout, "%s ", PREC);
        fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n");

        fprintf(stdout,"Optimisation %s\n\n",options);

        /* Operation count used to turn times into Mflops. */
        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        /* Single pass: column 0 of atime holds dgefa and dgesl times. */
        matgen(a,lda,n,b,&norma);
        start_time();
        dgefa(a,lda,n,ipvt,&info);
        end_time();
        atime[0][0] = secs;
        start_time();
        dgesl(a,lda,n,ipvt,b,0);
        end_time();
        atime[1][0] = secs;
        total = atime[0][0] + atime[1][0];

/*     compute a residual to verify results.  */ 

        /* Save the solution, regenerate the system, negate b and let dmxpy
           combine them; the largest |b[i]| left over is the residual. */
        for (i = 0; i < n; i++) {
                x[i] = b[i];
        }
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
                b[i] = -b[i];
        }
        dmxpy(n,b,n,lda,x,a);
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
                resid = (resid > fabs((double)b[i])) 
                        ? resid : fabs((double)b[i]);
                normx = (normx > fabs((double)x[i])) 
                        ? normx : fabs((double)x[i]);
        }
        eps = epslon(ONE);
        residn = resid/( n*norma*normx*eps );
        epsn = eps;
        x1 = x[0] - 1;
        x2 = x[n-1] - 1;
        
        printf("norm resid      resid           machep");
        printf("         x[0]-1          x[n-1]-1\n");
        printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
               (double)residn, (double)resid, (double)epsn, 
               (double)x1, (double)x2);

        fprintf(stderr,"Times are reported for matrices of order        %5d\n",n);
        fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

        /* Rows of atime: 0 dgefa, 1 dgesl, 2 total, 3 Mflops, 4 unit,
           5 ratio (matching the header line printed above). */
        atime[2][0] = total;
        if (total > 0.0)
        {
            atime[3][0] = ops/(1.0e6*total);
            atime[4][0] = 2.0/atime[3][0];
        }
        else
        {
            /* Timer resolution too coarse for one pass: avoid dividing by 0. */
            atime[3][0] = 0.0;
            atime[4][0] = 0.0;
        }
        atime[5][0] = total/cray;
       
        print_time(0);

/************************************************************************
 *       Calculate overhead of executing matgen procedure              *
 ************************************************************************/
       
        fprintf (stderr,"\nCalculating matgen overhead\n");
        /* pass counts up from -20, so at most 20 calibration rounds run;
           a round longer than runSecs ends the search immediately. */
        pass = -20;
        loop = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;        
            for ( i = 0 ; i < loop ; i++)
            {
                 matgen(a,lda,n,b,&norma);
            }
            end_time();
            overhead1 = secs;
            fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1);
            if (overhead1 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                /* Grow quickly while the round is very short, then double. */
                if (overhead1 < 0.1)
                {
                    loop = loop * 10;
                }
                else
                {
                    loop = loop * 2;
                }
            }
        }
        while (pass < 0);
        
        /* loop still holds the count used for the last timed round. */
        overhead1 = overhead1 / (double)loop;

        fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1);

/************************************************************************
 *           Calculate matgen/dgefa passes for runSecs seconds                *
 ************************************************************************/
       
        fprintf (stderr,"Calculating matgen/dgefa passes for %d seconds\n", (int)runSecs);
        /* Same search scheme as above, now timing matgen+dgefa together. */
        pass = -20;
        ntimes = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;        
            for ( i = 0 ; i < ntimes ; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            time2 = secs;
            fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2);
            if (time2 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (time2 < 0.1)
                {
                    ntimes = ntimes * 10;
                }
                else
                {
                    ntimes = ntimes * 2;
                }
            }
        }
        while (pass < 0);
        
        /* Scale the repetition count so one batch lasts about runSecs.
           NOTE(review): time2 is not checked for zero before dividing. */
        ntimes =  (int)(runSecs * (double)ntimes / time2);
        if (ntimes == 0) ntimes = 1;

        fprintf(stderr,"Passes used %10d \n\n", ntimes);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");        

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
      
        /* tm2 is the estimated matgen share of one batch; it is subtracted
           so atime[0][j] reflects dgefa alone.  Column 6 accumulates the
           Mflops of columns 1..5.
           NOTE(review): total is not checked for zero in this loop, unlike
           the single-pass case above. */
        tm2 = ntimes * overhead1;
        atime[3][6] = 0;

        for (j=1 ; j<6 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;

            start_time();              
            for (i = 0; i < ntimes; i++)
            {
                dgesl(a,lda,n,ipvt,b,0);
            }
            end_time();

            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][6] = atime[3][6] + atime[3][j];
            
            print_time(j);
        }
        atime[3][6] = atime[3][6] / 5.0;
        fprintf (stderr,"Average                          %11.2f\n",
                                               (double)atime[3][6]);        
        
        fprintf (stderr,"\nCalculating matgen2 overhead\n");

/************************************************************************
 *             Calculate overhead of executing matgen procedure         *
 ************************************************************************/

        /* Re-measure matgen for the ldaa layout, reusing the loop count
           found during the first calibration. */
        start_time();        
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(aa,ldaa,n,b,&norma);    
        }
        end_time();
        overhead2 = secs;
        overhead2 = overhead2 / (double)loop;
        
        fprintf(stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
              
        /* Columns 7..11 hold the ldaa batches; column 12 their average. */
        tm2 = ntimes * overhead2;
        atime[3][12] = 0;

        for (j=7 ; j<12 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(aa,ldaa,n,b,&norma);
                dgefa(aa,ldaa,n,ipvt,&info  );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;
            
            start_time();      
            for (i = 0; i < ntimes; i++)
            {
                dgesl(aa,ldaa,n,ipvt,b,0);
            }
            end_time();
            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][12] = atime[3][12] + atime[3][j];

            print_time(j);
        }
        atime[3][12] = atime[3][12] / 5.0; 
        fprintf (stderr,"Average                          %11.2f\n",
                                              (double)atime[3][12]);  

/************************************************************************
 *           Use minimum average as overall Mflops rating               *
 ************************************************************************/
      
        mflops = atime[3][6];
        if (atime[3][12] < mflops) mflops = atime[3][12];
       
        fprintf(stderr,"\n");
        fprintf(stderr, "%s ", ROLLING);
        fprintf(stderr, "%s ", PREC);
        fprintf(stderr," Precision %11.2f Mflops \n\n",mflops);


    /* Best single-batch Mflops per layout; max2 ends up as the smaller of
       the two maxima. */
    max1 = 0;
    for (i=1 ; i<6 ; i++)
    {
        if (atime[3][i] > max1) max1 = atime[3][i];                 
    }

    max2 = 0;
    for (i=7 ; i<12 ; i++)
    {                 
        if (atime[3][i] > max2) max2 = atime[3][i];                 
    }
    if (max1 < max2) max2 = max1;
   
    /* NOTE(review): each was[] row holds 20 bytes and the width in "%16.1f"
       / "%16.8e" is only a minimum, so an unusually large value could write
       past the row -- consider snprintf(..., sizeof was[0], ...). */
    sprintf(was[0], "%16.1f",(double)residn);
    sprintf(was[1], "%16.8e",(double)resid);
    sprintf(was[2], "%16.8e",(double)epsn);
    sprintf(was[3], "%16.8e",(double)x1);
    sprintf(was[4], "%16.8e",(double)x2);

/*
    //  Values for Watcom

    sprintf(expect[0], "             0.4");
    sprintf(expect[1], " 7.41628980e-014");
    sprintf(expect[2], " 1.00000000e-015");
    sprintf(expect[3], "-1.49880108e-014");
    sprintf(expect[4], "-1.89848137e-014");
    // Values for Visual C++

    sprintf(expect[0], "             1.7");
    sprintf(expect[1], " 7.41628980e-014");
    sprintf(expect[2], " 2.22044605e-016");
    sprintf(expect[3], "-1.49880108e-014");
    sprintf(expect[4], "-1.89848137e-014");

    // Values for Ubuntu GCC 32 Bit

    sprintf(expect[0], "             1.9");
    sprintf(expect[1], "  8.39915160e-14");
    sprintf(expect[2], "  2.22044605e-16");
    sprintf(expect[3], " -6.22835117e-14");
    sprintf(expect[4], " -4.16333634e-14");
*/

     // Values for Ubuntu GCC 32 Bit

    sprintf(expect[0], "             1.7");
    sprintf(expect[1], "  7.41628980e-14");
    sprintf(expect[2], "  2.22044605e-16");
    sprintf(expect[3], " -1.49880108e-14");
    sprintf(expect[4], " -1.89848137e-14");

    sprintf(title[0], "norm. resid");
    sprintf(title[1], "resid      ");
    sprintf(title[2], "machep     ");
    sprintf(title[3], "x[0]-1     ");
    sprintf(title[4], "x[n-1]-1   ");

    /* When opt parses to 0 a different machep reference value is used. */
    if (strtol(opt, NULL, 10) == 0)
    {
        sprintf(expect[2], " 8.88178420e-016");
    }
    errors = 0;

    printf ("\n");
}
예제 #6
0
/*
 * Driver for the LINPACK benchmark of order ORDER.
 *
 * Eight timing columns are filled in the file-level table st[][]:
 *   columns 0..2  single passes with leading dimension lda (ORDER2P1),
 *   column  3     average over NTIMES repetitions with lda,
 *   columns 4..6  single passes with leading dimension ldaa (ORDER2),
 *   column  7     average over NTIMES repetitions with ldaa.
 * Rows of st: 0 dgefa, 1 dgesl, 2 total, 3 kflops, 4 unit, 5 ratio
 * (matching the header line printed below).  The reported rating is the
 * smaller of st[3][3] and st[3][7], rounded to an integer.
 *
 * NOTE(review): the `#endif` right after the signature closes a conditional
 * that starts outside this block, and the OMPC branch below uses argc, argv
 * and bf, none of which are declared here -- presumably the other branch of
 * that conditional supplies a main(argc, argv) signature; confirm.
 * NOTE(review): none of the divisions by `total` are guarded against a
 * zero interval from dtime().
 */
main ()
#endif
{
   static REAL aa[ORDER2][ORDER2],a[ORDER2][ORDER2P1],b[ORDER2],x[ORDER2];
   REAL cray,ops,total,norma,normx;
   REAL resid,residn,eps;
   REAL kf;
   double t1,tm,tm2,dtime();
   static int ipvt[ORDER2],n,i,ntimes,info,lda,ldaa,kflops;

   lda = ORDER2P1;
   ldaa = ORDER2;
   /* cray is the divisor used for the "ratio" column (total/cray). */
   cray = .056; 
   n = ORDER;

#ifdef OMPC
   {
     int		c;
     extern char	*optarg;

     /* The only option recognised is -b <int>, stored in bf. */
     while ((c = getopt (argc, argv, "b:")) != EOF) {
       switch (c) {
       case 'b':
	 bf = atoi (optarg);
	 break;
       }
     }
   }

   if (omp_get_max_threads () != 1) {
     printf ("OpenMP(%d threads)\n", omp_get_max_threads ());
   } else {
     printf ("OpenMP(1 thread)\n");
   }
#endif
   /* NOTE(review): ROLLING and PREC are passed as the format string itself. */
   printf(ROLLING); printf(PREC);
   printf("Precision Linpack\n\n");

       /* Operation count used to turn times into kflops. */
       ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

       /* Column 0: first single pass with leading dimension lda. */
       matgen(a,lda,n,b,&norma);
       t1 = dtime();
       dgefa(a,lda,n,ipvt,&info);
       st[0][0] = dtime() - t1;
       
       t1 = dtime();
       dgesl(a,lda,n,ipvt,b,0);
       st[1][0] = dtime() - t1;
       total = st[0][0] + st[1][0];

/*     compute a residual to verify results.  */ 

       /* Save the solution, regenerate the system, negate b and let dmxpy
          combine them; the largest |b[i]| left over is the residual. */
       for (i = 0; i < n; i++)
	  {
		x[i] = b[i];
	  }
       matgen(a,lda,n,b,&norma);
       for (i = 0; i < n; i++) 
	  {
		b[i] = -b[i];
	  }
       dmxpy(n,b,n,lda,x,a);
       resid = 0.0;
       normx = 0.0;
       for (i = 0; i < n; i++)
	{
		resid = (resid > fabs((double)b[i])) 
	? resid : fabs((double)b[i]);
		normx = (normx > fabs((double)x[i])) 
	? normx : fabs((double)x[i]);
	}
       eps = epslon((REAL)ONE);
       residn = resid/( n*norma*normx*eps );
   
   printf("   norm. resid      resid           machep");
   printf("         x[0]-1        x[n-1]-1\n");
   printf("%8.1f      %16.8e%16.8e%16.8e%16.8e\n",
	 (double)residn, (double)resid, (double)eps, 
		(double)x[0]-1, (double)x[n-1]-1);

printf(" times are reported for matrices of order %5d\n",n);
printf("      dgefa      dgesl      total       kflops     unit");
printf("      ratio\n");

       st[2][0] = total;
       st[3][0] = ops/(1.0e3*total);
       st[4][0] = 2.0e3/st[3][0];
       st[5][0] = total/cray;

   printf(" times for array with leading dimension of%5d\n",lda);
   print_time(0);

       /* Column 1: second single pass with lda. */
       matgen(a,lda,n,b,&norma);
       t1 = dtime();
       dgefa(a,lda,n,ipvt,&info);
       st[0][1] = dtime() - t1;
       
       t1 = dtime();
       dgesl(a,lda,n,ipvt,b,0);
       st[1][1] = dtime() - t1;
       total = st[0][1] + st[1][1];
       
       st[2][1] = total;
       st[3][1] = ops/(1.0e3*total);
       st[4][1] = 2.0e3/st[3][1];
       st[5][1] = total/cray;

       /* Column 2: third single pass with lda. */
       matgen(a,lda,n,b,&norma);
       
       t1 = dtime();
       dgefa(a,lda,n,ipvt,&info);
       st[0][2] = dtime() - t1;
       
       t1 = dtime();
       dgesl(a,lda,n,ipvt,b,0);
       st[1][2] = dtime() - t1;
       
       total = st[0][2] + st[1][2];
       st[2][2] = total;
       st[3][2] = ops/(1.0e3*total);
       st[4][2] = 2.0e3/st[3][2];
       st[5][2] = total/cray;

       /* Column 3: NTIMES repetitions with lda.  tm2 accumulates the time
          spent inside matgen so it can be subtracted from the dgefa total. */
       ntimes = NTIMES;
       tm2 = 0.0;
       t1 = dtime();

   for (i = 0; i < ntimes; i++) {
		tm = dtime();
       matgen(a,lda,n,b,&norma);
       tm2 = tm2 + dtime() - tm;
       dgefa(a,lda,n,ipvt,&info);
       }

       st[0][3] = (dtime() - t1 - tm2)/ntimes;
       t1 = dtime();

   for (i = 0; i < ntimes; i++) {
		dgesl(a,lda,n,ipvt,b,0);
       }

       st[1][3] = (dtime() - t1)/ntimes;
       total = st[0][3] + st[1][3];
       st[2][3] = total;
       st[3][3] = ops/(1.0e3*total);
       st[4][3] = 2.0e3/st[3][3];
       st[5][3] = total/cray;

   print_time(1);
   print_time(2);
   print_time(3);

       /* Column 4: first single pass with leading dimension ldaa. */
       matgen(aa,ldaa,n,b,&norma);
       t1 = dtime();
       dgefa(aa,ldaa,n,ipvt,&info);
       st[0][4] = dtime() - t1;
       
       t1 = dtime();
       dgesl(aa,ldaa,n,ipvt,b,0);
       st[1][4] = dtime() - t1;

       total = st[0][4] + st[1][4];
       st[2][4] = total;
       st[3][4] = ops/(1.0e3*total);
       st[4][4] = 2.0e3/st[3][4];
       st[5][4] = total/cray;

       /* Column 5: second single pass with ldaa. */
       matgen(aa,ldaa,n,b,&norma);
       t1 = dtime();
       dgefa(aa,ldaa,n,ipvt,&info);
       st[0][5] = dtime() - t1;

       t1 = dtime();
       dgesl(aa,ldaa,n,ipvt,b,0);
       st[1][5] = dtime() - t1;

       total = st[0][5] + st[1][5];
       st[2][5] = total;
       st[3][5] = ops/(1.0e3*total);
       st[4][5] = 2.0e3/st[3][5];
       st[5][5] = total/cray;

   /* Column 6: third single pass with ldaa. */
   matgen(aa,ldaa,n,b,&norma);
   t1 = dtime();
   dgefa(aa,ldaa,n,ipvt,&info);
   st[0][6] = dtime() - t1;

   t1 = dtime();
   dgesl(aa,ldaa,n,ipvt,b,0);
   st[1][6] = dtime() - t1;

   total = st[0][6] + st[1][6];
   st[2][6] = total;
   st[3][6] = ops/(1.0e3*total);
   st[4][6] = 2.0e3/st[3][6];
   st[5][6] = total/cray;

   /* Column 7: NTIMES repetitions with ldaa, matgen time subtracted as in
      column 3. */
   ntimes = NTIMES;
   tm2 = 0;
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
       tm = dtime();
       matgen(aa,ldaa,n,b,&norma);
       tm2 = tm2 + dtime() - tm;
       dgefa(aa,ldaa,n,ipvt,&info);
       }

   st[0][7] = (dtime() - t1 - tm2)/ntimes;
   
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
       dgesl(aa,ldaa,n,ipvt,b,0);
       }

   st[1][7] = (dtime() - t1)/ntimes;
   total = st[0][7] + st[1][7];
   st[2][7] = total;
   st[3][7] = ops/(1.0e3*total);
   st[4][7] = 2.0e3/st[3][7];
   st[5][7] = total/cray;

   /* the following code sequence implements the semantics of
       the Fortran intrinsics "nint(min(st[3][3],st[3][7]))"   */
/*
   kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
   kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
   if (fabs((double)kf) < ONE) 
       kflops = 0;
   else {
       kflops = floor(fabs((double)kf));
       if (kf < ZERO) kflops = -kflops;
   }
*/
   /* Active replacement for the block above: clamp both averaged rates at
      zero, take the smaller one and round it to the nearest integer. */
   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
   kf = st[3][3];
   if ( st[3][7] < st[3][3] ) kf = st[3][7];
   kflops = (int)(kf + 0.5);

   printf(" times for array with leading dimension of%5d\n",ldaa);
   print_time(4);
   print_time(5);
   print_time(6);
   print_time(7);
   printf(ROLLING); printf(PREC);
   printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
}
예제 #7
0
main ()
{
        static REAL aa[200*200],a[200*201],b[200],x[200];       
        REAL cray,ops,total,norma,normx;
        REAL resid,residn,eps,t1,tm2,epsn,x1,x2;
        REAL mflops;
        static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
        int Endit, pass, loop;
        REAL overhead1, overhead2, time1, time2;
        FILE    *outfile;
        char *compiler, *options, general[9][80] = {" "}; 
         
        outfile = fopen("Linpack.txt","a+");
        if (outfile == NULL)
        {
            printf ("Cannot open results file \n\n");
            printf("Press any key\n");
            #ifdef DOS
            Endit = getch();
            #endif
            exit (0);
        }

/************************************************************************
 *           Enter details of compiler and options used                 *
 ************************************************************************/
                  /*----------------- --------- --------- ---------*/
        compiler = "INSERT COMPILER NAME HERE";
        options  = "INSERT OPTIMISATION OPTIONS HERE";
                  /* Include -dDP or -dSP and -dROLL or -dUNROLL */
    
        lda = 201;
        ldaa = 200;
        cray = .056; 
        n = 100;

        fprintf(stdout,ROLLING);fprintf(stdout,PREC);
        fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n");
        fprintf(stdout,"Compiler     %s\n",compiler);
        fprintf(stdout,"Optimisation %s\n\n",options);

        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info);
        atime[0][0] = second() - t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0);
        atime[1][0] = second() - t1;
        total = atime[0][0] + atime[1][0];

/*     compute a residual to verify results.  */ 

        for (i = 0; i < n; i++) {
                x[i] = b[i];
        }
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
                b[i] = -b[i];
        }
        dmxpy(n,b,n,lda,x,a);
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
                resid = (resid > fabs((double)b[i])) 
                        ? resid : fabs((double)b[i]);
                normx = (normx > fabs((double)x[i])) 
                        ? normx : fabs((double)x[i]);
        }
        eps = epslon(ONE);
        residn = resid/( n*norma*normx*eps );
        epsn = eps;
        x1 = x[0] - 1;
        x2 = x[n-1] - 1;
        
        printf("norm resid      resid           machep");
        printf("         x[0]-1          x[n-1]-1\n");
        printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
               (double)residn, (double)resid, (double)epsn, 
               (double)x1, (double)x2);

        fprintf(stderr,"Times are reported for matrices of order        %5d\n",n);
        fprintf(stderr,"1 pass times for array with leading dimension of%5d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

        atime[2][0] = total;
        if (total > 0.0)
        {
            atime[3][0] = ops/(1.0e6*total);
            atime[4][0] = 2.0/atime[3][0];
        }
        else
        {
            atime[3][0] = 0.0;
            atime[4][0] = 0.0;
        }
        atime[5][0] = total/cray;
       
        print_time(0);

/************************************************************************
 *       Calculate overhead of executing matgen procedure              *
 ************************************************************************/
       
        fprintf (stderr,"\nCalculating matgen overhead\n");
        pass = -20;
        loop = NTIMES;
        do
        {
            time1 = second();
            pass = pass + 1;        
            for ( i = 0 ; i < loop ; i++)
            {
                 matgen(a,lda,n,b,&norma);
            }
            time2 = second();
            overhead1 = (time2 - time1);
            fprintf (stderr,"%10d times %6.2f seconds\n", loop, overhead1);
            if (overhead1 > 5.0)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (overhead1 < 0.1)
                {
                    loop = loop * 10;
                }
                else
                {
                    loop = loop * 2;
                }
            }
        }
        while (pass < 0);
        
        overhead1 = overhead1 / (double)loop;

        fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1);

/************************************************************************
 *           Calculate matgen/dgefa passes for 5 seconds                *
 ************************************************************************/
       
        fprintf (stderr,"Calculating matgen/dgefa passes for 5 seconds\n");
        pass = -20;
        ntimes = NTIMES;
        do
        {
            time1 = second();
            pass = pass + 1;        
            for ( i = 0 ; i < ntimes ; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            time2 = second() - time1;
            fprintf (stderr,"%10d times %6.2f seconds\n", ntimes, time2);
            if (time2 > 5.0)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (time2 < 0.1)
                {
                    ntimes = ntimes * 10;
                }
                else
                {
                    ntimes = ntimes * 2;
                }
            }
        }
        while (pass < 0);
        
        ntimes =  5.0 * (double)ntimes / time2;
        if (ntimes == 0) ntimes = 1;

        fprintf (stderr,"Passes used %10d \n\n", ntimes);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",lda);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");        

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
      
        tm2 = ntimes * overhead1;
        atime[3][6] = 0;

        for (j=1 ; j<6 ; j++)
        {
        
            t1 = second();

            for (i = 0; i < ntimes; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }

            atime[0][j] = (second() - t1 - tm2)/ntimes;

            t1 = second();      
        
            for (i = 0; i < ntimes; i++)
            {
                dgesl(a,lda,n,ipvt,b,0);
            }

            atime[1][j] = (second() - t1)/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][6] = atime[3][6] + atime[3][j];
            
            print_time(j);
        }
        atime[3][6] = atime[3][6] / 5.0;
        fprintf (stderr,"Average                          %11.2f\n",
                                               (double)atime[3][6]);        
        
        fprintf (stderr,"\nCalculating matgen2 overhead\n");

/************************************************************************
 *             Calculate overhead of executing matgen procedure         *
 ************************************************************************/

        time1 = second();        
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(aa,ldaa,n,b,&norma);    
        }
        time2 = second();
        overhead2 = (time2 - time1);
        overhead2 = overhead2 / (double)loop;
        
        fprintf (stderr,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
        fprintf(stderr,"Times for array with leading dimension of%4d\n\n",ldaa);
        fprintf(stderr,"      dgefa      dgesl      total     Mflops       unit");
        fprintf(stderr,"      ratio\n");

/************************************************************************
 *                              Execute 5 passes                        *
 ************************************************************************/
              
        tm2 = ntimes * overhead2;
        atime[3][12] = 0;

        for (j=7 ; j<12 ; j++)
        {
        
            t1 = second();

            for (i = 0; i < ntimes; i++)
            {
                matgen(aa,ldaa,n,b,&norma);
                dgefa(aa,ldaa,n,ipvt,&info  );
            }

            atime[0][j] = (second() - t1 - tm2)/ntimes;

            t1 = second();      
        
            for (i = 0; i < ntimes; i++)
            {
                dgesl(aa,ldaa,n,ipvt,b,0);
            }

            atime[1][j] = (second() - t1)/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][12] = atime[3][12] + atime[3][j];

            print_time(j);
        }
        atime[3][12] = atime[3][12] / 5.0; 
        fprintf (stderr,"Average                          %11.2f\n",
                                              (double)atime[3][12]);  

/************************************************************************
 *           Use minimum average as overall Mflops rating               *
 ************************************************************************/
      
        mflops = atime[3][6];
        if (atime[3][12] < mflops) mflops = atime[3][12];
       
        fprintf(stderr,"\n");
        fprintf(stderr,ROLLING);fprintf(stderr,PREC);
        fprintf(stderr," Precision %11.2f Mflops \n\n",mflops);

        what_date();

/************************************************************************
 *             Type details of hardware, software etc.                  *
 ************************************************************************/

    printf ("Enter the following data which will be "
                                "appended to file Linpack.txt \n\n");
    printf ("PC Supplier/model ?\n                    ");
    scanf ("%[^\n]", general[1]);
    fflush (stdin);
    printf ("CPU               ?\n                    ");
    scanf ("%[^\n]", general[2]);
    fflush (stdin);
    printf ("Clock MHz         ?\n                    ");
    scanf ("%[^\n]", general[3]);
    fflush (stdin);
    printf ("Cache             ?\n                    ");
    scanf ("%[^\n]", general[4]);
    fflush (stdin);
    printf ("Chipset/options   ?\n                    ");
    scanf ("%[^\n]", general[5]);
    fflush (stdin);
    printf ("OS/DOS version    ?\n                    ");
    scanf ("%[^\n]", general[6]);
    fflush (stdin);
    printf ("Your name         ?\n                    ");
    scanf ("%[^\n]", general[7]);
    fflush (stdin);
    printf ("Where from        ?\n                    ");
    scanf ("%[^\n]", general[8]);
    fflush (stdin);
    printf ("Mail address      ?\n                    ");
    scanf ("%[^\n]", general[0]);
    fflush (stdin);

/************************************************************************
 *              Add results to output file Linpack.txt                  *
 ************************************************************************/
            
    fprintf (outfile, "----------------- ----------------- --------- "
                      "--------- ---------\n");
    fprintf (outfile, "LINPACK BENCHMARK FOR PCs 'C/C++'    n @ 100\n\n");
    fprintf (outfile, "Month run         %d/%d\n", this_month, this_year);
    fprintf (outfile, "PC model          %s\n", general[1]);
    fprintf (outfile, "CPU               %s\n", general[2]);
    fprintf (outfile, "Clock MHz         %s\n", general[3]);
    fprintf (outfile, "Cache             %s\n", general[4]);
    fprintf (outfile, "Options           %s\n", general[5]);
    fprintf (outfile, "OS/DOS            %s\n", general[6]);
    fprintf (outfile, "Compiler          %s\n", compiler);
    fprintf (outfile, "OptLevel          %s\n", options);
    fprintf (outfile, "Run by            %s\n", general[7]);
    fprintf (outfile, "From              %s\n", general[8]);
    fprintf (outfile, "Mail              %s\n\n", general[0]);
    
    fprintf(outfile, "Rolling            %s\n",ROLLING);
    fprintf(outfile, "Precision          %s\n",PREC); 
    fprintf(outfile, "norm. resid        %16.1f\n",(double)residn);
    fprintf(outfile, "resid              %16.8e\n",(double)resid);
    fprintf(outfile, "machep             %16.8e\n",(double)epsn);
    fprintf(outfile, "x[0]-1             %16.8e\n",(double)x1);
    fprintf(outfile, "x[n-1]-1           %16.8e\n",(double)x2);
    fprintf(outfile, "matgen 1 seconds   %16.5f\n",overhead1);
    fprintf(outfile, "matgen 2 seconds   %16.5f\n",overhead2); 
    fprintf(outfile, "Repetitions        %16d\n",ntimes);
    fprintf(outfile, "Leading dimension  %16d\n",lda);  
    fprintf(outfile, "                              dgefa     dgesl "
                     "    total    Mflops\n");
    fprintf(outfile, "1 pass seconds     %16.5f %9.5f %9.5f\n",
                      atime[0][0], atime[1][0], atime[2][0]);
                      
    for (i=1 ; i<6 ; i++)
    {                 
        fprintf(outfile, "Repeat seconds     %16.5f %9.5f %9.5f %9.2f\n",                
                       atime[0][i], atime[1][i], atime[2][i], atime[3][i]);
    }
    fprintf(outfile, "Average            %46.2f\n",atime[3][6]);
    
    fprintf(outfile, "Leading dimension  %16d\n",ldaa);
     
    for (i=7 ; i<12 ; i++)
    {                 
        fprintf(outfile, "Repeat seconds     %16.5f %9.5f %9.5f %9.2f\n",                
                       atime[0][i], atime[1][i], atime[2][i], atime[3][i]);
    }
    fprintf(outfile, "Average            %46.2f\n\n",atime[3][12]); 
    
    fclose (outfile);
    
    printf("\nPress any key\n");
    #ifdef DOS
    Endit = getch();
    #endif
}
예제 #8
0
/*
 * Driver for the C Linpack benchmark with a fixed matrix order n = 100.
 *
 * The system produced by matgen is factored with dgefa and solved with
 * dgesl, first in the array a (leading dimension 201) and then in the
 * array aa (leading dimension 200).  Results go into the table st[row][col],
 * which is declared outside this function:
 *
 *   row 0  dgefa time             row 3  ops/(1.0e3*total)  ("kflops")
 *   row 1  dgesl time             row 4  2.0e3/st[3]        ("unit")
 *   row 2  total = row0 + row1    row 5  total/cray         ("ratio")
 *
 *   col 0-2  single runs on a     col 3  average of NTIMES runs on a
 *   col 4-6  single runs on aa    col 7  average of NTIMES runs on aa
 *
 * The final rating is the smaller of st[3][3] and st[3][7], clamped at zero
 * and rounded to the nearest integer.
 *
 * TimerOn/TimerOff/TimerElapsed, the UserTimer* functions, Report and
 * print_time are defined elsewhere; the "kflops" label presumably relies on
 * the timers reporting seconds -- TODO confirm.
 *
 * NOTE(review): none of the totals is checked for zero before it is used as
 * a divisor.
 *
 * Does not return: the function ends with exit( EXIT_SUCCESS ).
 */
void main ()
{
   static REAL aa[200][200],a[200][201],b[200],x[200];
   REAL cray,ops,total,norma,normx;
   REAL resid,residn,eps;
   REAL epslon(),kf;
#if 0
   double t1;
   double tm;
#endif
   double tm2;
   double dtime();   /* declared here but not called in this function */
   static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;
   static user_timer second_timer;

   lda = 201;
   ldaa = 200;
   cray = .056; 
   n = 100;

   printf(ROLLING); printf(PREC);
   printf("Precision Linpack\n\n");

	/* operation count used for every rate below: 2n^3/3 + 2n^2 */
	ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

	/* column 0: first single factor + solve in a */
	matgen(a,lda,n,b,&norma);
	TimerOn();
	dgefa(a,lda,n,ipvt,&info);
	TimerOff();
	st[0][0] = TimerElapsed();
	Report( "clinpack(dgefa#1)", st[0][0] );
	
	TimerOn();
	dgesl(a,lda,n,ipvt,b,0);
	TimerOff();
	st[1][0] = TimerElapsed();
	Report( "clinpack(dgesl#1)", st[1][0] );
	total = st[0][0] + st[1][0];

/*     compute a residual to verify results.  */ 

	/* keep the solution in x, then regenerate a and b and negate b */
	for (i = 0; i < n; i++)
	   {
		 x[i] = b[i];
	   }
	matgen(a,lda,n,b,&norma);
	for (i = 0; i < n; i++) 
	   {
		 b[i] = -b[i];
	   }
	dmxpy(n,b,n,lda,x,a);
	/* resid = max |b[i]| after dmxpy, normx = max |x[i]| */
	resid = 0.0;
	normx = 0.0;
	for (i = 0; i < n; i++)
	 {
		 resid = (resid > fabs((double)b[i])) 
	 ? resid : fabs((double)b[i]);
		 normx = (normx > fabs((double)x[i])) 
	 ? normx : fabs((double)x[i]);
	 }
	eps = epslon((REAL)ONE);
	residn = resid/( n*norma*normx*eps );
   
   printf("   norm. resid      resid           machep");
   printf("         x[0]-1        x[n-1]-1\n");
   printf("%8.1f      %16.8e%16.8e%16.8e%16.8e\n",
	  (double)residn, (double)resid, (double)eps, 
		 (double)x[0]-1, (double)x[n-1]-1);

printf(" times are reported for matrices of order %5d\n",n);
printf("      dgefa      dgesl      total       kflops     unit");
printf("      ratio\n");

	/* derived figures for column 0 (total was computed before the residual) */
	st[2][0] = total;
	st[3][0] = ops/(1.0e3*total);
	st[4][0] = 2.0e3/st[3][0];
	st[5][0] = total/cray;

   printf(" times for array with leading dimension of%5d\n",lda);
   print_time(0);

	/* column 1: second single run in a */
	matgen(a,lda,n,b,&norma);
	TimerOn();
	dgefa(a,lda,n,ipvt,&info);
	TimerOff();
	st[0][1] = TimerElapsed();
	Report( "clinpack(dgefa#2)", st[0][1] );
	
	TimerOn();
	dgesl(a,lda,n,ipvt,b,0);
	TimerOff();
	st[1][1] = TimerElapsed();
	Report( "clinpack(dgesl#2)", st[1][1] );
	total = st[0][1] + st[1][1];
	
	st[2][1] = total;
	st[3][1] = ops/(1.0e3*total);
	st[4][1] = 2.0e3/st[3][1];
	st[5][1] = total/cray;

	/* column 2: third single run in a */
	matgen(a,lda,n,b,&norma);
	
	TimerOn();
	dgefa(a,lda,n,ipvt,&info);
	TimerOff();
	st[0][2] = TimerElapsed();
	Report( "clinpack(dgefa#3)", st[0][2] );
	
	TimerOn();
	dgesl(a,lda,n,ipvt,b,0);
	TimerOff();
	st[1][2] = TimerElapsed();
	Report( "clinpack(dgesl#3)", st[1][2] );
	
	total = st[0][2] + st[1][2];
	st[2][2] = total;
	st[3][2] = ops/(1.0e3*total);
	st[4][2] = 2.0e3/st[3][2];
	st[5][2] = total/cray;

	/* column 3: NTIMES repetitions in a.  second_timer brackets the whole
	   loop; tm2 accumulates the time spent in matgen so that it can be
	   subtracted from the dgefa figure. */
	ntimes = NTIMES;
	tm2 = 0.0;
	UserTimerOn( &second_timer );

   for (i = 0; i < ntimes; i++) {
	TimerOn();
	matgen(a,lda,n,b,&norma);
	TimerOff();
	tm2 = tm2 + TimerElapsed();
	dgefa(a,lda,n,ipvt,&info);
	}

	UserTimerOff( &second_timer );
	st[0][3] = ( UserTimerElapsed( &second_timer ) - tm2)/ntimes;
	Report( "clinpack(dgefa#4)", st[0][3] );

	/* dgesl is repeated on the last factorization; b is not regenerated
	   between iterations */
	TimerOn();
   for (i = 0; i < ntimes; i++) {
		 dgesl(a,lda,n,ipvt,b,0);
	}
	TimerOff();

	st[1][3] = TimerElapsed()/ntimes;
	Report( "clinpack(dgesl#4)", st[1][3] );
	total = st[0][3] + st[1][3];
	st[2][3] = total;
	st[3][3] = ops/(1.0e3*total);
	st[4][3] = 2.0e3/st[3][3];
	st[5][3] = total/cray;

   print_time(1);
   print_time(2);
   print_time(3);

	/* column 4: first single run in aa (leading dimension ldaa) */
	matgen(aa,ldaa,n,b,&norma);
	TimerOn();
	dgefa(aa,ldaa,n,ipvt,&info);
	TimerOff();
	st[0][4] = TimerElapsed();
	Report( "clinpack(dgefa#5)", st[0][4] );
	
	TimerOn();
	dgesl(aa,ldaa,n,ipvt,b,0);
	TimerOff();
	st[1][4] = TimerElapsed();
	Report( "clinpack(dgesl#5)", st[1][4] );

	total = st[0][4] + st[1][4];
	st[2][4] = total;
	st[3][4] = ops/(1.0e3*total);
	st[4][4] = 2.0e3/st[3][4];
	st[5][4] = total/cray;

	/* column 5: second single run in aa */
	matgen(aa,ldaa,n,b,&norma);
	TimerOn();
	dgefa(aa,ldaa,n,ipvt,&info);
	TimerOff();
	st[0][5] = TimerElapsed();
	Report( "clinpack(dgefa#6)", st[0][5] );

	TimerOn();
	dgesl(aa,ldaa,n,ipvt,b,0);
	TimerOff();
	st[1][5] = TimerElapsed();
	Report( "clinpack(dgesl#6)", st[1][5] );

	total = st[0][5] + st[1][5];
	st[2][5] = total;
	st[3][5] = ops/(1.0e3*total);
	st[4][5] = 2.0e3/st[3][5];
	st[5][5] = total/cray;

   /* column 6: third single run in aa */
   matgen(aa,ldaa,n,b,&norma);
   TimerOn();
   dgefa(aa,ldaa,n,ipvt,&info);
   TimerOff();
   st[0][6] = TimerElapsed();
   Report( "clinpack(dgefa#7)", st[0][6] );

   TimerOn();
   dgesl(aa,ldaa,n,ipvt,b,0);
   TimerOff();
   st[1][6] = TimerElapsed();
   Report( "clinpack(dgesl#7)", st[1][6] );

   total = st[0][6] + st[1][6];
   st[2][6] = total;
   st[3][6] = ops/(1.0e3*total);
   st[4][6] = 2.0e3/st[3][6];
   st[5][6] = total/cray;

   /* column 7: NTIMES repetitions in aa, measured the same way as column 3 */
   ntimes = NTIMES;
   tm2 = 0;
   UserTimerOn( &second_timer );
   for (i = 0; i < ntimes; i++) {
	TimerOn();
	matgen(aa,ldaa,n,b,&norma);
	TimerOff();
	tm2 = tm2 + TimerElapsed();
	dgefa(aa,ldaa,n,ipvt,&info);
	}
   UserTimerOff( &second_timer );

   st[0][7] = ( UserTimerElapsed( &second_timer ) - tm2 ) / ntimes;
   Report( "clinpack(dgefa#8)", st[0][7] );
   
   TimerOn();
   for (i = 0; i < ntimes; i++) {
	dgesl(aa,ldaa,n,ipvt,b,0);
	}
   TimerOff();

   st[1][7] = TimerElapsed()/ntimes;
   Report( "clinpack(dgesl#8)", st[1][7] );

   total = st[0][7] + st[1][7];
   st[2][7] = total;
   st[3][7] = ops/(1.0e3*total);
   st[4][7] = 2.0e3/st[3][7];
   st[5][7] = total/cray;

   /* the following code sequence implements the semantics of
	the Fortran intrinsics "nint(min(st[3][3],st[3][7]))"   */
/*
   kf = (st[3][3] < st[3][7]) ? st[3][3] : st[3][7];
   kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
   if (fabs((double)kf) < ONE) 
	kflops = 0;
   else {
	kflops = floor(fabs((double)kf));
	if (kf < ZERO) kflops = -kflops;
   }
*/
   /* active replacement for the disabled block above: negative rates are
      clamped to zero first, then the smaller of the two averaged rates is
      rounded to the nearest integer */
   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
   kf = st[3][3];
   if ( st[3][7] < st[3][3] ) kf = st[3][7];
   kflops = (int)(kf + 0.5);

   printf(" times for array with leading dimension of%4d\n",ldaa);
   print_time(4);
   print_time(5);
   print_time(6);
   print_time(7);
   printf(ROLLING); printf(PREC);
   printf(" Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
   exit( EXIT_SUCCESS );
}
예제 #9
0
/*
 * Driver for the C Linpack benchmark with a fixed matrix order n = 100.
 *
 * The system produced by matgen is factored with dgefa and solved with
 * dgesl, first in the array a (leading dimension 201) and then in the
 * array aa (leading dimension 200).  Every interval is measured as the
 * difference of two second() readings and stored in the table
 * time[row][col], which is declared outside this function:
 *
 *   row 0  dgefa time             row 3  ops/(1.0e3*total)  ("kflops")
 *   row 1  dgesl time             row 4  2.0e3/time[3]      ("unit")
 *   row 2  total = row0 + row1    row 5  total/cray         ("ratio")
 *
 *   col 0-2  single runs on a     col 3  average of NTIMES runs on a
 *   col 4-6  single runs on aa    col 7  average of NTIMES runs on aa
 *
 * The residual line is written to stdout; the timing tables and the final
 * rating go to stderr.  The rating is min(time[3][3], time[3][7]) rounded
 * to the nearest integer.
 *
 * NOTE(review): none of the totals is checked for zero before it is used as
 * a divisor, and the function has no declared return type and no return
 * statement.
 */
main ()
{
	static REAL aa[200][200],a[200][201],b[200],x[200];
	REAL cray,ops,total,norma,normx;
	REAL resid,residn,eps,t1,tm,tm2;
	REAL epslon(),second(),kf;
	static int ipvt[200],n,i,ntimes,info,lda,ldaa,kflops;

	lda = 201;
	ldaa = 200;
	cray = .056; 
	n = 100;

	/* the banner is written to both streams */
	fprintf(stdout,ROLLING);fprintf(stdout,PREC);fprintf(stdout,"Precision Linpack\n\n");
	fprintf(stderr,ROLLING);fprintf(stderr,PREC);fprintf(stderr,"Precision Linpack\n\n");

        /* operation count used for every rate below: 2n^3/3 + 2n^2 */
        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        /* column 0: first single factor + solve in a */
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info);
        time[0][0] = second() - t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0);
        time[1][0] = second() - t1;
        total = time[0][0] + time[1][0];

/*     compute a residual to verify results.  */ 

        /* keep the solution in x, then regenerate a and b and negate b */
        for (i = 0; i < n; i++) {
            	x[i] = b[i];
	}
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
            	b[i] = -b[i];
	}
        dmxpy(n,b,n,lda,x,a);
        /* resid = max |b[i]| after dmxpy, normx = max |x[i]| */
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
            	resid = (resid > fabs((double)b[i])) 
			? resid : fabs((double)b[i]);
            	normx = (normx > fabs((double)x[i])) 
			? normx : fabs((double)x[i]);
	}
        eps = epslon((REAL)ONE);
        residn = resid/( n*norma*normx*eps );
	
   	printf("     norm. resid      resid           machep");
        printf("         x[0]-1        x[n-1]-1\n");
	printf("  %8.1f      %16.8e%16.8e%16.8e%16.8e\n",
	       (double)residn, (double)resid, (double)eps, 
               (double)x[0]-1, (double)x[n-1]-1);

   	fprintf(stderr,"    times are reported for matrices of order %5d\n",n);
	fprintf(stderr,"      dgefa      dgesl      total       kflops     unit");
	fprintf(stderr,"      ratio\n");

        /* derived figures for column 0 (total was computed before the residual) */
        time[2][0] = total;
        time[3][0] = ops/(1.0e3*total);
        time[4][0] = 2.0e3/time[3][0];
        time[5][0] = total/cray;

   	fprintf(stderr," times for array with leading dimension of%5d\n",lda);
	print_time(0);

        /* column 1: second single run in a */
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info);
        time[0][1] = second() - t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0);
        time[1][1] = second() - t1;
        total = time[0][1] + time[1][1];
        time[2][1] = total;
        time[3][1] = ops/(1.0e3*total);
        time[4][1] = 2.0e3/time[3][1];
        time[5][1] = total/cray;

        /* column 2: third single run in a */
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info);
        time[0][2] = second() - t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0);
        time[1][2] = second() - t1;
        total = time[0][2] + time[1][2];
        time[2][2] = total;
        time[3][2] = ops/(1.0e3*total);
        time[4][2] = 2.0e3/time[3][2];
        time[5][2] = total/cray;

        /* column 3: NTIMES repetitions in a.  t1 marks the start of the whole
           loop; tm2 accumulates the time spent in matgen so that it can be
           subtracted from the dgefa figure. */
        ntimes = NTIMES;
        tm2 = 0.0;
        t1 = second();

	for (i = 0; i < ntimes; i++) {
            	tm = second();
		matgen(a,lda,n,b,&norma);
		tm2 = tm2 + second() - tm;
		dgefa(a,lda,n,ipvt,&info);
	}

        time[0][3] = (second() - t1 - tm2)/ntimes;
        t1 = second();

	/* dgesl is repeated on the last factorization; b is not regenerated
	   between iterations */
	for (i = 0; i < ntimes; i++) {
            	dgesl(a,lda,n,ipvt,b,0);
	}

        time[1][3] = (second() - t1)/ntimes;
        total = time[0][3] + time[1][3];
        time[2][3] = total;
        time[3][3] = ops/(1.0e3*total);
        time[4][3] = 2.0e3/time[3][3];
        time[5][3] = total/cray;

	print_time(1);
	print_time(2);
	print_time(3);

        /* column 4: first single run in aa (leading dimension ldaa) */
        matgen(aa,ldaa,n,b,&norma);
        t1 = second();
        dgefa(aa,ldaa,n,ipvt,&info);
        time[0][4] = second() - t1;
        t1 = second();
        dgesl(aa,ldaa,n,ipvt,b,0);
        time[1][4] = second() - t1;
        total = time[0][4] + time[1][4];
        time[2][4] = total;
        time[3][4] = ops/(1.0e3*total);
        time[4][4] = 2.0e3/time[3][4];
        time[5][4] = total/cray;

        /* column 5: second single run in aa */
        matgen(aa,ldaa,n,b,&norma);
        t1 = second();
        dgefa(aa,ldaa,n,ipvt,&info);
        time[0][5] = second() - t1;
        t1 = second();
        dgesl(aa,ldaa,n,ipvt,b,0);
        time[1][5] = second() - t1;
        total = time[0][5] + time[1][5];
        time[2][5] = total;
        time[3][5] = ops/(1.0e3*total);
        time[4][5] = 2.0e3/time[3][5];
        time[5][5] = total/cray;

	/* column 6: third single run in aa */
	matgen(aa,ldaa,n,b,&norma);
	t1 = second();
	dgefa(aa,ldaa,n,ipvt,&info);
	time[0][6] = second() - t1;
	t1 = second();
	dgesl(aa,ldaa,n,ipvt,b,0);
	time[1][6] = second() - t1;
	total = time[0][6] + time[1][6];
	time[2][6] = total;
	time[3][6] = ops/(1.0e3*total);
	time[4][6] = 2.0e3/time[3][6];
	time[5][6] = total/cray;

	/* column 7: NTIMES repetitions in aa, measured the same way as column 3 */
	ntimes = NTIMES;
	tm2 = 0;
	t1 = second();
	for (i = 0; i < ntimes; i++) {
		tm = second();
		matgen(aa,ldaa,n,b,&norma);
		tm2 = tm2 + second() - tm;
		dgefa(aa,ldaa,n,ipvt,&info);
	}
	time[0][7] = (second() - t1 - tm2)/ntimes;
	t1 = second();
	for (i = 0; i < ntimes; i++) {
		dgesl(aa,ldaa,n,ipvt,b,0);
	}
	time[1][7] = (second() - t1)/ntimes;
	total = time[0][7] + time[1][7];
	time[2][7] = total;
	time[3][7] = ops/(1.0e3*total);
	time[4][7] = 2.0e3/time[3][7];
	time[5][7] = total/cray;

	/* the following code sequence implements the semantics of
	   the Fortran intrinsics "nint(min(time[3][3],time[3][7]))"	*/

	/* take the smaller averaged rate, push it half a unit away from zero,
	   then truncate the magnitude and restore the sign */
	kf = (time[3][3] < time[3][7]) ? time[3][3] : time[3][7];
	kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
	if (fabs((double)kf) < ONE) 
		kflops = 0;
	else {
		kflops = floor(fabs((double)kf));
		if (kf < ZERO) kflops = -kflops;
	}

	fprintf(stderr," times for array with leading dimension of%4d\n",ldaa);
	print_time(4);
	print_time(5);
	print_time(6);
	print_time(7);
	fprintf(stderr,ROLLING);fprintf(stderr,PREC);
	fprintf(stderr," Precision %5d Kflops ; %d Reps \n",kflops,NTIMES);
}
예제 #10
0
/*
 * Fill in the derived rows of column `col` of the results table st[][]
 * from an already measured (and non-zero) total time:
 *   st[2] = total, st[3] = ops/(1.0e3*total)  (the kflops rate),
 *   st[4] = 2.0e3/st[3], st[5] = total/cray.
 */
static void
clinpack_record ( int col, REAL total, REAL ops, REAL cray )
{
   st[2][col] = total;
   st[3][col] = ops/(1.0e3*total);
   st[4][col] = 2.0e3/st[3][col];
   st[5][col] = total/cray;
}

/*
 * Time one matgen + dgefa + dgesl sequence in the matrix m (leading
 * dimension ld) and store the result in column `col` of st[][].
 * matgen itself is outside the timed intervals.  On return b holds
 * whatever dgesl left in it.
 */
static void
clinpack_single_pass ( double *m, int ld, int n, int *ipvt, REAL *b,
                       int col, REAL ops, REAL cray, float one_tick )
{
   REAL norma,total;
   double t1;
   int info;

   matgen(m,ld,n,b,&norma);
   t1 = dtime();
   dgefa(m,ld,n,ipvt,&info);
   st[0][col] = dtime() - t1;

   t1 = dtime();
   dgesl(m,ld,n,ipvt,b,0);
   st[1][col] = dtime() - t1;

   total = st[0][col] + st[1][col];

   /*
      On extremely fast machines, the total time between checks
      can be less than the resolution of the clock.  In this
      case, total will be 0.  Set it to the time 1 clock tick
      takes as a way to avoid dividing by 0.
      Derek Wright, 9/4/97
    */
   if( total == 0 ) total = one_tick;

   clinpack_record(col,total,ops,cray);
}

/*
 * Time `ntimes` repetitions in the matrix m (leading dimension ld) and
 * store the per-repetition averages in column `col` of st[][].
 * The matgen calls sit inside the dgefa loop, so their accumulated time
 * (tm2) is subtracted from the loop time.  dgesl is then repeated on the
 * last factorization without regenerating b.  ntimes must be >= 1.
 */
static void
clinpack_repeat_pass ( double *m, int ld, int n, int *ipvt, REAL *b,
                       int ntimes, int col, REAL ops, REAL cray,
                       float one_tick )
{
   REAL norma,total;
   double t1,tm,tm2;
   int i,info;

   tm2 = 0.0;
   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
      tm = dtime();
      matgen(m,ld,n,b,&norma);
      tm2 = tm2 + dtime() - tm;
      dgefa(m,ld,n,ipvt,&info);
   }
   st[0][col] = (dtime() - t1 - tm2)/ntimes;

   t1 = dtime();
   for (i = 0; i < ntimes; i++) {
      dgesl(m,ld,n,ipvt,b,0);
   }
   st[1][col] = (dtime() - t1)/ntimes;

   total = st[0][col] + st[1][col];

   /* same clock-resolution guard as in clinpack_single_pass */
   if( total == 0 ) total = one_tick;

   clinpack_record(col,total,ops,cray);
}

/*
 * Run the Linpack benchmark (matrix order 100) and return its rating.
 *
 * Columns 0-2 of st[][] receive three single runs in the array a (leading
 * dimension 201), column 3 the average over `ntimes` runs in a; columns
 * 4-6 and 7 receive the same measurements for the array aa (leading
 * dimension 200).
 *
 * ntimes  number of repetitions for the averaged columns.  Values below 1
 *         are treated as 1: dividing by a zero count would produce NaN/inf
 *         and make the final conversion to int undefined.
 *
 * Returns min(st[3][3], st[3][7]), with negative rates clamped to zero,
 * rounded to the nearest integer (named kflops; presumes dtime() reports
 * seconds -- TODO confirm).
 */
int
clinpack_kflops ( int ntimes )
{
   /* static: the two matrices are far too large for the stack */
   static REAL aa[200][200],a[200][201],b[200],x[200];
   static int ipvt[200];
   REAL cray,ops,norma,normx;
   REAL resid,residn,eps;
   REAL kf;
   int n,i,lda,ldaa,kflops;

#if defined(WIN32)
   static float one_tick = .0001;
#else
   static long clock_tick = -1;
   static float one_tick;
   if ( clock_tick < 1 || clock_tick > 1000) {
      /* clock_tick is the number of ticks per second */
      clock_tick = sysconf( _SC_CLK_TCK );
      /* one_tick is the length of time for one tick */
      one_tick = (float) 1 / clock_tick;
   }
#endif

   if ( ntimes < 1 ) ntimes = 1;

   lda = 201;
   ldaa = 200;
   cray = .056;
   n = 100;

   /* operation count used for every rate: 2n^3/3 + 2n^2 */
   ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

   clinpack_single_pass((double *)a,lda,n,ipvt,b,0,ops,cray,one_tick);

   /*
      compute a residual to verify results.  b still holds the output of
      the run above.  The values computed here are not used for the
      returned rating; the section is kept so the sequence of calls
      matches the other Linpack drivers.
    */
   for (i = 0; i < n; i++) {
      x[i] = b[i];
   }
   matgen((double *)a,lda,n,b,&norma);
   for (i = 0; i < n; i++) {
      b[i] = -b[i];
   }
   dmxpy(n,b,n,lda,x,(double *)a);
   resid = 0.0;
   normx = 0.0;
   for (i = 0; i < n; i++) {
      resid = (resid > fabs((double)b[i])) ? resid : fabs((double)b[i]);
      normx = (normx > fabs((double)x[i])) ? normx : fabs((double)x[i]);
   }
   eps = epslon((REAL)ONE);
   residn = resid/( n*norma*normx*eps );

   /* remaining measurements in a */
   clinpack_single_pass((double *)a,lda,n,ipvt,b,1,ops,cray,one_tick);
   clinpack_single_pass((double *)a,lda,n,ipvt,b,2,ops,cray,one_tick);
   clinpack_repeat_pass((double *)a,lda,n,ipvt,b,ntimes,3,ops,cray,one_tick);

   /* same four measurements in aa */
   clinpack_single_pass((double *)aa,ldaa,n,ipvt,b,4,ops,cray,one_tick);
   clinpack_single_pass((double *)aa,ldaa,n,ipvt,b,5,ops,cray,one_tick);
   clinpack_single_pass((double *)aa,ldaa,n,ipvt,b,6,ops,cray,one_tick);
   clinpack_repeat_pass((double *)aa,ldaa,n,ipvt,b,ntimes,7,ops,cray,one_tick);

   /* the following code sequence replaces the Fortran intrinsics
      "nint(min(st[3][3],st[3][7]))"; negative rates (possible when the
      subtracted matgen time exceeds the loop time) are clamped to zero */
   if ( st[3][3] < ZERO ) st[3][3] = ZERO;
   if ( st[3][7] < ZERO ) st[3][7] = ZERO;
   kf = st[3][3];
   if ( st[3][7] < st[3][3] ) kf = st[3][7];
   kflops = (int)(kf + 0.5);

   return kflops;
}
예제 #11
0
int main(int argc, char **argv) {

/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for LINPACK_BENCH.

  Discussion:

    LINPACK_BENCH drives the double precision LINPACK benchmark program.

    A matrix of order N is generated, factored with DGEFA and solved with
    DGESL against a right hand side built from the solution x = 1.  The
    factor and solve times, the residual of the computed solution and the
    resulting MFLOPS rate are printed.

    If the first character of the first command line argument is '0', the
    program returns immediately without doing any work.

    Every work array is checked after allocation, and all of them are
    released on every exit path, including the singular-matrix exit.

  Modified:

    25 July 2008

  Parameters:

    N is the problem size.

    Output, int MAIN, is 0 on normal completion, 1 if an array could not
    be allocated or the matrix was reported singular.
*/
# define N 1000
# define LDA ( N + 1 )

  double *a = NULL;
  double a_max;
  double *b = NULL;
  double b_max;
  double cray = 0.056;
  double eps;
  int i;
  int info;
  int *ipvt = NULL;
  int j;
  int job;
  double ops;
  double *resid = NULL;
  double resid_max;
  double residn;
  double *rhs = NULL;
  int status = 1;
  double t1;
  double t2;
  double time[6];
  double total;
  double *x = NULL;

  int arg = argc > 1 ? argv[1][0] - '0' : 3;
  if (arg == 0) return 0;

  timestamp ( );
  printf ( "\n" );
  printf ( "LINPACK_BENCH\n" );
  printf ( "  C version\n" );
  printf ( "\n" );
  printf ( "  The LINPACK benchmark.\n" );
  printf ( "  Language: C\n" );
  printf ( "  Datatype: Double precision real\n" );
  printf ( "  Matrix order N               = %d\n", N );
  printf ( "  Leading matrix dimension LDA = %d\n", LDA );
/*
  Operation count 2*N^3/3 + 2*N^2, evaluated in double precision so that
  the N^3 term cannot overflow an int.
*/
  ops = 2.0 * N * N * N / 3.0 + 2.0 * N * N;
/*
  Allocate space for arrays.
*/
  a = r8mat_gen ( LDA, N );
  b = ( double * ) malloc ( N * sizeof ( double ) );
  ipvt = ( int * ) malloc ( N * sizeof ( int ) );
  resid = ( double * ) malloc ( N * sizeof ( double ) );
  rhs = ( double * ) malloc ( N * sizeof ( double ) );
  x = ( double * ) malloc ( N * sizeof ( double ) );

  if ( a == NULL || b == NULL || ipvt == NULL ||
       resid == NULL || rhs == NULL || x == NULL )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( "  Unable to allocate memory for the work arrays.\n" );
    goto cleanup;
  }

  a_max = 0.0;
  for ( j = 0; j < N; j++ )
  {
    for ( i = 0; i < N; i++ )
    {
      a_max = r8_max ( a_max, a[i+j*LDA] );
    }
  }

  for ( i = 0; i < N; i++ )
  {
    x[i] = 1.0;
  }
/*
  Right hand side B = A * X.
*/
  for ( i = 0; i < N; i++ )
  {
    b[i] = 0.0;
    for ( j = 0; j < N; j++ )
    {
      b[i] = b[i] + a[i+j*LDA] * x[j];
    }
  }
/*
  Time the factorization.
*/
  t1 = cpu_time ( );

  info = dgefa ( a, LDA, N, ipvt );

  if ( info != 0 )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( "  The matrix A is apparently singular.\n" );
    printf ( "  Abnormal end of execution.\n" );
    goto cleanup;
  }

  t2 = cpu_time ( );
  time[0] = t2 - t1;
/*
  Time the solve.  On return B holds the computed solution.
*/
  t1 = cpu_time ( );

  job = 0;
  dgesl ( a, LDA, N, ipvt, b, job );

  t2 = cpu_time ( );
  time[1] = t2 - t1;

  total = time[0] + time[1];

  free ( a );
  a = NULL;
/*
  Compute a residual to verify results.  A was overwritten by its factors,
  so it is generated again.
*/
  a = r8mat_gen ( LDA, N );

  if ( a == NULL )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( "  Unable to allocate memory for the work arrays.\n" );
    goto cleanup;
  }

  for ( i = 0; i < N; i++ )
  {
    x[i] = 1.0;
  }

  for ( i = 0; i < N; i++ )
  {
    rhs[i] = 0.0;
    for ( j = 0; j < N; j++ )
    {
      rhs[i] = rhs[i] + a[i+j*LDA] * x[j];
    }
  }
/*
  RESID = A * B - RHS, where B is the computed solution.
*/
  for ( i = 0; i < N; i++ )
  {
    resid[i] = -rhs[i];
    for ( j = 0; j < N; j++ )
    {
      resid[i] = resid[i] + a[i+j*LDA] * b[j];
    }
  }

  resid_max = 0.0;
  for ( i = 0; i < N; i++ )
  {
    resid_max = r8_max ( resid_max, r8_abs ( resid[i] ) );
  }

  b_max = 0.0;
  for ( i = 0; i < N; i++ )
  {
    b_max = r8_max ( b_max, r8_abs ( b[i] ) );
  }

  eps = r8_epsilon ( );

  residn = resid_max / ( double ) N / a_max / b_max / eps;
/*
  TIME[3] is the MFLOPS rate, or -1 if the clock measured no elapsed time.
*/
  time[2] = total;
  if ( 0.0 < total )
  {
    time[3] = ops / ( 1.0E+06 * total );
  }
  else
  {
    time[3] = -1.0;
  }
  time[4] = 2.0 / time[3];
  time[5] = total / cray;

  printf ( "\n" );
  printf ( "     Norm. Resid      Resid           MACHEP         X[1]          X[N]\n" );
  printf ( "\n" );
  printf ( "  %14f  %14f  %14e  %14f  %14f\n", residn, resid_max, eps, b[0], b[N-1] );
  printf ( "\n" );
  printf ( "      Factor     Solve      Total            Unit      Cray-Ratio\n" );
  printf ( "\n" );
  printf ( "  %9f  %9f  %9f  %9f  %9f\n", 
    time[0], time[1], time[2], time[4], time[5] );
  printf ( "\n" );
  printf ( "Unrolled Double  Precision %9f Mflops\n", time[3]);
  printf ( "\n" );

  status = 0;
/*
  Release the work arrays; free ( NULL ) is a no-op, so this is safe no
  matter how far the run got.
*/
cleanup:
  free ( a );
  free ( b );
  free ( ipvt );
  free ( resid );
  free ( rhs );
  free ( x );
/*
  Terminate.
*/
  if ( status == 0 )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH\n" );
    printf ( "  Normal end of execution.\n" );

    printf ( "\n" );
    timestamp ( );
  }

  return status;
# undef LDA
# undef N
}
예제 #12
0
파일: ndktest.c 프로젝트: Felard/MoSync
/*
 * Run the Linpack kernel 2*nreps times on an arsize/2 x arsize/2 system and
 * report the result through the JNI callbacks.
 *
 * nreps   repetitions of each of the two timed loops
 * arsize  leading dimension of the work matrix; the order solved is arsize/2
 *
 * Working storage is carved out of the global buffer mempool (allocated
 * elsewhere): arsize*arsize REALs for the matrix, then arsize REALs for
 * the right hand side, then the pivot vector.
 *
 * The two loops differ only in the last argument passed to dgefa/dgesl
 * (1, then 0) -- presumably selecting two variants of the inner kernel;
 * verify against their definitions.
 *
 * Returns 0 when the run was too short to be meaningful (less than 0.5 in
 * total, or less than 0.2 inside dgefa+dgesl, in units of second()),
 * otherwise the total elapsed time.  A result line is always passed to
 * pass_jni_msg in the latter case, and the rate is additionally passed to
 * pass_result once the total exceeds 10.
 */
static REAL linpack(long nreps,int arsize)

	{
	REAL  *a,*b;
	REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
	int	  *ipvt,n,info,lda;
	long   i,arsize2d;
	char   buf[80];

	lda = arsize;
	n = arsize/2;
	arsize2d = (long)arsize*(long)arsize;
	/* operation count of one factor + solve: 2n^3/3 + 2n^2 */
	ops=((2.0*n*n*n)/3.0+2.0*n*n);
	a=(REAL *)mempool;
	b=a+arsize2d;
	ipvt=(int *)&b[arsize];
	tdgesl=0;
	tdgefa=0;
	totalt=second();
	for (i=0;i<nreps;i++)
		{
		/* matgen is outside the timed intervals; it ends up in toverhead */
		matgen(a,lda,n,b,&norma);
		t1 = second();
		dgefa(a,lda,n,ipvt,&info,1);
		tdgefa += second()-t1;
		t1 = second();
		dgesl(a,lda,n,ipvt,b,0,1);
		tdgesl += second()-t1;
		}
	for (i=0;i<nreps;i++)
		{
		matgen(a,lda,n,b,&norma);
		t1 = second();
		dgefa(a,lda,n,ipvt,&info,0);
		tdgefa += second()-t1;
		t1 = second();
		dgesl(a,lda,n,ipvt,b,0,0);
		tdgesl += second()-t1;
		}
	totalt=second()-totalt;
	if (totalt<0.5 || tdgefa+tdgesl<0.2)
		return(0.);
	/* factor 2: both loops above contribute nreps solves each */
	kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
	toverhead=totalt-tdgefa-tdgesl;
	if (tdgefa<0.)
		tdgefa=0.;
	if (tdgesl<0.)
		tdgesl=0.;
	if (toverhead<0.)
		toverhead=0.;

	/* The field widths are minimums, so unusually large values could make
	   the line longer than buf; snprintf truncates instead of overrunning. */
	snprintf(buf,sizeof buf,"%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%	 %9.3f\n",
			nreps,totalt,100.*tdgefa/totalt,
			100.*tdgesl/totalt,100.*toverhead/totalt,
			kflops/1000.0);

	/* Callback to Java to print buf */
	pass_jni_msg(buf);
	if(totalt > 10.)//only send data to server when bench is done
		pass_result("mflops", (float) (kflops/1000.0));

	return(totalt);
	}
예제 #13
0
/*
 * lpbench driver: builds an N x NP1 system with matgen, factors it with
 * dgefa and solves it with dgesl, then prints the run time.
 *
 * Command line: "-ga" anywhere among the arguments selects the terse output
 * (the bare number, no label or newlines).
 *
 * NOTE(review): the clock_gettime calls that bracketed the timed section
 * are disabled, so the reported run time is always 0; the operation count
 * `ops` is computed but not used for the same reason.
 *
 * Returns 0 on success, 1 if the work arrays cannot be allocated.
 */
int main(int argc, char ** argv)
{
    int i;
    int rows_allocated = 0;

    // terse output requested?
    bool ga_testing = false;

    // check every argument, not just the first one
    for (i = 1; i < argc; ++i)
    {
        if (!strcmp(argv[i],"-ga"))
        {
            ga_testing = true;
            break;
        }
    }

    // a is a table of N row pointers, each row holding NP1 doubles
    double ** a = (double **)malloc(sizeof(double *) * N);
    double * b = (double *)malloc(sizeof(double) * N);
    int * ipvt = (int *)malloc(sizeof(int)    * N);
    bool alloc_ok = (a != NULL && b != NULL && ipvt != NULL);

    for (i = 0; alloc_ok && i < N; ++i)
    {
        a[i] = (double *)malloc(sizeof(double) * NP1);

        if (a[i] == NULL)
            alloc_ok = false;
        else
            ++rows_allocated;
    }

    // calculate operations per timeInSeconds
    double ops = (2.0 * ((double)N * N * N)) / 3.0 + 2.0 * ((double)N * N);
    (void)ops;

    if (alloc_ok)
    {
        // generate matrix
        matgen(a,b);

        // get starting time
        //struct timespec start, stop;
        //clock_gettime(CLOCK_REALTIME,&start);

        // what we're timing
        dgefa(a,ipvt);
        dgesl(a,ipvt,b);
    }

    // calculate run time
    //clock_gettime(CLOCK_REALTIME,&stop);
    double run_time = 0;//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;

    // clean up; only the rows that were actually allocated are released
    free(ipvt);
    free(b);

    for (i = 0; i < rows_allocated; ++i)
        free(a[i]);

    free(a);

    if (!alloc_ok)
    {
        fprintf(stderr,"lpbench: out of memory\n");
        return 1;
    }

    // report runtime
    if (ga_testing)
        fprintf(stdout,"%f",run_time);
    else
        fprintf(stdout,"\nlpbench (Std. C) run time: %f\n\n",run_time);

    fflush(stdout);

    // done
    return 0;
}
예제 #14
0
/*
 * JNI entry point: runs the Linpack benchmark for a fixed problem size
 * (n = 100) and returns a freshly constructed instance of resultClass.
 *
 * Parameters:
 *   env         - JNI environment of the calling thread.
 *   thiz        - the Java object the native method was invoked on (unused).
 *   resultClass - class of the result object; it must provide a no-argument
 *                 constructor and the double fields "mflops", "nres" and
 *                 "precision".
 *
 * Returns the populated result object, or NULL if the constructor or one of
 * the fields cannot be resolved or the object cannot be created.  In that
 * case the JNI call that failed has left a Java exception pending, which is
 * delivered to the caller once this function returns.
 *
 * The function relies on names declared outside this block: atime, secs,
 * runSecs, NTIMES, ONE, REAL, matgen, dgefa, dgesl, dmxpy, epslon,
 * start_time and end_time.  Each timed section reads `secs` right after
 * end_time() -- presumably end_time() stores the elapsed seconds there;
 * confirm against its definition.
 *
 * Working arrays and several counters are `static`, so concurrent calls
 * would share them.
 */
jobject Java_rs_pedjaapps_Linpack_MainActivity_runLinpack (JNIEnv* env, jobject thiz, jclass resultClass)
{
    __android_log_write (ANDROID_LOG_DEBUG, "linpack-jni.c", "running neon linpack");
        static REAL aa[200*200],a[200*201],b[200],x[200];
        REAL cray,ops,total,norma,normx;
        REAL resid,residn,eps,tm2,epsn,x1,x2;
        REAL mflops;
        static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
        int pass, loop;
        REAL overhead1, overhead2, time2;
        REAL max1, max2;


        /* Two layouts are benchmarked: leading dimension 201 (array a)
           and leading dimension 200 (array aa). */
        lda = 201;
        ldaa = 200;
        cray = .056;
        n = 100;

        /* Floating-point operation count used for all MFLOPS figures. */
        ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);

        /* Single timed factor + solve; results go to column 0 of atime. */
        matgen(a,lda,n,b,&norma);
        start_time();
        dgefa(a,lda,n,ipvt,&info);
        end_time();
        atime[0][0] = secs;
        start_time();
        dgesl(a,lda,n,ipvt,b,0);
        end_time();
        atime[1][0] = secs;
        total = atime[0][0] + atime[1][0];

//     compute a residual to verify results.

        /* Keep the solution in x, regenerate the system, negate b and call
           dmxpy; afterwards b is scanned for the largest absolute value,
           which becomes the residual. */
        for (i = 0; i < n; i++) {
                x[i] = b[i];
        }
        matgen(a,lda,n,b,&norma);
        for (i = 0; i < n; i++) {
                b[i] = -b[i];
        }
        dmxpy(n,b,n,lda,x,a);
        resid = 0.0;
        normx = 0.0;
        for (i = 0; i < n; i++) {
                resid = (resid > fabs((double)b[i]))
                        ? resid : fabs((double)b[i]);
                normx = (normx > fabs((double)x[i]))
                        ? normx : fabs((double)x[i]);
        }
        eps = epslon(ONE);
        residn = resid/( n*norma*normx*eps );
        epsn = eps;
        /* x1 and x2 are computed but not reported by this function. */
        x1 = x[0] - 1;
        x2 = x[n-1] - 1;


        atime[2][0] = total;
        if (total > 0.0)
        {
            atime[3][0] = ops/(1.0e6*total);
            atime[4][0] = 2.0/atime[3][0];
        }
        else
        {
            /* Elapsed time was not positive: avoid dividing by it. */
            atime[3][0] = 0.0;
            atime[4][0] = 0.0;
        }
        atime[5][0] = total/cray;


// ************************************************************************
// *       Calculate overhead of executing matgen procedure              *
// ************************************************************************

        /* Grow the repetition count until one timed batch exceeds runSecs,
           or until 20 attempts have been made (pass counts up from -20). */
        pass = -20;
        loop = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;
            for ( i = 0 ; i < loop ; i++)
            {
                 matgen(a,lda,n,b,&norma);
            }
            end_time();
            overhead1 = secs;
            if (overhead1 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (overhead1 < 0.1)
                {
                    loop = loop * 10;
                }
                else
                {
                    loop = loop * 2;
                }
            }
        }
        while (pass < 0);

        /* Per-call matgen cost for the lda = 201 layout. */
        overhead1 = overhead1 / (double)loop;


// ************************************************************************
// *           Calculate matgen/dgefa passes for runSecs seconds                *
// ************************************************************************

        pass = -20;
        ntimes = NTIMES;
        do
        {
            start_time();
            pass = pass + 1;
            for ( i = 0 ; i < ntimes ; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            time2 = secs;
            if (time2 > runSecs)
            {
                pass = 0;
            }
            if (pass < 0)
            {
                if (time2 < 0.1)
                {
                    ntimes = ntimes * 10;
                }
                else
                {
                    ntimes = ntimes * 2;
                }
            }
        }
        while (pass < 0);

        /* Scale the repetition count so one batch takes about runSecs. */
        ntimes =  (int)(runSecs * (double)ntimes / time2);
        if (ntimes == 0) ntimes = 1;


// ************************************************************************
// *                              Execute 5 passes                        *
// ************************************************************************

        /* Passes 1..5 use array a (lda = 201); the matgen overhead for
           ntimes calls is subtracted from the factorisation time.
           atime[3][6] accumulates, then averages, the MFLOPS figures. */
        tm2 = ntimes * overhead1;
        atime[3][6] = 0;

        for (j=1 ; j<6 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(a,lda,n,b,&norma);
                dgefa(a,lda,n,ipvt,&info );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;

            start_time();
            for (i = 0; i < ntimes; i++)
            {
                dgesl(a,lda,n,ipvt,b,0);
            }
            end_time();

            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][6] = atime[3][6] + atime[3][j];

        }
        atime[3][6] = atime[3][6] / 5.0;

// ************************************************************************
// *             Calculate overhead of executing matgen procedure         *
// ************************************************************************

        /* Same overhead measurement for array aa (ldaa = 200), reusing the
           repetition count found for the first layout. */
        start_time();
        for ( i = 0 ; i < loop ; i++)
        {
            matgen(aa,ldaa,n,b,&norma);
        }
        end_time();
        overhead2 = secs;
        overhead2 = overhead2 / (double)loop;


// ************************************************************************
// *                              Execute 5 passes                        *
// ************************************************************************

        /* Passes 7..11 use array aa; atime[3][12] holds their average. */
        tm2 = ntimes * overhead2;
        atime[3][12] = 0;

        for (j=7 ; j<12 ; j++)
        {
            start_time();
            for (i = 0; i < ntimes; i++)
            {
                matgen(aa,ldaa,n,b,&norma);
                dgefa(aa,ldaa,n,ipvt,&info  );
            }
            end_time();
            atime[0][j] = (secs - tm2)/ntimes;

            start_time();
            for (i = 0; i < ntimes; i++)
            {
                dgesl(aa,ldaa,n,ipvt,b,0);
            }
            end_time();
            atime[1][j] = secs/ntimes;
            total       = atime[0][j] + atime[1][j];
            atime[2][j] = total;
            atime[3][j] = ops/(1.0e6*total);
            atime[4][j] = 2.0/atime[3][j];
            atime[5][j] = total/cray;
            atime[3][12] = atime[3][12] + atime[3][j];

        }
        atime[3][12] = atime[3][12] / 5.0;

// ************************************************************************
// *           Use minimum average as overall Mflops rating               *
// ************************************************************************

        /* NOTE: this minimum-of-averages value is computed but is not the
           figure stored in the result object; max2 below is. */
        mflops = atime[3][6];
        if (atime[3][12] < mflops) mflops = atime[3][12];


// ************************************************************************
// *                  Build the Java result object                        *
// ************************************************************************

    /* Best MFLOPS figure of passes 1..5. */
    max1 = 0;
    for (i=1 ; i<6 ; i++)
    {
        if (atime[3][i] > max1) max1 = atime[3][i];
    }

    /* Best MFLOPS figure of passes 7..11. */
    max2 = 0;
    for (i=7 ; i<12 ; i++)
    {
        if (atime[3][i] > max2) max2 = atime[3][i];
    }
    /* Report the smaller of the two per-layout maxima. */
    if (max1 < max2) max2 = max1;

    /* Every JNI lookup below is checked immediately: on failure a Java
       exception is pending and further JNI calls must not be made, so the
       function logs and returns NULL to let the exception reach the caller. */
    jmethodID jConstructor = (*env)->GetMethodID (env, resultClass, "<init>", "()V");
    if (jConstructor == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jConstructor is NULL");
        return NULL;
    }

    jobject resultObject = (*env)->NewObject (env, resultClass, jConstructor);
    if (resultObject == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "resultObject is NULL");
        return NULL;
    }

    /*mFlops, residn, resid, epsn, x1, x2;*/
    jfieldID jMFlops = (*env)->GetFieldID (env, resultClass, "mflops", "D");
    if (jMFlops == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jMFlops is NULL");
        return NULL;
    }

    jfieldID jResidn = (*env)->GetFieldID (env, resultClass, "nres", "D");
    if (jResidn == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jResidn is NULL");
        return NULL;
    }

    jfieldID jEpsn = (*env)->GetFieldID (env, resultClass, "precision", "D");
    if (jEpsn == NULL)
    {
        __android_log_write (ANDROID_LOG_ERROR, "linpack-jni.c", "jEpsn is NULL");
        return NULL;
    }

    (*env)->SetDoubleField (env, resultObject, jMFlops, max2);
    (*env)->SetDoubleField (env, resultObject, jResidn, (double) residn);
    (*env)->SetDoubleField (env, resultObject, jEpsn, (double) epsn);
    return resultObject;
}