Ejemplo n.º 1
0
/*
 * Waits for the file fnam to become openable for reading.
 * If the first open attempt fails, the file is re-probed only once every
 * 1/5 of a second (to avoid hammering an NFS server), and the wait is
 * abandoned once more than nsec seconds have elapsed since that first
 * failure.  The elapsed-time check is made after each pause and before
 * the next open attempt.
 * RETURNS: 0 if the file could be opened, 1 if the wait timed out.
 */
int WaitForIt(int nsec, char *fnam)
{
   double ATL_walltime(void);
   const double maxwait = (double)nsec;   /* timeout, same units as timer */
   const double probegap = 0.2;           /* pause between open attempts */
   double tstart, tpause;
   FILE *fp = fopen(fnam, "r");

   if (fp == NULL)
   {
      tstart = ATL_walltime();
      while (fp == NULL)
      {
         /* spin on the wall clock until the pause interval has passed */
         tpause = ATL_walltime();
         while (ATL_walltime() - tpause < probegap)
            ;
         if (ATL_walltime() - tstart > maxwait)
            return(1);
         fp = fopen(fnam, "r");
      }
   }
   /* the handle was only needed to prove the file exists */
   fclose(fp);
   return(0);
}
Ejemplo n.º 2
0
double GetKmmMflop
(
    CINT mb, CINT nb, CINT kb,           /* C: mbxnb, At: kbxmb, B: kbXnb */
#ifdef ATL_NEWTIME
    CINT mu, CINT nu, CINT ku,
#endif
    CINT movA, CINT movB, CINT movC,     /* which mat move in flush array? */
    int FLSIZE,                          /* min area to move in in bytes */
    CINT reps,                           /* # calls to kmm in one timing */
    CINT LDC                             /* what should ldc be set to? */
)
/*
 * Returns MFLOP rate of matmul kernel KMM, computed as
 *    (2*reps*mb*nb*kb) / (elapsed wall time * 1e6)
 * over reps back-to-back calls.
 *
 * movA/movB/movC select which operands step through a flush area of at
 * least FLSIZE bytes between calls.  Supported combinations:
 *    none moved           : one operand set, reused by every call
 *    A, B and C moved     : no reuse at all
 *    A and B moved, not C : square-case ATLAS behavior
 *    A and C moved, not B : rank-K behavior
 * Any other combination prints a message to stderr and calls exit(-1).
 *
 * LDC: if (LDC == 0), then set ldc=MB for timings.
 *      if (LDC != 0 && movC != 0), then ldc= col length in move space
 *      else ldc = LDC;
 *
 * The workspace is filled with dumb_rand() values, and allocation failure
 * is caught by ATL_assert.
 */
{
#ifdef ATL_NEWTIME
    CINT mblks = mb/mu, nblks = nb/nu;
#endif
    const int NOMOVE = !(movA|movB|movC);
    int ldc, setsz, nset, i, j, incA, incB, incC, n, extra;
    TYPE *C, *A, *B, *a, *b, *c;
    double t0, t1, mf;
    const TYPE alpha=1.0;
    TYPE beta=1.0;
    void *vp=NULL;

    if (NOMOVE)
    {
        ldc = (LDC) ? LDC : mb;
        setsz = (ldc * nb + kb*(mb+nb));
        /*
         * Exactly one operand set exists.  nset must be defined because the
         * timing loop below compares against it; leaving it unset made that
         * comparison read an indeterminate value.
         */
        nset = 1;
        vp = malloc(ATL_Cachelen + ATL_MulBySize(setsz));
        ATL_assert(vp);
        A =  ATL_AlignPtr(vp);
        B = A + mb*kb;
        C = B + kb*nb;
        for (i=0; i < setsz; i++) A[i] = dumb_rand();
        incA = incB = incC = 0;
    }
    else
    {
        if (movA && movB && movC)         /* no reuse at all */
        {
            /* round FLSIZE up to a whole number of operand sets (in bytes) */
            setsz = ATL_MulBySize(mb*nb+kb*(mb+nb));
            nset = (FLSIZE+setsz-1)/setsz;
            FLSIZE = nset*setsz;
            setsz = mb*nb+kb*(mb+nb);     /* back to an element count */
            /*
             * Room for all nset sets: the pointer layout and the fill loop
             * below both span setsz*nset elements.  Allocating only one set
             * here made them write past the end of the buffer.
             */
            vp = malloc(ATL_Cachelen + ATL_MulBySize(setsz*nset));
            ATL_assert(vp);
            A = ATL_AlignPtr(vp);
            B = A + kb*mb*nset;
            C = B + kb*nb*nset;
            /*
             * NOTE(review): with LDC != 0 this gives ldc = mb*nset while C
             * still advances by incC = mb*nb per call; for nset > 1 that
             * combination looks like it can index past the C region.
             * Confirm the intended stepping before relying on this mode.
             */
            ldc = (LDC) ? mb*nset : mb;
            for (n=setsz*nset,i=0; i < n; i++) A[i] = dumb_rand();
            incA = mb*kb;
            incB = kb*nb;
            incC = mb*nb;
        }
        else if (movA && movB && !movC)   /* square-case ATLAS behavior */
        {
            setsz = kb*(mb+nb);
            ldc = (LDC) ? LDC : mb;
            ATL_assert(ldc >= mb);
            extra = ldc*nb;               /* the single, stationary C */
            incA = mb*kb;
            incB = kb*nb;
            incC = 0;
        }
        else if (!movB && movA && movC)   /* rank-K behavior */
        {
            setsz = mb*(kb+nb);
            extra = kb*nb;                /* the single, stationary B */
            incA = mb*kb;
            incB = 0;
            incC = mb*nb;
        }
        else
        {
            fprintf(stderr, "%s,%d: What case are you wanting?\n",
                    __FILE__, __LINE__);
            exit(-1);
        }
        if (!vp)   /* shared allocation for the two partial-move cases */
        {
            i = ATL_MulBySize(setsz);
            nset = (FLSIZE+i-1)/i;
            FLSIZE = nset * i;
            /*
             * NOTE(review): FLSIZE is already a byte count at this point, so
             * ATL_MulBySize(FLSIZE+extra) appears to request more than the
             * setsz*nset+extra elements used below.  Left as is on purpose:
             * the slack may be hiding the ldc/incC overrun described in the
             * no-reuse case, which also applies when movC and LDC != 0 here.
             */
            vp = malloc(ATL_Cachelen + ATL_MulBySize(FLSIZE+extra));
            ATL_assert(vp);
            A = ATL_AlignPtr(vp);
            if (movC)
            {
                C = A + mb*kb*nset;
                ldc = (LDC) ? mb*nset : mb;
                B = C + mb*nb*nset;
            }
            else
            {
                B = A + mb*kb*nset;
                C = B + kb*nb*nset;
            }
            for (n=setsz*nset+extra,i=0; i < n; i++) A[i] = dumb_rand();
        }
    }
    a = A;
    b = B;
    c = C;
    t0 = ATL_walltime();
    for (j=0,i=reps; i; i--)
    {
#ifdef ATL_NEWTIME
        KMM(mblks, nblks, kb, a, b, c, movA ? a+incA : a,
            movB ? b+incB : b, movC ? c+incC : c);
#else
        KMM(mb, nb, kb, alpha, a, kb, b, kb, beta, c, ldc);
#endif
        if (++j != nset)   /* more sets left: step to the next one */
        {
            a += incA;
            b += incB;
            c += incC;
        }
        else               /* used every set: wrap back to the first */
        {
#ifndef ATL_NEWTIME
            /* alternate the sign of beta on each pass through the sets */
            beta = (beta != 0.0) ? -beta : 0.0;
#endif
            j = 0;
            a = A;
            b = B;
            c = C;
        }
    }
    t1 = ATL_walltime() - t0;
    mf = (2.0*reps*mb*nb*kb) / (t1*1000000.0);
    free(vp);
    return(mf);
}
Ejemplo n.º 3
0
/*
 * Times nreps launches of the tuning work function over ATL_NTHREADS
 * threads through each launch routine selected by the bitmask that
 * GetFlags stores in which:
 *    bit 1 : ATL_goparallel_dyn   (TuneDoWork)
 *    bit 2 : ATL_goparallel_log2  (TuneDoWork)
 *    bit 4 : ATL_goparallel_lin   (TuneDoWork)
 *    bit 8 : ATL_goparallel_prank (TuneDoWork_gp)
 * The total wall time of each selected variant is printed, followed by
 * relative-speed summaries when the variants they compare were all run.
 * The repetition count is whatever GetFlags returns.
 * RETURNS: 0 always.
 */
int main(int nargs, char **args)
{
   int i, k, nreps, which;
   double t0, tlin, tlg2, tdyn, trnk;
   ATL_TUNE_T ta[ATL_NTHREADS];
   volatile int done[ATL_NTHREADS];

   /* every timer starts at zero, including the rank timer */
   tlg2 = tdyn = tlin = trnk = 0.0;
   nreps = GetFlags(nargs, args, &which);

   /* one descriptor per thread, all pointing at the same done array */
   for (i=0; i < ATL_NTHREADS; i++)
   {
      ta[i].rank = i;
      ta[i].nthr = ATL_NTHREADS;
      ta[i].donearr = done;
   }

   printf("FINDING SPEED OF CREATE/BARRIER/JOIN USING %d REPITITIONS:\n",
          nreps);
   if (which & 1)
   {
      t0 = ATL_walltime();
      for (k=0; k < nreps; k++)
      {
         /* clear the done flags before every launch */
         for (i=0; i < ATL_NTHREADS; i++) done[i] = 0;
         ATL_goparallel_dyn(ATL_NTHREADS, TuneDoWork, ta, NULL);
      }
      tdyn = ATL_walltime() - t0;
      printf("   dyn time = %e\n", (float)tdyn);
   }

   if (which & 2)
   {
      t0 = ATL_walltime();
      for (k=0; k < nreps; k++)
      {
         for (i=0; i < ATL_NTHREADS; i++) done[i] = 0;
         ATL_goparallel_log2(ATL_NTHREADS, TuneDoWork, ta, NULL);
      }
      tlg2 = ATL_walltime() - t0;
      printf("   lg2 time = %e\n", (float)tlg2);
   }

   if (which & 4)
   {
      t0 = ATL_walltime();
      for (k=0; k < nreps; k++)
      {
         for (i=0; i < ATL_NTHREADS; i++) done[i] = 0;
         ATL_goparallel_lin(ATL_NTHREADS, TuneDoWork, ta, NULL);
      }
      tlin = ATL_walltime() - t0;
      printf("   lin time = %e\n", (float)tlin);
   }
   if (which & 8)
   {
      t0 = ATL_walltime();
      for (k=0; k < nreps; k++)
      {
         for (i=0; i < ATL_NTHREADS; i++) done[i] = 0;
         ATL_goparallel_prank(ATL_NTHREADS, TuneDoWork_gp, ta, NULL);
      }
      trnk = ATL_walltime() - t0;
      printf("   rnk time = %e\n", (float)trnk);
   }
   /* true only when the dyn, lg2 and lin bits are all set */
   if ((which | 7) == which)
      printf("DYNAMIC is %.2f%% of LINEAR and %.2f%% of LOG2 SPEED.\n",
             (tdyn/tlin)*100.0, (tdyn/tlg2)*100.0);
   if ((which & 1) && (which & 8))
      printf("rank dynamic is %.2f%% of affinity dynamic\n", (trnk/tdyn)*100.0);
   return(0);
}