Ejemplo n.º 1
0
int RunCase(int CacheSize, TYPE thresh, int MFLOP, enum ATLAS_UPLO Uplo,
            int N, int lda)
{
   char *Ups, *Ord;
   TYPE resid = 0.0;
   double mflop, mflops, t0, tim=0.0;
   int nreps=1, passed, i, imem;
   const int incA = lda*N;
   TYPE *a, *A;

   mflops = N;
   #ifdef TREAL
      mflops = (mflops*mflops*mflops) / 3.0 + (mflops*mflops) / 2.0;
   #else
      mflops = (4.0/3.0)*(mflops*mflops*mflops) + 3.0 * (mflops*mflops);
   #endif
   mflops /= 1000000.0;

   if (thresh > ATL_rzero) resid =
      llttest(Uplo, CacheSize, N, lda, &tim);
   else resid = -1.0;

   if (MFLOP > mflops || thresh <= ATL_rzero) /* need to time repetitively */
   {
      nreps = (mflops * 1000000);
      nreps = (MFLOP*1000000 + nreps-1) / nreps;
      if (nreps < 1) nreps = 1;
      imem = ATL_DivBySize(CacheSize) ATL_PTCACHEMUL;
      imem = (imem + 2*N*N-1) / (N*N);
      if (imem < nreps) imem = nreps;
      a = A = malloc(imem * ATL_MulBySize(incA));
      if (A != NULL)
      {
         for (i=0; i < imem; i++) lltgen(Uplo, N, A+i*incA, lda, N*1029+lda);
         t0 = time00();
         for (i=nreps; i; i--, a += incA)
            test_potrf(Uplo, N, a, lda);
         tim = time00() - t0;
         tim /= nreps;
         free(A);
      }
      else fprintf(stderr, "   WARNING: not enough mem to run timings!\n");
   }

   if (tim > 0.0) mflop = mflops / tim;
   else mflop = 0.0;
   if (Uplo == AtlasUpper) Ups = "Upper";
   else Ups = "Lower";
   fprintf(stdout, "%5d  %5s %6d %6d  %12.5f  %12.3f  %12e\n",
           nreps, Ups, N, lda, tim, mflop, resid);
   if (resid > thresh || resid != resid) passed = 0;
   else if (resid < 0.0) passed = -1;
   else passed = 1;
   return(passed);
}
Ejemplo n.º 2
0
double GetTimeWithReps_LLT
   (int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB)
{
   double mflop, t0, t1, drep;
   char *wrksets;       /* working sets for kernel calls */
#ifdef TCPLX
   const int lda2 = lda+lda;
#else
   const int lda2 = lda;
#endif
   size_t setsz, setszT;   /* work set size in memory, and amnt of it touched */
   size_t nrep;            /* # of reps required to force mflopF flops */
   size_t nset;            /* # of working sets allocated */
   int i;
   setsz=lda*N*ATL_sizeof;   /* matrix is entire working set of LLt */
   setszT=N*N*ATL_sizeof;    /* only touch N*N portion */
   mflop = GetFlopCount(LApotrf, Uplo, M, N, 0, 0, CAN_NB);
/*
 * Cannot reuse matrices (bogus to factor an already factored matrix), so we
 * must take as our total memspace MAX(nrep,nset)*setsz
 */
   ATL_assert(mflop > 0.0);
   drep = (mflopF*1.0e6) / mflop;
   nrep = (int)(drep+0.999999);
/*
 * If cacheline flush doesn't work, then we must use this method
 */
   #if ATL_LINEFLUSH
      if (nrep < 2)
         return(-1.0);                                /* do wt normal timer */
   #else
      nrep = (nrep >= 1) ? nrep : 1;
   #endif

   nset = (flsizeKB*1024+setszT-1)/setszT;
   if (nset < nrep)
      nset = nrep;
   wrksets = malloc(nset * setsz);
   ATL_assert(wrksets);

   for (i=0; i < nset; i++)
      PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N,
                (TYPE*)(wrksets+i*setsz), lda);

   t0 = time00();
   for (i=0; i < nrep; i++)
   {
      test_potrf(Uplo, N, (TYPE*)(wrksets+i*setsz), lda);
   }
   t1 = time00();
   free(wrksets);

   return((t1-t0)/((double)nrep));
}
Ejemplo n.º 3
0
static TYPE llttest(enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda,
                    double *tim)
{
   TYPE *A, *LmLt;
   int i;
   double t0, t1;
   TYPE normA, eps, resid;

   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*N);
   if (A == NULL) return(-1);
   t0 = ATL_flushcache(CacheSize);
   lltgen(Uplo, N, A, lda, N*1029+lda);
   normA = lltnrm1(Uplo, N, A, lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A0", N, N, A, lda);
   #endif

   t0 = ATL_flushcache(-1);

   t0 = time00();
   test_potrf(Uplo, N, A, lda);
   t1 = time00() - t0;
   *tim = t1;

   t0 = ATL_flushcache(0);

   #ifdef DEBUG
      Mjoin(PATL,geprint)("L", N, N, A, lda);
   #endif
   ATL_checkpad(Uplo, N, A, lda);
   if (Uplo == AtlasUpper) LmLt = ATL_UtmulU(N, A, lda);
   else LmLt = ATL_LmulLt(N, A, lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("L*Lt", N, N, LmLt, N);
   #endif
   lltgen(Uplo, N, A, lda, N*1029+lda);  /* regen A over LLt */
   lltdiff(Uplo, N, A, lda, LmLt, N);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N);
   #endif
   resid = lltnrm1(Uplo, N, LmLt, N);
   #ifdef DEBUG
      if (resid/(normA*eps*N) > 10.0)
         fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid);
   #endif
   resid /= (normA * eps * N);

   free(LmLt);
   free(A);

   return(resid);
}
Ejemplo n.º 4
0
double GetTime(int rout, int mflopF, int lda, int M, int N, int nb, int Uplo,
               int Side, int flsizeKB)
{
#if ATL_LINEFLUSH
   FLSTRUCT *flp;
#endif
   TYPE *A, *wrk=NULL, dtmp, dtmp1, *tau=NULL;
   int *ipiv=NULL, itmp, wlen;
   double t0, t1;
/*
 * Call routs that force particular flop count if requested; they return -1.0
 * if one invocation will suffice to force mflopF, in which case do the timing
 * in this routine, which is simpler & doesn't require LRU & as much workspace
 * If we don't have the ability to do cacheline flushing, must use LRU rout!
 */
#if ATL_LINEFLUSH
   if (mflopF > 0)
   {
#endif
      if (rout == LApotrf)
         t1 = GetTimeWithReps_LLT(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB);
      else if (rout == LAgeqrf)
      {
         if (Side == LARight)
         {
            if (Uplo == LAUpper)
               t1 = GetTimeWithReps_QR(mflopF, lda, M, N, nb, Uplo, Side,
                                       flsizeKB);
            else
               t1 = GetTimeWithReps_QL(mflopF, lda, M, N, nb, Uplo, Side,
                                       flsizeKB);
         }
         else if (Uplo == LAUpper)
            t1 = GetTimeWithReps_RQ(mflopF, lda, M, N, nb, Uplo, Side,
                                    flsizeKB);
         else
            t1 = GetTimeWithReps_LQ(mflopF, lda, M, N, nb, Uplo, Side,
                                    flsizeKB);
      }
      else
         t1 = GetTimeWithReps_LU(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB);
#if ATL_LINEFLUSH == 0
      return(t1);
#else
      if (t1 >= 0.0)
         return(t1);
   }
#endif
#if ATL_LINEFLUSH != 0
/*
 * Generate operands
 */
   A = GetGE(M, N, lda);
   ATL_assert(A);
   flp = ATL_GetFlushStruct(A, N*((size_t)lda)*ATL_sizeof, NULL);
   if (rout == LApotrf)
      PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N, A, lda);
   else if (rout & LAgeqrf)
   {                            /* QR must allocate workspace */
      if (Side == LARight)
      {
         if (Uplo == LAUpper)
         {
            test_geqrf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
         }
         else
         {
            test_geqlf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
         }
      }
      else if (Uplo == LAUpper)
      {
         test_gerqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
      }
      else
      {
         test_gelqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
      }
      wlen = dtmp;
      wrk = calloc(wlen, ATL_sizeof);
      ATL_assert(wrk);
      flp = ATL_GetFlushStruct(wrk, wlen*ATL_sizeof, flp);
      itmp = (M >= N) ? M : N;
      tau = calloc(itmp, ATL_sizeof);
      flp = ATL_GetFlushStruct(tau, itmp*ATL_sizeof, flp);
   }
   else
   {
      ipiv = calloc(M, sizeof(int));
      ATL_assert(ipiv);
      flp = ATL_GetFlushStruct(ipiv, M*sizeof(int), flp);
   }
/*
 * Flush cache, and do timing
 */
   ATL_FlushAreasByCL(flp);
   if (rout == LApotrf)
   {
      t0 = time00();
      test_potrf(Uplo, N, A, lda);
      t1 = time00();
   }
   else if (rout == LAgeqrf)
   {
      if (Side == LARight)
      {
         if (Uplo == LAUpper)
         {
            t0 = time00();
            test_geqrf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
            t1 = time00();
         }
         else
         {
            t0 = time00();
            test_geqlf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
            t1 = time00();
         }
      }
      else if (Uplo == LAUpper)
      {
         t0 = time00();
         test_gerqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
         t1 = time00();
      }
      else
      {
         t0 = time00();
         test_gelqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
         t1 = time00();
      }
   }
   else
   {
      t0 = time00();
      test_getrf(CblasColMajor, M, N, A, lda, ipiv);
      t1 = time00();
   }
   if (tau)
      free(tau);
   if (wrk)
      free(wrk);
   if (ipiv)
      free(ipiv);
   free(A);
   ATL_KillAllFlushStructs(flp);
   return(t1 - t0);
#endif
}