Esempio n. 1
0
double GetTimeWithReps_LU
   (int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB)
{
   double mflop, t0, t1, drep;
   char *wrksets;       /* working sets for kernel calls */
#ifdef TCPLX
   const int lda2 = lda+lda;
#else
   const int lda2 = lda;
#endif
   size_t setsz, setszT;   /* work set size in memory, and amnt of it touched */
   size_t nrep;            /* # of reps required to force mflopF flops */
   size_t nset;            /* # of working sets allocated */
   int i;
/*
 * Keep setsz a multiple of TYPE size for alignment reasons.  LU only accesses
 * M*N of matrix and all of IPIV.
 */
   setsz = lda*N*ATL_sizeof +
           ((M*sizeof(int)+ATL_sizeof-1)/ATL_sizeof)*ATL_sizeof;
   setszT = M*N*ATL_sizeof + M*sizeof(int);
   mflop = GetFlopCount(LAgetrf, 0, M, N, 0, 0, CAN_NB);
/*
 * Cannot reuse matrices (bogus to factor an already factored matrix), so we
 * must take as our total memspace MAX(nrep,nset)*setsz
 */
   ATL_assert(mflop > 0.0);
   drep = (mflopF*1.0e6) / mflop;
   nrep = (int)(drep+0.999999);
/*
 * If cacheline flush doesn't work, then we must use this method
 */
   #if ATL_LINEFLUSH
      if (nrep < 2)
         return(-1.0);                                /* do wt normal timer */
   #else
      nrep = (nrep >= 1) ? nrep : 1;
   #endif

   nset = (flsizeKB*1024+setszT-1)/setszT;
   if (nset < nrep)
      nset = nrep;
   wrksets = malloc(nset * setsz);
   ATL_assert(wrksets);

   for (i=0; i < nset; i++)
      Mjoin(PATL,gegen)(M, N, (TYPE*)(wrksets+i*setsz), lda, M*N+lda);

   t0 = time00();
   for (i=0; i < nrep; i++)
   {
      test_getrf(CblasColMajor, M, N, (TYPE*)(wrksets+i*setsz), lda,
                 (int*)(wrksets+i*setsz+lda*N*ATL_sizeof));
   }
   t1 = time00();
   free(wrksets);

   return((t1-t0)/((double)nrep));
}
Esempio n. 2
0
static TYPE lutestR(int CacheSize, int M, int N, int lda, int *npiv,
                    double *tim)
{
   TYPE *A, *LmU;
   int *ipiv;
   const int MN = Mmin(M,N);
   int i;
   double t0, t1;
   TYPE normA, eps, resid;

   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*M);
   if (A == NULL) return(-1);
   ipiv = malloc( MN * sizeof(int) );
   if (ipiv == NULL)
   {
      free(A);
      return(-1);
   }
   t0 = ATL_flushcache(CacheSize);

   Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A0", N, M, A, lda);
   #endif
   normA = Mjoin(PATL,genrm1)(N, M, A, lda); /* actually infnrm, but OK */

   t0 = ATL_flushcache(-1);

   t0 = time00();
   test_getrf(CblasRowMajor, M, N, A, lda, ipiv);
   t1 = time00() - t0;
   *tim = t1;

   t0 = ATL_flushcache(0);

   #ifdef DEBUG
      Mjoin(PATL,geprint)("LU", N, M, A, lda);
   #endif
   LmU = ATL_LmulUR(M, N, A, lda);  /* LmU contains L * U */
   #ifdef DEBUG
      Mjoin(PATL,geprint)("L*U", N, M, LmU, N);
   #endif
   Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda);  /* regenerate A, overwriting LU */
   ATL_laswp(M, A, lda, 0, MN, ipiv, 1);  /* apply swaps to A */
   resid = Mjoin(PATL,gediffnrm1)(N, M, A, lda, LmU, N);
   resid /= (normA * eps * Mmin(M,N));
   *npiv = findnpvt(MN, ipiv);

   free(LmU);
   free(A);
   free(ipiv);

   return(resid);
}
Esempio n. 3
0
int RunCase(int CacheSize, TYPE thresh, int MFLOP, enum ATLAS_ORDER Order,
            int M, int N, int lda)
{
   char *cord = (Order == AtlasColMajor ? "Col" : "Row");
   const double maxMN = Mmax(M,N), minMN = Mmin(M,N);
   unsigned long nreps=0;
   int npiv=(-1), *ipiv;
   const int incA = (Order == AtlasColMajor ? N*lda : M*lda);
   double mflops, mflop, resid, tim=(-1.0), t0;
   TYPE *A, *a;
   int i;

   #ifdef TREAL
      mflops = maxMN * minMN * minMN - ((minMN*minMN*minMN) / 3.0) -
               (minMN*minMN) / 2.0;
   #else
      mflops = (maxMN * minMN * minMN - ((minMN*minMN*minMN) / 3.0) +
                (maxMN*minMN) / 2.0)*4.0 - 3.0 * minMN*minMN;
   #endif
   mflops /= 1000000.0;

   if (thresh > ATL_rzero)
   {
      if (Order == AtlasColMajor)
         resid = lutestC(CacheSize, M, N, lda, &npiv, &tim);
      else resid = lutestR(CacheSize, M, N, lda, &npiv, &tim);
   }
   else resid = -1.0;
   if (MFLOP > mflops || thresh <= ATL_rzero) /* need to time repetitively */
   {
      nreps = (mflops*1000000);
      nreps = (MFLOP*1000000 + nreps-1) / nreps;
      if (nreps < 1) nreps = 1;
      i = ATL_DivBySize(2*CacheSize) ATL_PTCACHEMUL;
      i = (i + M*N) / (M*N);
      if (i < nreps) i = nreps;  /* don't reuse mem or no pivoting */
      a = A = malloc(i * ATL_MulBySize(incA));
      if (A != NULL)
      {
         ipiv = malloc(Mmin(M,N)*sizeof(int));  /* what the hell - reuse ipiv */
         if (ipiv)
         {
            Mjoin(PATL,gegen)(i*incA, 1, A, i*incA, incA+M+3012);
            t0 = time00();
            for (i=nreps; i; i--, a += incA)
               test_getrf(Order, M, N, a, lda, ipiv);
            tim = time00() - t0;
            tim /= nreps;
            if (npiv == 0) npiv = findnpvt(Mmin(M,N), ipiv);
            free(ipiv);
         }
         else fprintf(stderr, "   WARNING: not enough mem to run timings!\n");
         free(A);
      }
      else fprintf(stderr, "   WARNING: not enough mem to run timings!\n");
   }
   if (tim > 0.0) mflop = mflops / tim;
   else mflop = 0.0;
   fprintf(stdout, "%5d  %3s   %6d %6d %6d %6d %9.3f %9.3f %9.3e\n",
           nreps, cord, M, N, lda, npiv, tim, mflop, resid);
   return(resid <= thresh);
}
Esempio n. 4
0
double GetTime(int rout, int mflopF, int lda, int M, int N, int nb, int Uplo,
               int Side, int flsizeKB)
{
#if ATL_LINEFLUSH
   FLSTRUCT *flp;
#endif
   TYPE *A, *wrk=NULL, dtmp, dtmp1, *tau=NULL;
   int *ipiv=NULL, itmp, wlen;
   double t0, t1;
/*
 * Call routs that force particular flop count if requested; they return -1.0
 * if one invocation will suffice to force mflopF, in which case do the timing
 * in this routine, which is simpler & doesn't require LRU & as much workspace
 * If we don't have the ability to do cacheline flushing, must use LRU rout!
 */
#if ATL_LINEFLUSH
   if (mflopF > 0)
   {
#endif
      if (rout == LApotrf)
         t1 = GetTimeWithReps_LLT(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB);
      else if (rout == LAgeqrf)
      {
         if (Side == LARight)
         {
            if (Uplo == LAUpper)
               t1 = GetTimeWithReps_QR(mflopF, lda, M, N, nb, Uplo, Side,
                                       flsizeKB);
            else
               t1 = GetTimeWithReps_QL(mflopF, lda, M, N, nb, Uplo, Side,
                                       flsizeKB);
         }
         else if (Uplo == LAUpper)
            t1 = GetTimeWithReps_RQ(mflopF, lda, M, N, nb, Uplo, Side,
                                    flsizeKB);
         else
            t1 = GetTimeWithReps_LQ(mflopF, lda, M, N, nb, Uplo, Side,
                                    flsizeKB);
      }
      else
         t1 = GetTimeWithReps_LU(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB);
#if ATL_LINEFLUSH == 0
      return(t1);
#else
      if (t1 >= 0.0)
         return(t1);
   }
#endif
#if ATL_LINEFLUSH != 0
/*
 * Generate operands
 */
   A = GetGE(M, N, lda);
   ATL_assert(A);
   flp = ATL_GetFlushStruct(A, N*((size_t)lda)*ATL_sizeof, NULL);
   if (rout == LApotrf)
      PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N, A, lda);
   else if (rout & LAgeqrf)
   {                            /* QR must allocate workspace */
      if (Side == LARight)
      {
         if (Uplo == LAUpper)
         {
            test_geqrf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
         }
         else
         {
            test_geqlf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
         }
      }
      else if (Uplo == LAUpper)
      {
         test_gerqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
      }
      else
      {
         test_gelqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1);
      }
      wlen = dtmp;
      wrk = calloc(wlen, ATL_sizeof);
      ATL_assert(wrk);
      flp = ATL_GetFlushStruct(wrk, wlen*ATL_sizeof, flp);
      itmp = (M >= N) ? M : N;
      tau = calloc(itmp, ATL_sizeof);
      flp = ATL_GetFlushStruct(tau, itmp*ATL_sizeof, flp);
   }
   else
   {
      ipiv = calloc(M, sizeof(int));
      ATL_assert(ipiv);
      flp = ATL_GetFlushStruct(ipiv, M*sizeof(int), flp);
   }
/*
 * Flush cache, and do timing
 */
   ATL_FlushAreasByCL(flp);
   if (rout == LApotrf)
   {
      t0 = time00();
      test_potrf(Uplo, N, A, lda);
      t1 = time00();
   }
   else if (rout == LAgeqrf)
   {
      if (Side == LARight)
      {
         if (Uplo == LAUpper)
         {
            t0 = time00();
            test_geqrf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
            t1 = time00();
         }
         else
         {
            t0 = time00();
            test_geqlf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
            t1 = time00();
         }
      }
      else if (Uplo == LAUpper)
      {
         t0 = time00();
         test_gerqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
         t1 = time00();
      }
      else
      {
         t0 = time00();
         test_gelqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen);
         t1 = time00();
      }
   }
   else
   {
      t0 = time00();
      test_getrf(CblasColMajor, M, N, A, lda, ipiv);
      t1 = time00();
   }
   if (tau)
      free(tau);
   if (wrk)
      free(wrk);
   if (ipiv)
      free(ipiv);
   free(A);
   ATL_KillAllFlushStructs(flp);
   return(t1 - t0);
#endif
}