/*
 * Runs one Cholesky factorization test case on an N x N matrix with leading
 * dimension lda, prints a one-line report to stdout, and returns the verdict.
 *
 * CacheSize : forwarded to llttest; also converted with ATL_DivBySize to
 *             decide how many matrix copies the repeated timing allocates
 * thresh    : residual threshold; when <= ATL_rzero the residual check is
 *             skipped and only timing is done
 * MFLOP     : minimum number of MFLOPs the timing should cover
 * Uplo      : AtlasUpper or AtlasLower
 * N, lda    : matrix order and leading dimension
 *
 * RETURNS:  1 if the residual is within thresh,
 *           0 if the residual exceeds thresh or is NaN,
 *          -1 if no residual was computed (resid < 0).
 */
int RunCase(int CacheSize, TYPE thresh, int MFLOP, enum ATLAS_UPLO Uplo,
            int N, int lda)
{
   char *Ups;
   TYPE resid = 0.0;
   double mflop, mflops, t0, tim=0.0;
   int nreps=1, passed, i, imem;
   /* size_t so the per-matrix stride and the byte count cannot overflow int */
   const size_t incA = ((size_t)lda)*N;
   TYPE *a, *A;

/*
 * Operation count of one factorization, in MFLOPs
 */
   mflops = N;
   #ifdef TREAL
      mflops = (mflops*mflops*mflops) / 3.0 + (mflops*mflops) / 2.0;
   #else
      mflops = (4.0/3.0)*(mflops*mflops*mflops) + 3.0 * (mflops*mflops);
   #endif
   mflops /= 1000000.0;

   if (thresh > ATL_rzero)
      resid = llttest(Uplo, CacheSize, N, lda, &tim);
   else
      resid = -1.0;

/*
 * Time repetitively when one call does not reach MFLOP, or when no residual
 * run supplied a timing.  N <= 0 is excluded: there is nothing to time and
 * the set-count computation below divides by N*N.
 */
   if (N > 0 && (MFLOP > mflops || thresh <= ATL_rzero))
   {
/*
 *    nreps = ceil(MFLOP / mflops), computed in double.  The previous integer
 *    form truncated the flop count to 0 for tiny N (division by zero) and
 *    could overflow int in MFLOP*1000000.  The result is clamped to the
 *    largest int (assumes int has no padding bits).
 */
      const double maxreps = (double)(((unsigned int)-1) >> 1);
      double dreps = MFLOP / mflops;

      if (dreps > maxreps)
         dreps = maxreps;
      nreps = (dreps > 1.0) ? (int)dreps : 1;
      if (nreps < dreps)
         nreps++;

/*
 *    Number of matrix copies: enough to cover the cache-derived element
 *    count, and at least one fresh matrix per repetition
 */
      imem = ATL_DivBySize(CacheSize) ATL_PTCACHEMUL;
      imem = (imem + 2*N*N-1) / (N*N);
      if (imem < nreps)
         imem = nreps;
/*
 *    NOTE(review): the buffer is sized with ATL_MulBySize(incA) per copy but
 *    copies are addressed as A+i*incA in units of TYPE; confirm these agree
 *    for complex builds.
 */
      a = A = malloc(((size_t)imem) * ATL_MulBySize(incA));
      if (A != NULL)
      {
         for (i=0; i < imem; i++)
            lltgen(Uplo, N, A+i*incA, lda, N*1029+lda);
         t0 = time00();
         for (i=nreps; i; i--, a += incA)
            test_potrf(Uplo, N, a, lda);
         tim = time00() - t0;
         tim /= nreps;
         free(A);
      }
      else
         fprintf(stderr, " WARNING: not enough mem to run timings!\n");
   }

   if (tim > 0.0)
      mflop = mflops / tim;
   else
      mflop = 0.0;

   if (Uplo == AtlasUpper)
      Ups = "Upper";
   else
      Ups = "Lower";
   fprintf(stdout, "%5d %5s %6d %6d %12.5f %12.3f %12e\n",
           nreps, Ups, N, lda, tim, mflop, resid);

   /* resid != resid is true only for NaN, which counts as a failure */
   if (resid > thresh || resid != resid)
      passed = 0;
   else if (resid < 0.0)
      passed = -1;
   else
      passed = 1;
   return(passed);
}
double GetTimeWithReps_LLT (int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB) { double mflop, t0, t1, drep; char *wrksets; /* working sets for kernel calls */ #ifdef TCPLX const int lda2 = lda+lda; #else const int lda2 = lda; #endif size_t setsz, setszT; /* work set size in memory, and amnt of it touched */ size_t nrep; /* # of reps required to force mflopF flops */ size_t nset; /* # of working sets allocated */ int i; setsz=lda*N*ATL_sizeof; /* matrix is entire working set of LLt */ setszT=N*N*ATL_sizeof; /* only touch N*N portion */ mflop = GetFlopCount(LApotrf, Uplo, M, N, 0, 0, CAN_NB); /* * Cannot reuse matrices (bogus to factor an already factored matrix), so we * must take as our total memspace MAX(nrep,nset)*setsz */ ATL_assert(mflop > 0.0); drep = (mflopF*1.0e6) / mflop; nrep = (int)(drep+0.999999); /* * If cacheline flush doesn't work, then we must use this method */ #if ATL_LINEFLUSH if (nrep < 2) return(-1.0); /* do wt normal timer */ #else nrep = (nrep >= 1) ? nrep : 1; #endif nset = (flsizeKB*1024+setszT-1)/setszT; if (nset < nrep) nset = nrep; wrksets = malloc(nset * setsz); ATL_assert(wrksets); for (i=0; i < nset; i++) PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N, (TYPE*)(wrksets+i*setsz), lda); t0 = time00(); for (i=0; i < nrep; i++) { test_potrf(Uplo, N, (TYPE*)(wrksets+i*setsz), lda); } t1 = time00(); free(wrksets); return((t1-t0)/((double)nrep)); }
static TYPE llttest(enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda, double *tim) { TYPE *A, *LmLt; int i; double t0, t1; TYPE normA, eps, resid; eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N); if (A == NULL) return(-1); t0 = ATL_flushcache(CacheSize); lltgen(Uplo, N, A, lda, N*1029+lda); normA = lltnrm1(Uplo, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A0", N, N, A, lda); #endif t0 = ATL_flushcache(-1); t0 = time00(); test_potrf(Uplo, N, A, lda); t1 = time00() - t0; *tim = t1; t0 = ATL_flushcache(0); #ifdef DEBUG Mjoin(PATL,geprint)("L", N, N, A, lda); #endif ATL_checkpad(Uplo, N, A, lda); if (Uplo == AtlasUpper) LmLt = ATL_UtmulU(N, A, lda); else LmLt = ATL_LmulLt(N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("L*Lt", N, N, LmLt, N); #endif lltgen(Uplo, N, A, lda, N*1029+lda); /* regen A over LLt */ lltdiff(Uplo, N, A, lda, LmLt, N); #ifdef DEBUG Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N); #endif resid = lltnrm1(Uplo, N, LmLt, N); #ifdef DEBUG if (resid/(normA*eps*N) > 10.0) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); #endif resid /= (normA * eps * N); free(LmLt); free(A); return(resid); }
double GetTime(int rout, int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB) { #if ATL_LINEFLUSH FLSTRUCT *flp; #endif TYPE *A, *wrk=NULL, dtmp, dtmp1, *tau=NULL; int *ipiv=NULL, itmp, wlen; double t0, t1; /* * Call routs that force particular flop count if requested; they return -1.0 * if one invocation will suffice to force mflopF, in which case do the timing * in this routine, which is simpler & doesn't require LRU & as much workspace * If we don't have the ability to do cacheline flushing, must use LRU rout! */ #if ATL_LINEFLUSH if (mflopF > 0) { #endif if (rout == LApotrf) t1 = GetTimeWithReps_LLT(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); else if (rout == LAgeqrf) { if (Side == LARight) { if (Uplo == LAUpper) t1 = GetTimeWithReps_QR(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); else t1 = GetTimeWithReps_QL(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); } else if (Uplo == LAUpper) t1 = GetTimeWithReps_RQ(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); else t1 = GetTimeWithReps_LQ(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); } else t1 = GetTimeWithReps_LU(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); #if ATL_LINEFLUSH == 0 return(t1); #else if (t1 >= 0.0) return(t1); } #endif #if ATL_LINEFLUSH != 0 /* * Generate operands */ A = GetGE(M, N, lda); ATL_assert(A); flp = ATL_GetFlushStruct(A, N*((size_t)lda)*ATL_sizeof, NULL); if (rout == LApotrf) PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N, A, lda); else if (rout & LAgeqrf) { /* QR must allocate workspace */ if (Side == LARight) { if (Uplo == LAUpper) { test_geqrf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } else { test_geqlf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } } else if (Uplo == LAUpper) { test_gerqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } else { test_gelqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } wlen = dtmp; wrk = calloc(wlen, ATL_sizeof); ATL_assert(wrk); flp = ATL_GetFlushStruct(wrk, wlen*ATL_sizeof, flp); itmp = (M >= N) ? 
M : N; tau = calloc(itmp, ATL_sizeof); flp = ATL_GetFlushStruct(tau, itmp*ATL_sizeof, flp); } else { ipiv = calloc(M, sizeof(int)); ATL_assert(ipiv); flp = ATL_GetFlushStruct(ipiv, M*sizeof(int), flp); } /* * Flush cache, and do timing */ ATL_FlushAreasByCL(flp); if (rout == LApotrf) { t0 = time00(); test_potrf(Uplo, N, A, lda); t1 = time00(); } else if (rout == LAgeqrf) { if (Side == LARight) { if (Uplo == LAUpper) { t0 = time00(); test_geqrf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } else { t0 = time00(); test_geqlf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } } else if (Uplo == LAUpper) { t0 = time00(); test_gerqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } else { t0 = time00(); test_gelqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } } else { t0 = time00(); test_getrf(CblasColMajor, M, N, A, lda, ipiv); t1 = time00(); } if (tau) free(tau); if (wrk) free(wrk); if (ipiv) free(ipiv); free(A); ATL_KillAllFlushStructs(flp); return(t1 - t0); #endif }