/*
 * Fills the N x N matrix A (leading dimension lda, storage given by Order
 * and Uplo) by delegating to one of two generator paths chosen at compile
 * time:
 *   - POSDEFGEN defined    : PosDefGen does all the work
 *   - POSDEFGEN not defined: MakeHEDiagDom followed by CrapUpTri
 * All five arguments are forwarded unchanged to whichever helpers are used.
 */
static void hegen
   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda)
{
#ifndef POSDEFGEN
   MakeHEDiagDom(Order, Uplo, N, A, lda);
   CrapUpTri(Order, Uplo, N, A, lda);
#else
   PosDefGen(Order, Uplo, N, A, lda);
#endif
}
double GetTimeWithReps_LLT (int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB) { double mflop, t0, t1, drep; char *wrksets; /* working sets for kernel calls */ #ifdef TCPLX const int lda2 = lda+lda; #else const int lda2 = lda; #endif size_t setsz, setszT; /* work set size in memory, and amnt of it touched */ size_t nrep; /* # of reps required to force mflopF flops */ size_t nset; /* # of working sets allocated */ int i; setsz=lda*N*ATL_sizeof; /* matrix is entire working set of LLt */ setszT=N*N*ATL_sizeof; /* only touch N*N portion */ mflop = GetFlopCount(LApotrf, Uplo, M, N, 0, 0, CAN_NB); /* * Cannot reuse matrices (bogus to factor an already factored matrix), so we * must take as our total memspace MAX(nrep,nset)*setsz */ ATL_assert(mflop > 0.0); drep = (mflopF*1.0e6) / mflop; nrep = (int)(drep+0.999999); /* * If cacheline flush doesn't work, then we must use this method */ #if ATL_LINEFLUSH if (nrep < 2) return(-1.0); /* do wt normal timer */ #else nrep = (nrep >= 1) ? nrep : 1; #endif nset = (flsizeKB*1024+setszT-1)/setszT; if (nset < nrep) nset = nrep; wrksets = malloc(nset * setsz); ATL_assert(wrksets); for (i=0; i < nset; i++) PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N, (TYPE*)(wrksets+i*setsz), lda); t0 = time00(); for (i=0; i < nrep; i++) { test_potrf(Uplo, N, (TYPE*)(wrksets+i*setsz), lda); } t1 = time00(); free(wrksets); return((t1-t0)/((double)nrep)); }
double GetTime(int rout, int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB) { #if ATL_LINEFLUSH FLSTRUCT *flp; #endif TYPE *A, *wrk=NULL, dtmp, dtmp1, *tau=NULL; int *ipiv=NULL, itmp, wlen; double t0, t1; /* * Call routs that force particular flop count if requested; they return -1.0 * if one invocation will suffice to force mflopF, in which case do the timing * in this routine, which is simpler & doesn't require LRU & as much workspace * If we don't have the ability to do cacheline flushing, must use LRU rout! */ #if ATL_LINEFLUSH if (mflopF > 0) { #endif if (rout == LApotrf) t1 = GetTimeWithReps_LLT(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); else if (rout == LAgeqrf) { if (Side == LARight) { if (Uplo == LAUpper) t1 = GetTimeWithReps_QR(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); else t1 = GetTimeWithReps_QL(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); } else if (Uplo == LAUpper) t1 = GetTimeWithReps_RQ(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); else t1 = GetTimeWithReps_LQ(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); } else t1 = GetTimeWithReps_LU(mflopF, lda, M, N, nb, Uplo, Side, flsizeKB); #if ATL_LINEFLUSH == 0 return(t1); #else if (t1 >= 0.0) return(t1); } #endif #if ATL_LINEFLUSH != 0 /* * Generate operands */ A = GetGE(M, N, lda); ATL_assert(A); flp = ATL_GetFlushStruct(A, N*((size_t)lda)*ATL_sizeof, NULL); if (rout == LApotrf) PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N, A, lda); else if (rout & LAgeqrf) { /* QR must allocate workspace */ if (Side == LARight) { if (Uplo == LAUpper) { test_geqrf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } else { test_geqlf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } } else if (Uplo == LAUpper) { test_gerqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } else { test_gelqf(CblasColMajor, M, N, A, lda, &dtmp1, &dtmp, -1); } wlen = dtmp; wrk = calloc(wlen, ATL_sizeof); ATL_assert(wrk); flp = ATL_GetFlushStruct(wrk, wlen*ATL_sizeof, flp); itmp = (M >= N) ? 
M : N; tau = calloc(itmp, ATL_sizeof); flp = ATL_GetFlushStruct(tau, itmp*ATL_sizeof, flp); } else { ipiv = calloc(M, sizeof(int)); ATL_assert(ipiv); flp = ATL_GetFlushStruct(ipiv, M*sizeof(int), flp); } /* * Flush cache, and do timing */ ATL_FlushAreasByCL(flp); if (rout == LApotrf) { t0 = time00(); test_potrf(Uplo, N, A, lda); t1 = time00(); } else if (rout == LAgeqrf) { if (Side == LARight) { if (Uplo == LAUpper) { t0 = time00(); test_geqrf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } else { t0 = time00(); test_geqlf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } } else if (Uplo == LAUpper) { t0 = time00(); test_gerqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } else { t0 = time00(); test_gelqf(CblasColMajor, M, N, A, lda, tau, wrk, wlen); t1 = time00(); } } else { t0 = time00(); test_getrf(CblasColMajor, M, N, A, lda, ipiv); t1 = time00(); } if (tau) free(tau); if (wrk) free(wrk); if (ipiv) free(ipiv); free(A); ATL_KillAllFlushStructs(flp); return(t1 - t0); #endif }