static TYPE uumtest(enum ATLAS_ORDER Order, enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda, double *tim) { TYPE *A, *Ag, *LmLt; double t0, t1; TYPE normA, eps, resid; enum ATLAS_UPLO MyUplo = Uplo; if (Order == CblasRowMajor) { if (Uplo == CblasUpper) MyUplo = CblasLower; else MyUplo = CblasUpper; } eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N + ATL_MulBySize(N)*N); if (A == NULL) return(-1); Ag = A + lda*(N SHIFT); t0 = ATL_flushcache(CacheSize); lltgen(MyUplo, N, A, lda, N*1029+lda); lltgen(MyUplo, N, Ag, N, N*1029+lda); normA = lltnrm1(MyUplo, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A", N, N, A, lda); Mjoin(PATL,geprint)("Ag", N, N, Ag, N); #endif t0 = ATL_flushcache(-1); t0 = time00(); test_lauum(Order, Uplo, N, A, lda); t1 = time00() - t0; *tim = t1; t0 = ATL_flushcache(0); ATL_checkpad(MyUplo, N, A, lda); if (Uplo == CblasUpper) LmLt = ATL_UmulUt(Order, N, Ag, N); else LmLt = ATL_LtmulL(Order, N, Ag, N); #ifdef DEBUG Mjoin(PATL,geprint)("A", N, N, A, lda); Mjoin(PATL,geprint)("Ag", N, N, LmLt, N); #endif lltdiff(MyUplo, N, A, lda, LmLt, N); #ifdef DEBUG Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N); #endif resid = lltnrm1(MyUplo, N, LmLt, N) / (normA * eps * N); if (resid > 10.0 || resid != resid) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); free(LmLt); free(A); return(resid); }
static TYPE llttest(enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda, double *tim) { TYPE *A, *LmLt; int i; double t0, t1; TYPE normA, eps, resid; eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N); if (A == NULL) return(-1); t0 = ATL_flushcache(CacheSize); lltgen(Uplo, N, A, lda, N*1029+lda); normA = lltnrm1(Uplo, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A0", N, N, A, lda); #endif t0 = ATL_flushcache(-1); t0 = time00(); test_potrf(Uplo, N, A, lda); t1 = time00() - t0; *tim = t1; t0 = ATL_flushcache(0); #ifdef DEBUG Mjoin(PATL,geprint)("L", N, N, A, lda); #endif ATL_checkpad(Uplo, N, A, lda); if (Uplo == AtlasUpper) LmLt = ATL_UtmulU(N, A, lda); else LmLt = ATL_LmulLt(N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("L*Lt", N, N, LmLt, N); #endif lltgen(Uplo, N, A, lda, N*1029+lda); /* regen A over LLt */ lltdiff(Uplo, N, A, lda, LmLt, N); #ifdef DEBUG Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N); #endif resid = lltnrm1(Uplo, N, LmLt, N); #ifdef DEBUG if (resid/(normA*eps*N) > 10.0) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); #endif resid /= (normA * eps * N); free(LmLt); free(A); return(resid); }
/*
 * RunCase: runs one LAUUM case and prints a one-line report to stdout.
 *
 * CacheSize : cache size forwarded to uumtest and used to size the set of
 *             matrices cycled through during repeated timing.
 * thresh    : residual threshold; if <= ATL_rzero the residual test is
 *             skipped and only timing is done.
 * MFLOP     : if larger than the case's own flop estimate (in millions),
 *             the call is repeated until roughly this many MFLOPs are done.
 * Order, Uplo, N, lda : problem description handed to test_lauum.
 *
 * Returns 1 if the residual is within thresh, 0 if it exceeds thresh or is
 * NaN, and -1 if no residual is available (test skipped, or uumtest
 * returned its -1 allocation-failure value).
 */
int RunCase(int CacheSize, TYPE thresh, int MFLOP, enum ATLAS_ORDER Order,
            enum ATLAS_UPLO Uplo, int N, int lda)
{
   const int maxreps = 1000000000;  /* keeps the rep count inside int range */
   char *Ups, *Ord;
   TYPE resid = 0.0;
   double mflop, mflops, t0, tim=0.0;
   int nreps=1, passed, i, imem;
/*
 * Distance between consecutive timing matrices, in TYPE elements.  SHIFT is
 * applied exactly as uumtest does for its second matrix, so that matrices do
 * not overlap when one entry occupies more than one TYPE.
 */
   const size_t incA = (size_t)lda * (N SHIFT);
   TYPE *a, *A;

   mflops = N;
   mflops = (mflops*mflops*mflops) / 4.0;
#ifdef TCPLX
   mflops *= 4.0;
#endif
   mflops /= 1000000.0;

   if (thresh > ATL_rzero)
      resid = uumtest(Order, Uplo, CacheSize, N, lda, &tim);
   else
      resid = -1.0;

   if (MFLOP > mflops || thresh <= ATL_rzero) /* need to time repetitively */
   {
/*
 *    Repetition count = ceil(requested flops / flops per call), computed in
 *    double: converting the flop count to int is out of range for large N
 *    and truncates to 0 (division by zero) for very small N.
 */
      double flops = mflops * 1000000.0;
      double reps;

      if (flops < 1.0)
         flops = 1.0;
      reps = ((double)MFLOP * 1000000.0) / flops;
      if (reps <= 1.0)
         nreps = 1;
      else if (reps >= (double)maxreps)
         nreps = maxreps;
      else
      {
         nreps = (int)reps;
         if ((double)nreps < reps)
            nreps++;
      }
/*
 *    Number of distinct matrices to allocate: derived from the cache size
 *    and N*N, but never fewer than nreps since each repetition advances to
 *    the next matrix.  N <= 0 would divide by zero, so fall back to nreps.
 */
      imem = ATL_DivBySize(CacheSize) ATL_PTCACHEMUL;
      if (N > 0)
      {
         const double nn = (double)N * (double)N;
         imem = (int)(((double)imem + 2.0*nn - 1.0) / nn);
      }
      else
         imem = nreps;
      if (imem < nreps)
         imem = nreps;

      a = A = malloc((size_t)imem * ATL_MulBySize(lda) * N);
      if (A != NULL)
      {
         for (i=0; i < imem; i++)
            lltgen(Uplo, N, A + i*incA, lda, N*1029+lda);
         t0 = time00();
         for (i=nreps; i; i--, a += incA)
            test_lauum(Order, Uplo, N, a, lda);
         tim = time00() - t0;
         tim /= nreps;
         free(A);
      }
      else
         fprintf(stderr, "   WARNING: not enough mem to run timings!\n");
   }

   if (tim > 0.0)
      mflop = mflops / tim;
   else
      mflop = 0.0;

   if (Uplo == AtlasUpper)
      Ups = "Upper";
   else
      Ups = "Lower";
   if (Order == CblasColMajor)
      Ord = "Col";
   else
      Ord = "Row";
   fprintf(stdout, "%5d  %3s  %5s %6d %6d %12.5f %12.3f %12e\n",
           nreps, Ord, Ups, N, lda, tim, mflop, resid);

/*
 * A negative residual means "not tested"; check it first so that a negative
 * thresh below -1 does not turn a skipped test into a reported failure.
 */
   if (resid < 0.0)
      passed = -1;
   else if (resid > thresh || resid != resid)   /* resid != resid: NaN */
      passed = 0;
   else
      passed = 1;
   return(passed);
}