/*
 * Times one call to test_inv and reports the residual of its result.
 *
 * Order, Uplo : forwarded unchanged to GetMat, test_inv and GetResid
 * N, lda      : matrix order and leading dimension
 * CacheSize   : forwarded to ATL_flushcache before the timed region
 * res         : output; receives the value returned by GetResid
 *
 * Returns the time00() difference measured around the test_inv call.
 */
static double RunTest(enum CBLAS_ORDER Order, enum TEST_UPLO Uplo, int N,
                      int lda, int CacheSize, TYPE *res)
{
   TYPE *orig, *inv;
   double start, elapsed;

   orig = GetMat(Order, Uplo, N, lda);
#ifdef DEBUG
   Mjoin(PATL,geprint)("A0", N, N, orig, lda);
#endif
   /* Work on a copy so the original stays available for the residual. */
   inv = DupMat(Order, N, N, orig, lda, lda);

   ATL_flushcache(CacheSize);
   ATL_flushcache(-1);

   start = time00();
   test_inv(Order, Uplo, N, inv, lda);   /* inv should now hold inverse(orig) */
   elapsed = time00() - start;

   ATL_flushcache(0);
#ifdef DEBUG
   Mjoin(PATL,geprint)("A ", N, N, orig, lda);
   Mjoin(PATL,geprint)("AI", N, N, inv, lda);
#endif

   *res = GetResid(Order, Uplo, N, orig, lda, inv, lda);

   free(inv);
   free(orig);
   return(elapsed);
}
static TYPE lutestR(int CacheSize, int M, int N, int lda, int *npiv, double *tim) { TYPE *A, *LmU; int *ipiv; const int MN = Mmin(M,N); int i; double t0, t1; TYPE normA, eps, resid; eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*M); if (A == NULL) return(-1); ipiv = malloc( MN * sizeof(int) ); if (ipiv == NULL) { free(A); return(-1); } t0 = ATL_flushcache(CacheSize); Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda); #ifdef DEBUG Mjoin(PATL,geprint)("A0", N, M, A, lda); #endif normA = Mjoin(PATL,genrm1)(N, M, A, lda); /* actually infnrm, but OK */ t0 = ATL_flushcache(-1); t0 = time00(); test_getrf(CblasRowMajor, M, N, A, lda, ipiv); t1 = time00() - t0; *tim = t1; t0 = ATL_flushcache(0); #ifdef DEBUG Mjoin(PATL,geprint)("LU", N, M, A, lda); #endif LmU = ATL_LmulUR(M, N, A, lda); /* LmU contains L * U */ #ifdef DEBUG Mjoin(PATL,geprint)("L*U", N, M, LmU, N); #endif Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda); /* regenerate A, overwriting LU */ ATL_laswp(M, A, lda, 0, MN, ipiv, 1); /* apply swaps to A */ resid = Mjoin(PATL,gediffnrm1)(N, M, A, lda, LmU, N); resid /= (normA * eps * Mmin(M,N)); *npiv = findnpvt(MN, ipiv); free(LmU); free(A); free(ipiv); return(resid); }
static TYPE uumtest(enum ATLAS_ORDER Order, enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda, double *tim) { TYPE *A, *Ag, *LmLt; double t0, t1; TYPE normA, eps, resid; enum ATLAS_UPLO MyUplo = Uplo; if (Order == CblasRowMajor) { if (Uplo == CblasUpper) MyUplo = CblasLower; else MyUplo = CblasUpper; } eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N + ATL_MulBySize(N)*N); if (A == NULL) return(-1); Ag = A + lda*(N SHIFT); t0 = ATL_flushcache(CacheSize); lltgen(MyUplo, N, A, lda, N*1029+lda); lltgen(MyUplo, N, Ag, N, N*1029+lda); normA = lltnrm1(MyUplo, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A", N, N, A, lda); Mjoin(PATL,geprint)("Ag", N, N, Ag, N); #endif t0 = ATL_flushcache(-1); t0 = time00(); test_lauum(Order, Uplo, N, A, lda); t1 = time00() - t0; *tim = t1; t0 = ATL_flushcache(0); ATL_checkpad(MyUplo, N, A, lda); if (Uplo == CblasUpper) LmLt = ATL_UmulUt(Order, N, Ag, N); else LmLt = ATL_LtmulL(Order, N, Ag, N); #ifdef DEBUG Mjoin(PATL,geprint)("A", N, N, A, lda); Mjoin(PATL,geprint)("Ag", N, N, LmLt, N); #endif lltdiff(MyUplo, N, A, lda, LmLt, N); #ifdef DEBUG Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N); #endif resid = lltnrm1(MyUplo, N, LmLt, N) / (normA * eps * N); if (resid > 10.0 || resid != resid) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); free(LmLt); free(A); return(resid); }
static TYPE llttest(enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda, double *tim) { TYPE *A, *LmLt; int i; double t0, t1; TYPE normA, eps, resid; eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N); if (A == NULL) return(-1); t0 = ATL_flushcache(CacheSize); lltgen(Uplo, N, A, lda, N*1029+lda); normA = lltnrm1(Uplo, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A0", N, N, A, lda); #endif t0 = ATL_flushcache(-1); t0 = time00(); test_potrf(Uplo, N, A, lda); t1 = time00() - t0; *tim = t1; t0 = ATL_flushcache(0); #ifdef DEBUG Mjoin(PATL,geprint)("L", N, N, A, lda); #endif ATL_checkpad(Uplo, N, A, lda); if (Uplo == AtlasUpper) LmLt = ATL_UtmulU(N, A, lda); else LmLt = ATL_LmulLt(N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("L*Lt", N, N, LmLt, N); #endif lltgen(Uplo, N, A, lda, N*1029+lda); /* regen A over LLt */ lltdiff(Uplo, N, A, lda, LmLt, N); #ifdef DEBUG Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N); #endif resid = lltnrm1(Uplo, N, LmLt, N); #ifdef DEBUG if (resid/(normA*eps*N) > 10.0) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); #endif resid /= (normA * eps * N); free(LmLt); free(A); return(resid); }
/*
 * Runs one GEMM case: times trusted_gemm into C, then (unless TIMEONLY is
 * defined) times test_gemm into D and, when TEST is nonzero, compares D
 * against C element by element.
 *
 * TEST        : nonzero to compare results; when zero, D is aliased to C
 * CACHESIZE   : forwarded to ATL_flushcache before the operands are built
 * TA, TB      : 'n'/'N' = no transpose, 'c'/'C' = conjugate transpose,
 *               anything else = transpose
 * M, N, K     : problem dimensions
 * alpha, beta : scalars forwarded to both gemm calls
 * A, B, C, D  : caller-provided workspaces with leading dimensions
 *               lda, ldb, ldc, ldd; all are overwritten by matgen
 *
 * Prints one result line per timed routine to stdout, and an error summary
 * to stderr if any element differs by more than the tolerance.
 *
 * Returns 1 if the case passed or no comparison was performed, 0 if any
 * element of D differed from C by more than the tolerance.
 *
 * Fix: PASSED used to be assigned only inside the `if (TEST)` branch (or in
 * the TIMEONLY build), so a non-TIMEONLY call with TEST == 0 returned an
 * indeterminate value.  It now starts at 1, matching the TIMEONLY result.
 *
 * Note: MALPH and MBETA are defined here as preprocessor macros and remain
 * defined for the rest of the translation unit, as in the original.
 */
int mmcase(int TEST, int CACHESIZE, char TA, char TB, int M, int N, int K,
           SCALAR alpha, TYPE *A, int lda, TYPE *B, int ldb, SCALAR beta,
           TYPE *C, int ldc, TYPE *D, int ldd)
{
   char *pc;
#ifdef TREAL
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %6.2f %5.1f %5.2f %3s\n";
   #define MALPH alpha
   #define MBETA beta
#else
   #define MALPH *alpha, alpha[1]
   #define MBETA *beta, beta[1]
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %5.1f %5.1f %6.2f %6.1f %4.2f %3s\n";
#endif
   int ii, jj, i, j=0, nerrs;
   int PASSED = 1;   /* default result when no comparison is performed */
   double t0, t1, t2, t3, mflop;
   TYPE maxval, f1, ferr;
   static TYPE feps=0.0;   /* tolerance unit, set once on first tested case */
   static int itst=1;      /* running case number printed in column one */
   enum ATLAS_TRANS TAc, TBc;
   double l2ret;

   if (!TEST) D = C;
   l2ret = ATL_flushcache( CACHESIZE );

   /* Generate A with the shape implied by its transpose setting. */
   if (TA == 'n' || TA == 'N')
   {
      matgen(M, K, A, lda, K*1112);
      TAc = AtlasNoTrans;
   }
   else
   {
      matgen(K, M, A, lda, K*1112);
      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;
      else TAc = AtlasTrans;
   }
   /* Likewise for B. */
   if (TB == 'n' || TB == 'N')
   {
      matgen(K, N, B, ldb, N*2238);
      TBc = AtlasNoTrans;
   }
   else
   {
      matgen(N, K, B, ldb, N*2238);
      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;
      else TBc = AtlasTrans;
   }
   matgen(M, N, C, ldc, M*N);

#ifdef DEBUG
   printmat("A0", M, K, A, lda);
   printmat("B0", K, N, B, ldb);
   printmat("C0", M, N, C, ldc);
#endif

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   t0 = time00();
   trusted_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   t1 = time00() - t0;
   if (t1 <= 0.0) mflop = t1 = 0.0;
   else   /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */
   #ifdef TCPLX
      mflop = ( ((8.0*M)*N)*K ) / (t1*1000000.0);
   #else
      mflop = ( ((2.0*M)*N)*K ) / (t1*1000000.0);
   #endif
   printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---");

#ifdef DEBUG
   printmat("C", M, N, C, ldc);
#endif

#ifndef TIMEONLY
   /* Same seed as C so both routines start from identical output data. */
   matgen(M, N, D, ldd, M*N);

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   t0 = time00();
   test_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, D, ldd);
   t2 = time00() - t0;
   if (t2 <= 0.0) t2 = mflop = 0.0;
   else
   #ifdef TCPLX
      mflop = ( ((8.0*M)*N)*K ) / (t2*1000000.0);
   #else
      mflop = ( ((2.0*M)*N)*K ) / (t2*1000000.0);
   #endif

#ifdef DEBUG
   printmat("D", M, N, D, ldd);
#endif
   if (TEST)
   {
      if (feps == 0.0)
      {
         /* EPS is used rather than probing machine epsilon at run time. */
         feps = EPS;
#ifdef DEBUG
         printf("feps=%e\n",feps);
#endif
      }
      /* Per-element tolerance, scaled by K and the scalar magnitudes. */
#ifdef TREAL
      ferr = 2.0 * (Mabs(alpha) * 2.0*K*feps + Mabs(beta) * feps) + feps;
#else
      f1 = Mabs(*alpha) + Mabs(alpha[1]);
      maxval = Mabs(*beta) + Mabs(beta[1]);
      ferr = 2.0 * (f1*8.0*K*feps + maxval*feps) + feps;
#endif
      PASSED = 1;
      maxval = 0.0;
      pc = "YES";
      nerrs = ii = jj = 0;
      /* Walk D and C column by column; D and C are advanced in place. */
      for (j=0; j != N; j++)
      {
         for (i=0; i != M SHIFT; i++)
         {
            f1 = D[i] - C[i];
            if (f1 < 0.0) f1 = -f1;
            if (f1 > ferr)
            {
               nerrs++;
               PASSED = 0;
               pc = "NO!";
               if (f1 > maxval)
               {
                  /* remember the 1-based position of the worst error */
                  maxval=f1;
                  ii = i+1;
                  jj = j+1;
               }
            }
         }
         D += ldd SHIFT;
         C += ldc SHIFT;
      }
      if (maxval != 0.0)
         fprintf(stderr, "ERROR: nerr=%d, i=%d, j=%d, maxval=%e\n",
                 nerrs, ii,jj, maxval);
   }
   else pc = "---";

   /* Ratio of trusted time to tested time; 0.0 if the latter is zero. */
   if (t1 == t2) t3 = 1.0;
   else if (t2 != 0.0) t3 = t1/t2;
   else t3 = 0.0;
   printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc);
#else
   itst++;
   PASSED = 1;
#endif
   l2ret = ATL_flushcache( 0 );
   return(PASSED);
}
static TYPE trtritest(enum ATLAS_ORDER Order, enum ATLAS_UPLO Uplo, enum ATLAS_DIAG Diag, int CacheSize, int N, int lda, double *tim) { TYPE *A, *Acompare; int i; double t0, t1; TYPE normA, eps, resid; /*int ierr;*/ #ifdef TCPLX const TYPE one[2]={ATL_rone, ATL_rzero}; #else const TYPE one = ATL_rone; #endif eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N); Acompare = malloc(ATL_MulBySize(lda)*N); if (A == NULL) return(-1); if (Acompare == NULL) return(-1); t0 = ATL_flushcache(CacheSize); /* create random, diagonally dominant matrix with magic value at unused places. Last number is just the random seed. */ trigen(Order, Uplo, Diag, N, A, lda, PADVAL, N*1029+lda); /* Create backup to calculate residual. This one has to be used as a full matrix, so it has zero fills and correct diagonal. */ trigen(Order, Uplo, Diag, N, Acompare, lda, ATL_rzero, N*1029+lda); if (Diag==AtlasUnit) for (i=0; i < N; i++) Acompare[(i*(lda+1)) SHIFT] = ATL_rone; normA = trinrm1(Order,Uplo, Diag, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A0", N, N, A, lda); #endif t0 = ATL_flushcache(-1); /* Calculate and time a solution */ t0 = time00(); test_trtri(Order, Uplo, Diag, N, A, lda); t1 = time00() - t0; *tim = t1; /* if (ierr != 0) { fprintf(stderr, "Return values != 0 : %d \n",ierr); return(9999.9999); }*/ t0 = ATL_flushcache(0); /* Instroduce a padding error. 
*/ /* A[(5+5*lda)SHIFT]=114.0; */ #ifdef DEBUG Mjoin(PATL,geprint)("L", N, N, A, lda); #endif ATL_checkpad(Order, Uplo, Diag, N, A, lda); /* Calculate A^{-1}*A */ cblas_trmm(Order,CblasLeft,Uplo,AtlasNoTrans,Diag, N,N,one,A,lda,Acompare,lda); #ifdef DEBUG Mjoin(PATL,geprint)("A^{-1}*A", N, N, Acompare, N); #endif /* Subtract diagonal */ for (i=0; i < N; i++) Acompare[i*((lda+1) SHIFT)] -= ATL_rone; /* resid = trinrm1(Order, Uplo,AtlasNonUnit,N,Acompare,lda); fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); */ resid = Mjoin(PATL,genrm1)(N, N, Acompare, lda); #ifdef DEBUG if (resid/(normA*eps*N) > 10.0) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); #endif resid /= (normA * eps * N); free(Acompare); free(A); return(resid); }