// Entry point of the test_162 driver: invokes test_gemm once per named case,
// in a fixed order, and then returns 0 unconditionally.
// NOTE(review): the five numeric arguments are forwarded verbatim; their
// meaning is defined by test_gemm, which is not visible here -- confirm
// against its declaration before changing any value.
int main(int /*argc*/, char ** /*argv*/)
{
    test_gemm("test_162_1", 100, 50, 32, 3, 4);
    test_gemm("test_162_2", 1, 2, 3, 4, 7);
    test_gemm("test_162_3", 128, 128, 128, 3, -1);
    test_gemm("test_162_4", 100, 100, 100, 1, 0);
    test_gemm("test_162_5", 100, 100, 100, 0, 1);

    return 0;
}
int main(int argn, char **argv) { cmd_args args; args.register_key("--M=", "{int} M"); args.register_key("--N=", "{int} N"); args.register_key("--K=", "{int} K"); args.register_key("--repeat=", "{int} repeat test number of times"); args.parse_args(argn, argv); if (args.exist("help")) { printf("Usage: %s [options]\n", argv[0]); args.print_help(); return 0; } int M = args.value<int>("M"); int N = args.value<int>("N"); int K = args.value<int>("K"); int repeat = args.value<int>("repeat", 1); sirius::initialize(true); double perf = 0; for (int i = 0; i < repeat; i++) perf += test_gemm(M, N, K); if (mpi_comm_world().rank() == 0) { printf("\n"); printf("average performance : %12.6f GFlops / rank\n", perf / repeat); } sirius::finalize(); }
int main() { matrix_construction(); matrix_access(); test_cblas_dgemm(); test_basic_alloc(); test_asum(); test_axpy(); test_copy(); test_dot(); test_sdot(); test_dotc(); test_dotu(); test_nrm2(); test_rot(); test_rotg(); test_scal(); test_swap(); test_iamax(); test_iamin(); test_dcabs1(); test_gemv(); test_gemm(); test_vmul(); test_vml(); test_std_vector_vml(); test_gemm_boost(); return 0; }
// Entry point of the test_164 driver: runs seven named test_gemm cases in a
// fixed order and returns 0 unconditionally.
// NOTE(review): the case names suggest they vary leading dimensions ("ld"),
// transposition ("trA"/"trB") and offsets ("offset"); the exact meaning of
// each positional argument is defined by test_gemm, which is not visible
// here -- confirm against its declaration.
int main(int /*argc*/, char ** /*argv*/)
{
    test_gemm("test_164_basic",
              100, 50, 32, 3, 4,
              100, 32, 32, 50, 100, 50,
              0, 0, 0,
              false, false);

    test_gemm("test_164_ldA",
              100, 50, 32, 3, 4,
              1000, 1000, 32, 50, 100, 50,
              0, 0, 0,
              false, false);

    test_gemm("test_164_ld",
              15, 107, 11, 75, 14,
              1000, 3000, 2000, 300, 164, 132,
              0, 0, 0,
              false, false);

    test_gemm("test_164_trA",
              51, 17, 21, 75, 15,
              1000, 2000, 2000, 300, 164, 232,
              0, 0, 0,
              true, false);

    test_gemm("test_164_trB",
              51, 17, 21, 75, 15,
              100, 20000, 222, 131, 100, 123,
              0, 0, 0,
              false, true);

    test_gemm("test_164_offset",
              51, 17, 21, 75, 15,
              100, 20000, 222, 131, 100, 123,
              20000 * 4 + 3, 131 * 5 + 6, 123 * 4 + 23,
              false, false);

    test_gemm("test_164_all",
              51, 17, 21, 75, 15,
              100, 20000, 222, 131, 100, 123,
              20000 * 4 + 3, 131 * 5 + 6, 123 * 4 + 23,
              true, true);

    return 0;
}
main(int nargs, char *args[]) /* * tst <tst> <# TA> <TA's> <# TB's> <TB's> <M0> <MN> <incM> <N0> <NN> <incN> * <K0> <KN> <incK> <# alphas> <alphas> <# betas> <betas> * */ { int M0, MN, incM, N0, NN, incN, K0, KN, incK, lda, ldb, ldc, MFLOP; int i, k, m, n, im, in, ik, ita, itb, ia, ib, nTA, nTB, nalph, nbeta; int itst=0, ipass=0, TEST, LDA_IS_M, MSAME=0, KSAME=0; int ndiag, nuplo, nside; TYPE *alph, *beta, *A, *B, *C, *D=NULL; #ifdef TREAL TYPE bet1 = 1.0, alp1 = -1.0; #else TYPE bet1[2] = {1.0, 0.0}, alp1[2] = {-1.0, 0.0}; #endif char TA, TB; enum ATLAS_SIDE *Side; enum ATLAS_UPLO *Uplo; enum ATLAS_TRANS *TransA, *TransB, TAc, TBc; enum ATLAS_DIAG *Diag; int CACHESIZE; GetFlags(nargs, args, &TEST, &nside, &Side, &nuplo, &Uplo, &nTA, &TransA, &nTB, &TransB, &ndiag, &Diag, &M0, &MN, &incM, &N0, &NN, &incN, &K0, &KN, &incK, &nalph, &alph, &nbeta, &beta, &LDA_IS_M, &MFLOP,&CACHESIZE); if (M0 == -1) { MSAME = 1; M0 = MN = incM = NN; } if (K0 == -1) { KSAME = 1; K0 = KN = incK = NN; } if (!MFLOP) { A = malloc(MN*KN*ATL_sizeof); B = malloc(NN*KN*ATL_sizeof); C = malloc(MN*NN*ATL_sizeof); if (TEST) D = malloc(MN*NN*ATL_sizeof); else D = NULL; if (!A || !B || !C || (TEST && !D)) { fprintf(stderr, "Not enough memory to run tests!!\n"); exit(-1); } } /* * Page the code in from disk, so first timing doesn't blow */ if (MFLOP) { mmcase0(10, 1, 'n', 'n', 100, 100, 100, alp1, 100, 100, bet1, 100); mmcase0(10, 1, 'n', 't', 100, 100, 100, alp1, 100, 100, bet1, 100); mmcase0(10, 1, 't', 'n', 100, 100, 100, alp1, 100, 100, bet1, 100); mmcase0(10, 1, 't', 't', 100, 100, 100, alp1, 100, 100, bet1, 100); } else { m = Mmin(100, MN); k = Mmin(100, KN); n = Mmin(100, NN); matgen(m, k, A, m, m*k); matgen(k, n, B, k, n*k); matgen(m, n, C, m, m*n); TA = TB = 'N'; TAc = TBc = AtlasNoTrans; trusted_gemm(TAc, TBc, m, n, k, alp1, A, m, B, k, bet1, C, m); test_gemm(TAc, TBc, m, n, k, alp1, A, m, B, k, bet1, C, m); } #ifdef TREAL printf("\nTEST TA TB M N K alpha beta Time Mflop SpUp PASS\n"); 
printf("==== == == === === === ===== ===== ====== ===== ==== ====\n\n"); #else printf("\nTEST TA TB M N K alpha beta Time Mflop SpUp PASS\n"); printf("==== == == === === === ===== ===== ===== ===== ====== ===== ==== ====\n\n"); #endif for (im=M0; im <= MN; im += incM) { for (n=N0; n <= NN; n += incN) { if (MSAME) m = n; else m = im; for (ik=K0; ik <= KN; ik += incK) { if (KSAME) k = n; else k = ik; for (ita=0; ita != nTA; ita++) { if (TransA[ita] == AtlasNoTrans) TA = 'N'; else if (TransA[ita] == AtlasTrans) TA = 'T'; else if (TransA[ita] == AtlasConjTrans) TA = 'C'; for (itb=0; itb != nTB; itb++) { if (TransB[itb] == AtlasNoTrans) TB = 'N'; else if (TransB[itb] == AtlasTrans) TB = 'T'; else if (TransB[itb] == AtlasConjTrans) TB = 'C'; for (ia=0; ia != nalph; ia++) { for (ib=0; ib != nbeta; ib++) { itst++; if (LDA_IS_M) { if (TA == 'n' || TA == 'N') lda = m; else lda = k; if (TB == 'n' || TB == 'N') ldb = k; else ldb = n; ldc = m; } else { if (TA == 'n' || TA == 'N') lda = MN; else lda = KN; if (TB == 'n' || TB == 'N') ldb = KN; else ldb = NN; ldc = MN; } if (MFLOP) { ipass++; #ifdef TREAL mmcase0(MFLOP, CACHESIZE, TA, TB, m, n, k, alph[ia], lda, ldb, beta[ib], ldc); #else mmcase0(MFLOP, CACHESIZE, TA, TB, m, n, k, alph+(ia SHIFT), lda, ldb, beta+(ib SHIFT), ldc); #endif } else { #ifdef TREAL ipass += mmcase(TEST, CACHESIZE, TA, TB, m, n, k, alph[ia], A, lda, B, ldb, beta[ib], C, ldc, D,ldc); #else ipass += mmcase(TEST, CACHESIZE, TA, TB, m, n, k, alph+(ia SHIFT), A, lda, B, ldb, beta+(ib SHIFT), C, ldc, D,ldc); #endif } } } } } } } } if (TEST && !MFLOP) printf("\nNTEST=%d, NUMBER PASSED=%d, NUMBER FAILURES=%d\n", itst, ipass, itst-ipass); else printf("\nDone with %d timing runs\n",itst); free(Side); free(Uplo); free(TransA); free(TransB); free(Diag); free(alph); free(beta); if (!MFLOP) { free(A); free(B); free(C); if (D) free(D); } exit(0); }
/*
 * Times trusted_gemm and then test_gemm on one problem, repeating each call
 * enough times to perform roughly MFLOP million floating-point operations,
 * and prints one result line per routine.
 *
 * MFLOP       : target amount of work (in millions of flops) per timing
 * CACHESIZE   : used to decide how many operand sets to allocate so that
 *               successive calls walk through distinct memory
 * TA, TB      : transpose settings ('n'/'N', 'c'/'C', anything else = trans)
 * M, N, K     : problem dimensions
 * alpha, beta : scalars passed to both gemm routines
 * lda,ldb,ldc : leading dimensions of A, B and C
 *
 * Operands are allocated and freed inside this routine.  Always returns 1.
 */
int mmcase0(int MFLOP, int CACHESIZE, char TA, char TB, int M, int N, int K,
            SCALAR alpha, int lda, int ldb, SCALAR beta, int ldc)
{
   char *pc;
#ifdef TREAL
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %6.2f %5.1f %5.2f %3s\n";
   #define MALPH alpha
   #define MBETA beta
   TYPE betinv, bet=beta;
#else
   #define MALPH *alpha, alpha[1]
   #define MBETA *beta, beta[1]
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %5.1f %5.1f %6.2f %6.1f %4.2f %3s\n";
   TYPE betinv[2], *bet=beta;
#endif
   int nreps, incA, incB, incC, inc, nmat, k, i;
   TYPE *c, *C, *a, *A, *b, *B, *st;
   double t0, t1, t2, t3, mflop, mops;
   static int itst=1;
   enum ATLAS_TRANS TAc, TBc;
   void *vp;

/*
 * betinv is the reciprocal of beta (or 0 when beta is 0).  The timing loops
 * switch between beta and betinv each time they wrap around the operand sets.
 * NOTE(review): presumably this keeps C from growing without bound over many
 * repetitions -- confirm.
 */
#ifdef TCPLX
   if (*beta == 0.0 && beta[1] == 0.0) betinv[0] = betinv[1] = 0.0;
   else if (beta[1] == 0.0)
   {
      betinv[0] = 1 / *beta;
      betinv[1] = 0.0;
   }
   else
   {
      t0 = *beta;
      t1 = beta[1];
      if (Mabs(t1) <= Mabs(t0))
      {
         t2 = t1 / t0;
         betinv[0] = t0 = 1.0 / (t0 + t1*t2);
         betinv[1] = -t0 * t2;
      }
      else
      {
         t2 = t0 / t1;
         betinv[1] = t0 = -1.0 / (t1 + t0*t2);
         betinv[0] = -t2 * t0;
      }
   }
   mops = ( ((8.0*M)*N)*K ) / 1000000.0;
#else
   if (beta != 0.0) betinv = 1.0 / beta;
   else betinv = beta;
   mops = ( ((2.0*M)*N)*K ) / 1000000.0;
#endif
/*
 * A problem with a zero dimension has mops == 0; dividing by it would give
 * a value that cannot be converted to int, so run such cases exactly once.
 */
   if (mops > 0.0) nreps = MFLOP / mops;
   else nreps = 1;
   if (nreps < 1) nreps = 1;

   if (TA == 'n' || TA == 'N')
   {
      TAc = AtlasNoTrans;
      incA = lda * K;
   }
   else
   {
      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;
      else TAc = AtlasTrans;
      incA = lda * M;
   }
   if (TB == 'n' || TB == 'N')
   {
      incB = ldb * N;
      TBc = AtlasNoTrans;
   }
   else
   {
      incB = ldb * K;
      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;
      else TBc = AtlasTrans;
   }
   incC = ldc*N;
   inc = incA + incB + incC;
   i = M*K + K*N + M*N;  /* amount of inc actually referenced */
/*
 * This is a hack; change to use of flushcache instead.
 * When nothing is referenced (i == 0) a single operand set is enough, and
 * the division by i must be skipped.
 */
   if (i > 0) nmat = ((CACHESIZE/ATL_sizeof) + i)/i;
   else nmat = 1;
   vp = malloc(ATL_MulBySize(nmat*inc)+ATL_Cachelen);
   ATL_assert(vp);
/*
 * Each operand set is laid out as [C | A | B]; st marks the end of the
 * last set so the loops below know when to wrap.
 */
   C = c = ATL_AlignPtr(vp);
   a = A = C + incC;
   b = B = A + incA;
   st = C + nmat*inc;
   matgen(inc, nmat, C, inc, M*N);

#ifdef DEBUG
   printmat("A0", M, K, A, lda);
   printmat("B0", K, N, B, ldb);
   printmat("C0", M, N, C, ldc);
#endif

/*
 * Time the trusted routine
 */
   t0 = time00();
   for (k=nreps; k; k--)
   {
      trusted_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc);
      c += inc;
      a += inc;
      b += inc;
      if (c == st)
      {
         c = C;
         a = A;
         b = B;
         if (bet == beta) bet = betinv;
         else bet = beta;
      }
   }
   t1 = time00() - t0;
   t1 /= nreps;
   if (t1 <= 0.0) mflop = t1 = 0.0;
   else   /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */
      mflop = mops / t1;
   printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---");

#ifdef DEBUG
   printmat("C", M, N, C, ldc);
#endif

/*
 * Time the routine under test on freshly generated data.  The walking
 * pointers and bet continue from where the first loop left them.
 */
   matgen(inc, nmat, C, inc, M*N);
   t0 = time00();
   for (k=nreps; k; k--)
   {
      test_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc);
      c += inc;
      a += inc;
      b += inc;
      if (c == st)
      {
         c = C;
         a = A;
         b = B;
         if (bet == beta) bet = betinv;
         else bet = beta;
      }
   }
   t2 = time00() - t0;
   t2 /= nreps;
   if (t2 <= 0.0) t2 = mflop = 0.0;
   else mflop = mops / t2;

   pc = "---";
/*
 * Speedup of the tested routine relative to the trusted one
 */
   if (t1 == t2) t3 = 1.0;
   else if (t2 != 0.0) t3 = t1/t2;
   else t3 = 0.0;
   printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc);
   free(vp);
   return(1);
}
/*
 * Runs one GEMM case on caller-supplied buffers: times trusted_gemm into C,
 * then (unless TIMEONLY is defined) times test_gemm into D and, when TEST is
 * nonzero, compares D against C element by element.
 *
 * TEST        : nonzero to compare the two results; when zero, D is
 *               replaced by C so test_gemm simply overwrites C
 * CACHESIZE   : passed to ATL_flushcache() before the operands are generated
 * TA, TB      : transpose settings ('n'/'N', 'c'/'C', anything else = trans)
 * M, N, K     : problem dimensions
 * alpha, beta : scalars passed to both gemm routines
 * A, B, C, D  : operand/result buffers, regenerated here with matgen()
 * lda..ldd    : leading dimensions of A, B, C and D
 *
 * Returns 1 when the case passed or no comparison was made, and 0 when at
 * least one element differed by more than the computed error bound.
 */
int mmcase(int TEST, int CACHESIZE, char TA, char TB, int M, int N, int K,
           SCALAR alpha, TYPE *A, int lda, TYPE *B, int ldb, SCALAR beta,
           TYPE *C, int ldc, TYPE *D, int ldd)
{
   char *pc;
#ifdef TREAL
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %6.2f %5.1f %5.2f %3s\n";
   #define MALPH alpha
   #define MBETA beta
#else
   #define MALPH *alpha, alpha[1]
   #define MBETA *beta, beta[1]
   char *form="%4d %c %c %4d %4d %4d %5.1f %5.1f %5.1f %5.1f %6.2f %6.1f %4.2f %3s\n";
#endif
/*
 * PASSED starts at 1 so the return value is well defined on every path;
 * in particular when TEST is zero and no comparison is performed.
 */
   int ii, jj, i, j=0, PASSED=1, nerrs;
   double t0, t1, t2, t3, mflop;
   TYPE maxval, f1, ferr;
   static TYPE feps=0.0;
   static int itst=1;
   enum ATLAS_TRANS TAc, TBc;
   double l2ret;

   if (!TEST) D = C;
   l2ret = ATL_flushcache( CACHESIZE );

/*
 * Generate the operands; the stored shape of A and B depends on whether
 * they are used transposed.
 */
   if (TA == 'n' || TA == 'N')
   {
      matgen(M, K, A, lda, K*1112);
      TAc = AtlasNoTrans;
   }
   else
   {
      matgen(K, M, A, lda, K*1112);
      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;
      else TAc = AtlasTrans;
   }
   if (TB == 'n' || TB == 'N')
   {
      matgen(K, N, B, ldb, N*2238);
      TBc = AtlasNoTrans;
   }
   else
   {
      matgen(N, K, B, ldb, N*2238);
      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;
      else TBc = AtlasTrans;
   }
   matgen(M, N, C, ldc, M*N);

#ifdef DEBUG
   printmat("A0", M, K, A, lda);
   printmat("B0", K, N, B, ldb);
   printmat("C0", M, N, C, ldc);
#endif

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   t0 = time00();
   trusted_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   t1 = time00() - t0;
   if (t1 <= 0.0) mflop = t1 = 0.0;
   else   /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */
   #ifdef TCPLX
      mflop = ( ((8.0*M)*N)*K ) / (t1*1000000.0);
   #else
      mflop = ( ((2.0*M)*N)*K ) / (t1*1000000.0);
   #endif
   printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---");

#ifdef DEBUG
   printmat("C", M, N, C, ldc);
#endif

#ifndef TIMEONLY
/*
 * D is generated with the same arguments as C was, then updated by the
 * routine under test.
 */
   matgen(M, N, D, ldd, M*N);

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   t0 = time00();
   test_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, D, ldd);
   t2 = time00() - t0;
   if (t2 <= 0.0) t2 = mflop = 0.0;
   else
   #ifdef TCPLX
      mflop = ( ((8.0*M)*N)*K ) / (t2*1000000.0);
   #else
      mflop = ( ((2.0*M)*N)*K ) / (t2*1000000.0);
   #endif

#ifdef DEBUG
   printmat("D", M, N, D, ldd);
#endif
   if (TEST)
   {
      if (feps == 0.0)  /* first call: set the epsilon used in the bound */
      {
#if 0
         f1 = feps = 0.5;
         do
         {
            feps = f1;
            f1 *= 0.5;
            maxval = 1.0 + f1;
         }
         while (maxval != 1.0);
         printf("feps=%e\n",feps);
#else
         feps = EPS;
#endif
#ifdef DEBUG
         printf("feps=%e\n",feps);
#endif
      }
/*
 *    Largest difference between the two results that is still accepted
 */
#ifdef TREAL
      ferr = 2.0 * (Mabs(alpha) * 2.0*K*feps + Mabs(beta) * feps) + feps;
#else
      f1 = Mabs(*alpha) + Mabs(alpha[1]);
      maxval = Mabs(*beta) + Mabs(beta[1]);
      ferr = 2.0 * (f1*8.0*K*feps + maxval*feps) + feps;
#endif
      maxval = 0.0;
      pc = "YES";
      nerrs = ii = jj = 0;
/*
 *    Walk both results column by column; D and C are advanced by their
 *    leading dimensions after each column.
 */
      for (j=0; j != N; j++)
      {
         for (i=0; i != M SHIFT; i++)
         {
            f1 = D[i] - C[i];
            if (f1 < 0.0) f1 = -f1;
            if (f1 > ferr)
            {
               nerrs++;
               PASSED = 0;
               pc = "NO!";
               if (f1 > maxval)  /* remember where the worst error is */
               {
                  maxval=f1;
                  ii = i+1;
                  jj = j+1;
               }
            }
         }
         D += ldd SHIFT;
         C += ldc SHIFT;
      }
      if (maxval != 0.0)
         fprintf(stderr, "ERROR: nerr=%d, i=%d, j=%d, maxval=%e\n",
                 nerrs, ii,jj, maxval);
   }
   else pc = "---";

/*
 * Speedup of the tested routine relative to the trusted one
 */
   if (t1 == t2) t3 = 1.0;
   else if (t2 != 0.0) t3 = t1/t2;
   else t3 = 0.0;
   printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc);
#else
   itst++;
#endif
   l2ret = ATL_flushcache( 0 );
   return(PASSED);
}