Ejemplo n.º 1
0
/* Driver for the "test_162" cases: calls test_gemm() once per table row, in order. */
int main(int, char **)
{
    /* Each row holds the label and the five integer arguments that are
       forwarded, unchanged and in the listed order, to test_gemm(). */
    static const struct
    {
        const char *label;
        int v[5];
    } cases[] = {
        {"test_162_1", {100, 50, 32, 3, 4}},
        {"test_162_2", {1, 2, 3, 4, 7}},
        {"test_162_3", {128, 128, 128, 3, -1}},
        {"test_162_4", {100, 100, 100, 1, 0}},
        {"test_162_5", {100, 100, 100, 0, 1}},
    };
    const int ncases = sizeof(cases) / sizeof(cases[0]);

    for (int idx = 0; idx < ncases; ++idx)
    {
        const int *v = cases[idx].v;
        test_gemm(cases[idx].label, v[0], v[1], v[2], v[3], v[4]);
    }

    return 0;
}
/**
 * Command-line driver: calls test_gemm(M, N, K) `repeat` times, sums the
 * returned values and prints their average on MPI rank 0.
 *
 * Recognised options: --help, --M=, --N=, --K= (read without a default) and
 * --repeat= (default 1).
 */
int main(int argn, char **argv)
{
    cmd_args args;
    /* "help" is queried below, so it has to be a registered key as well. */
    args.register_key("--help", "print this help and exit");
    args.register_key("--M=", "{int} M");
    args.register_key("--N=", "{int} N");
    args.register_key("--K=", "{int} K");
    args.register_key("--repeat=", "{int} repeat test number of times");

    args.parse_args(argn, argv);
    if (args.exist("help"))
    {
        printf("Usage: %s [options]\n", argv[0]);
        args.print_help();
        return 0;
    }

    int M = args.value<int>("M");
    int N = args.value<int>("N");
    int K = args.value<int>("K");

    int repeat = args.value<int>("repeat", 1);

    sirius::initialize(true);

    double perf = 0;
    for (int i = 0; i < repeat; i++) perf += test_gemm(M, N, K);
    if (mpi_comm_world().rank() == 0)
    {
        /* With repeat <= 0 nothing was measured; avoid dividing by zero. */
        const double average = (repeat > 0) ? perf / repeat : 0.0;
        printf("\n");
        printf("average performance    : %12.6f GFlops / rank\n", average);
    }

    sirius::finalize();
    return 0;
}
Ejemplo n.º 3
0
/*
 * Test-suite entry point: invokes every test routine below exactly once, in
 * the listed order, and then returns 0. Any value a routine might return is
 * ignored here, so failures must be reported by the routines themselves
 * (presumably via assertions or output -- not visible from this function).
 */
int main()
{
  matrix_construction();
  matrix_access();
  test_cblas_dgemm();
  test_basic_alloc();
  test_asum();
  test_axpy();
  test_copy();
  test_dot();
  test_sdot();
  test_dotc();
  test_dotu();
  test_nrm2();
  test_rot();
  test_rotg();
  test_scal();
  test_swap();
  test_iamax();
  test_iamin();
  test_dcabs1();
  test_gemv();
  test_gemm();
  test_vmul();
  test_vml();
  test_std_vector_vml();
  test_gemm_boost();
  return 0;
}
Ejemplo n.º 4
0
/* Driver for the "test_164" cases: calls test_gemm() once per table row, in order. */
int main(int, char **)
{
    /* Each row holds the label, the fourteen integer arguments and the two
       boolean arguments that are forwarded, unchanged and in the listed
       order, to test_gemm(). */
    static const struct
    {
        const char *label;
        int v[14];
        bool flag[2];
    } cases[] = {
        {"test_164_basic",
         {100, 50, 32,
          3, 4,
          100, 32,
          32, 50,
          100, 50,
          0, 0, 0},
         {false, false}},
        {"test_164_ldA",
         {100, 50, 32,
          3, 4,
          1000, 1000,
          32, 50,
          100, 50,
          0, 0, 0},
         {false, false}},
        {"test_164_ld",
         {15, 107, 11,
          75, 14,
          1000, 3000,
          2000, 300,
          164, 132,
          0, 0, 0},
         {false, false}},
        {"test_164_trA",
         {51, 17, 21,
          75, 15,
          1000, 2000,
          2000, 300,
          164, 232,
          0, 0, 0},
         {true, false}},
        {"test_164_trB",
         {51, 17, 21,
          75, 15,
          100, 20000,
          222, 131,
          100, 123,
          0, 0, 0},
         {false, true}},
        {"test_164_offset",
         {51, 17, 21,
          75, 15,
          100, 20000,
          222, 131,
          100, 123,
          20000 * 4 + 3, 131 * 5 + 6, 123 * 4 + 23},
         {false, false}},
        {"test_164_all",
         {51, 17, 21,
          75, 15,
          100, 20000,
          222, 131,
          100, 123,
          20000 * 4 + 3, 131 * 5 + 6, 123 * 4 + 23},
         {true, true}},
    };
    const int ncases = sizeof(cases) / sizeof(cases[0]);

    for (int idx = 0; idx < ncases; ++idx)
    {
        const int *v = cases[idx].v;
        const bool *f = cases[idx].flag;
        test_gemm(cases[idx].label,
                  v[0], v[1], v[2],
                  v[3], v[4],
                  v[5], v[6],
                  v[7], v[8],
                  v[9], v[10],
                  v[11], v[12], v[13],
                  f[0], f[1]);
    }
    return 0;
}
Ejemplo n.º 5
0
/*
 *  tst <tst> <# TA> <TA's> <# TB's> <TB's> <M0> <MN> <incM> <N0> <NN> <incN>
 *      <K0> <KN> <incK> <# alphas> <alphas> <# betas> <betas>
 *
 * Tester/timer driver for test_gemm against trusted_gemm.
 *
 * GetFlags() fills in the run parameters from the command line.  The driver
 * then sweeps M, N, K over [x0, xN] in steps of incx and, for every
 * combination of TransA, TransB, alpha and beta, runs one case:
 *   - MFLOP != 0 : mmcase0() (timing only, allocates its own workspace);
 *   - MFLOP == 0 : mmcase() on the A/B/C/D buffers allocated here; its
 *                  return value is accumulated as the pass count.
 * M0 == -1 makes M follow N (MSAME) and K0 == -1 makes K follow N (KSAME).
 * Exits with -1 if the work buffers cannot be allocated, otherwise returns 0.
 */
int main(int nargs, char *args[])
{
   int M0, MN, incM, N0, NN, incN, K0, KN, incK, lda, ldb, ldc, MFLOP;
   int k, m, n, im, ik, ita, itb, ia, ib, nTA, nTB, nalph, nbeta;
   int itst=0, ipass=0, TEST, LDA_IS_M, MSAME=0, KSAME=0;
   int ndiag, nuplo, nside;
   TYPE *alph, *beta, *A=NULL, *B=NULL, *C=NULL, *D=NULL;
   #ifdef TREAL
      TYPE bet1 = 1.0, alp1 = -1.0;
   #else
      TYPE bet1[2] = {1.0, 0.0}, alp1[2] = {-1.0, 0.0};
   #endif
   /* Defaults keep TA/TB defined even if a trans enum matches none of the
      three values tested in the sweep below. */
   char TA='N', TB='N';
   enum ATLAS_SIDE *Side;
   enum ATLAS_UPLO *Uplo;
   enum ATLAS_TRANS *TransA, *TransB, TAc, TBc;
   enum ATLAS_DIAG *Diag;
   int CACHESIZE;

   GetFlags(nargs, args, &TEST, &nside, &Side, &nuplo, &Uplo,
            &nTA, &TransA, &nTB, &TransB, &ndiag, &Diag,
            &M0, &MN, &incM, &N0, &NN, &incN, &K0, &KN, &incK,
            &nalph, &alph, &nbeta, &beta, &LDA_IS_M, &MFLOP,&CACHESIZE);

   if (M0 == -1)
   {
      /* Single pass of the M loop; m is taken from n inside the sweep. */
      MSAME = 1;
      M0 = MN = incM = NN;
   }
   if (K0 == -1)
   {
      /* Single pass of the K loop; k is taken from n inside the sweep. */
      KSAME = 1;
      K0 = KN = incK = NN;
   }

   if (!MFLOP)
   {
      /* Sizes are computed in size_t so large dimensions do not overflow int. */
      A = malloc((size_t)MN*KN*ATL_sizeof);
      B = malloc((size_t)NN*KN*ATL_sizeof);
      C = malloc((size_t)MN*NN*ATL_sizeof);
      if (TEST) D = malloc((size_t)MN*NN*ATL_sizeof);
      else D = NULL;
      if (!A || !B || !C || (TEST && !D))
      {
         fprintf(stderr, "Not enough memory to run tests!!\n");
         exit(-1);
      }
   }
/*
 * Page the code in from disk, so first timing doesn't blow
 */
   if (MFLOP)
   {
      mmcase0(10, 1, 'n', 'n', 100, 100, 100, alp1, 100, 100, bet1, 100);
      mmcase0(10, 1, 'n', 't', 100, 100, 100, alp1, 100, 100, bet1, 100);
      mmcase0(10, 1, 't', 'n', 100, 100, 100, alp1, 100, 100, bet1, 100);
      mmcase0(10, 1, 't', 't', 100, 100, 100, alp1, 100, 100, bet1, 100);
   }
   else
   {
      m = Mmin(100, MN);
      k = Mmin(100, KN);
      n = Mmin(100, NN);
      matgen(m, k, A, m, m*k);
      matgen(k, n, B, k, n*k);
      matgen(m, n, C, m, m*n);
      TA = TB = 'N';
      TAc = TBc = AtlasNoTrans;
      trusted_gemm(TAc, TBc, m, n, k, alp1, A, m, B, k, bet1, C, m);
      test_gemm(TAc, TBc, m, n, k, alp1, A, m, B, k, bet1, C, m);
   }

#ifdef TREAL
   printf("\nTEST  TA  TB    M    N    K  alpha   beta    Time  Mflop  SpUp  PASS\n");
   printf("====  ==  ==  ===  ===  ===  =====  =====  ======  =====  ====  ====\n\n");
#else
   printf("\nTEST  TA  TB    M    N    K        alpha         beta    Time  Mflop  SpUp  PASS\n");
   printf("====  ==  ==  ===  ===  ===  ===== =====  ===== =====  ======  =====  ====  ====\n\n");
#endif
   for (im=M0; im <= MN; im += incM)
   {
      for (n=N0; n <= NN; n += incN)
      {
         if (MSAME) m = n;
         else m = im;
         for (ik=K0; ik <= KN; ik += incK)
         {
            if (KSAME) k = n;
            else k = ik;
            for (ita=0; ita != nTA; ita++)
            {
               if (TransA[ita] == AtlasNoTrans) TA = 'N';
               else if (TransA[ita] == AtlasTrans) TA = 'T';
               else if (TransA[ita] == AtlasConjTrans) TA = 'C';

               for (itb=0; itb != nTB; itb++)
               {
                  if (TransB[itb] == AtlasNoTrans) TB = 'N';
                  else if (TransB[itb] == AtlasTrans) TB = 'T';
                  else if (TransB[itb] == AtlasConjTrans) TB = 'C';
                  for (ia=0; ia != nalph; ia++)
                  {
                     for (ib=0; ib != nbeta; ib++)
                     {
                        itst++;
                        /* Leading dimensions: either the current problem
                           size or the maximum size of the sweep. */
                        if (LDA_IS_M)
                        {
                           if (TA == 'n' || TA == 'N') lda = m;
                           else lda = k;
                           if (TB == 'n' || TB == 'N') ldb = k;
                           else ldb = n;
                           ldc = m;
                        }
                        else
                        {
                           if (TA == 'n' || TA == 'N') lda = MN;
                           else lda = KN;
                           if (TB == 'n' || TB == 'N') ldb = KN;
                           else ldb = NN;
                           ldc = MN;
                        }

                        if (MFLOP)
                        {
                           ipass++;
#ifdef TREAL
                              mmcase0(MFLOP, CACHESIZE, TA, TB, m, n, k,
                                      alph[ia], lda, ldb, beta[ib], ldc);
#else
                              mmcase0(MFLOP, CACHESIZE, TA, TB, m, n, k,
                                      alph+(ia SHIFT), lda, ldb,
                                      beta+(ib SHIFT), ldc);
#endif
                        }
                        else
                        {
#ifdef TREAL
                              ipass += mmcase(TEST, CACHESIZE, TA, TB, m,
                                              n, k, alph[ia], A, lda, B, ldb,
                                              beta[ib], C, ldc, D,ldc);
#else
                              ipass += mmcase(TEST, CACHESIZE, TA, TB, m,
                                              n, k, alph+(ia SHIFT), A,
                                              lda, B, ldb, beta+(ib SHIFT),
                                              C, ldc, D,ldc);
#endif
                        }
                     }
                  }
               }
            }
         }
      }
   }
   if (TEST && !MFLOP)
      printf("\nNTEST=%d, NUMBER PASSED=%d, NUMBER FAILURES=%d\n",
             itst, ipass, itst-ipass);
   else printf("\nDone with %d timing runs\n",itst);
   free(Side);
   free(Uplo);
   free(TransA);
   free(TransB);
   free(Diag);
   free(alph);
   free(beta);
   if (!MFLOP)
   {
      free(A);
      free(B);
      free(C);
      if (D) free(D);
   }
   return 0;
}
Ejemplo n.º 6
0
/*
 * Timing-only GEMM case.  Times trusted_gemm and then test_gemm on the same
 * problem, printing one result line for each (the second line carries the
 * ratio t_trusted / t_test in the speedup column).
 *
 *   MFLOP      work target; the repetition count is MFLOP / (Mflop of one
 *              call), with a minimum of 1
 *   CACHESIZE  used to choose how many (C, A, B) matrix sets are allocated;
 *              successive calls step through the sets and wrap around
 *   TA, TB     'n'/'N' = no transpose, 'c'/'C' = conjugate transpose,
 *              anything else = transpose
 *   M, N, K    problem dimensions
 *   alpha, beta, lda, ldb, ldc   passed through to both gemm routines
 *
 * On every wrap-around beta is swapped with its inverse (or left at 0).
 * Always returns 1.
 *
 * NOTE(review): incA/incB/incC are element counts added to TYPE pointers,
 * while the allocation uses ATL_MulBySize(); confirm the strides are what is
 * intended for the complex (TCPLX) build.
 */
int mmcase0(int MFLOP, int CACHESIZE, char TA, char TB, int M, int N, int K,
            SCALAR alpha, int lda, int ldb, SCALAR beta, int ldc)
{
   char *pc;
#ifdef TREAL
   char *form="%4d   %c   %c %4d %4d %4d  %5.1f  %5.1f  %6.2f %5.1f %5.2f   %3s\n";
   #define MALPH alpha
   #define MBETA beta
   TYPE betinv, bet=beta;
#else
   #define MALPH *alpha, alpha[1]
   #define MBETA *beta, beta[1]
   char *form="%4d   %c   %c %4d %4d %4d  %5.1f %5.1f  %5.1f %5.1f  %6.2f %6.1f %4.2f   %3s\n";
   TYPE betinv[2], *bet=beta;
#endif
   int nreps, incA, incB, incC, inc, nmat, k, i;
   TYPE *c, *C, *a, *A, *b, *B, *st;
   double t0, t1, t2, t3, mflop, mops;
   enum ATLAS_TRANS TAc, TBc;
   void *vp;

   /* betinv = 1/beta (0 stays 0); mops = Mflop count of a single call */
   #ifdef TCPLX
      if (*beta == 0.0 && beta[1] == 0.0) betinv[0] = betinv[1] = 0.0;
      else if (beta[1] == 0.0) { betinv[0] = 1 / *beta;  betinv[1] = 0.0; }
      else
      {
         /* complex reciprocal, dividing by the larger component */
         t0 = *beta;
         t1 = beta[1];
         if (Mabs(t1) <= Mabs(t0))
         {
            t2 = t1 / t0;
            betinv[0] = t0 = 1.0 / (t0 + t1*t2);
            betinv[1] = -t0 * t2;
         }
         else
         {
            t2 = t0 / t1;
            betinv[1] = t0 = -1.0 / (t1 + t0*t2);
            betinv[0] = -t2 * t0;
         }
      }
      mops = ( ((8.0*M)*N)*K ) / 1000000.0;
   #else
      if (beta != 0.0) betinv = 1.0 / beta;
      else betinv = beta;
      mops = ( ((2.0*M)*N)*K ) / 1000000.0;
   #endif
   /* A zero-sized problem has mops == 0; run it once instead of dividing. */
   nreps = (mops > 0.0) ? (int)(MFLOP / mops) : 1;
   if (nreps < 1) nreps = 1;
   if (TA == 'n' || TA == 'N')
   {
      TAc = AtlasNoTrans;
      incA = lda * K;
   }
   else
   {
      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;
      else TAc = AtlasTrans;
      incA = lda * M;
   }
   if (TB == 'n' || TB == 'N')
   {
      incB = ldb * N;
      TBc = AtlasNoTrans;
   }
   else
   {
      incB = ldb * K;
      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;
      else TBc = AtlasTrans;
   }
   incC = ldc*N;
   inc = incA + incB + incC;
   i = M*K + K*N + M*N;  /* amount of inc actually referenced */
   /* This is a hack; change to use of flushcache instead. */
   nmat = (i > 0) ? ((CACHESIZE/ATL_sizeof) + i)/i : 1;
   vp = malloc(ATL_MulBySize(nmat*inc)+ATL_Cachelen);
   ATL_assert(vp);
   /* Each set is laid out as C | A | B; st marks the end of the last set. */
   C = c = ATL_AlignPtr(vp);
   a = A = C + incC;
   b = B = A + incA;
   st = C + nmat*inc;
   matgen(inc, nmat, C, inc, M*N);

#ifdef DEBUG
   printmat("A0", M, K, A, lda);
   printmat("B0", K, N, B, ldb);
   printmat("C0", M, N, C, ldc);
#endif

   t0 = time00();
   for (k=nreps; k; k--)
   {
      trusted_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc);
      c += inc; a += inc; b += inc;
      if (c == st)
      {
         c = C; a = A; b = B;
         if (bet == beta) bet = betinv;
         else bet = beta;
      }
   }
   t1 = time00() - t0;
   t1 /= nreps;
   if (t1 <= 0.0) mflop = t1 = 0.0;
   else   /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */
      mflop = mops / t1;
   printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---");

#ifdef DEBUG
   printmat("C", M, N, C, ldc);
#endif

   matgen(inc, nmat, C, inc, M*N);
   /* Restart from the first matrix set and the original beta so test_gemm
      is timed on exactly the same sequence as trusted_gemm above. */
   c = C; a = A; b = B;
   bet = beta;
   t0 = time00();
   for (k=nreps; k; k--)
   {
      test_gemm(TAc, TBc, M, N, K, alpha, a, lda, b, ldb, bet, c, ldc);
      c += inc; a += inc; b += inc;
      if (c == st)
      {
         c = C; a = A; b = B;
         if (bet == beta) bet = betinv;
         else bet = beta;
      }
   }

   t2 = time00() - t0;
   t2 /= nreps;
   if (t2 <= 0.0) t2 = mflop = 0.0;
   else mflop = mops / t2;

   pc = "---";
   if (t1 == t2) t3 = 1.0;
   else if (t2 != 0.0) t3 = t1/t2;
   else t3 = 0.0;
   printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc);
   free(vp);
   return(1);
}
Ejemplo n.º 7
0
/*
 * Runs one GEMM case: times trusted_gemm into C, then (unless TIMEONLY is
 * defined) times test_gemm into D and, when TEST is nonzero, compares D with
 * C element by element.
 *
 *   TEST       nonzero: verify D against C; zero: time only, and D is
 *              replaced by C so test_gemm overwrites the same buffer
 *   CACHESIZE  passed to ATL_flushcache() before the matrices are generated
 *   TA, TB     'n'/'N' = no transpose, 'c'/'C' = conjugate transpose,
 *              anything else = transpose
 *   M, N, K, alpha, beta, lda, ldb   passed through to both gemm routines
 *   A, B       input buffers, regenerated here with matgen()
 *   C, ldc     output of trusted_gemm
 *   D, ldd     output of test_gemm
 *
 * Returns 1 when the comparison passes or no comparison was made, 0 when at
 * least one element differs by more than the computed tolerance.
 */
int mmcase(int TEST, int CACHESIZE, char TA, char TB, int M, int N, int K,
           SCALAR alpha, TYPE *A, int lda, TYPE *B, int ldb, SCALAR beta,
           TYPE *C, int ldc, TYPE *D, int ldd)
{
   char *pc;
#ifdef TREAL
   char *form="%4d   %c   %c %4d %4d %4d  %5.1f  %5.1f  %6.2f %5.1f %5.2f   %3s\n";
   #define MALPH alpha
   #define MBETA beta
#else
   #define MALPH *alpha, alpha[1]
   #define MBETA *beta, beta[1]
   char *form="%4d   %c   %c %4d %4d %4d  %5.1f %5.1f  %5.1f %5.1f  %6.2f %6.1f %4.2f   %3s\n";
#endif
   /* PASSED starts at 1 so the timing-only path (TEST == 0) returns a
      defined value; only a failed comparison clears it. */
   int ii, jj, i, j=0, PASSED=1, nerrs;
   double t0, t1, t2, t3, mflop;
   TYPE maxval, f1, ferr;
   static TYPE feps=0.0;
   static int itst=1;
   enum ATLAS_TRANS TAc, TBc;
   double l2ret;

   if (!TEST) D = C;
   l2ret = ATL_flushcache( CACHESIZE );
   if (TA == 'n' || TA == 'N')
   {
      matgen(M, K, A, lda, K*1112);
      TAc = AtlasNoTrans;
   }
   else
   {
      matgen(K, M, A, lda, K*1112);
      if (TA == 'c' || TA == 'C') TAc = AtlasConjTrans;
      else TAc = AtlasTrans;
   }
   if (TB == 'n' || TB == 'N')
   {
      matgen(K, N, B, ldb, N*2238);
      TBc = AtlasNoTrans;
   }
   else
   {
      matgen(N, K, B, ldb, N*2238);
      if (TB == 'c' || TB == 'C') TBc = AtlasConjTrans;
      else TBc = AtlasTrans;
   }
   matgen(M, N, C, ldc, M*N);

#ifdef DEBUG
   printmat("A0", M, K, A, lda);
   printmat("B0", K, N, B, ldb);
   printmat("C0", M, N, C, ldc);
#endif

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   t0 = time00();
   trusted_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   t1 = time00() - t0;
   if (t1 <= 0.0) mflop = t1 = 0.0;
   else   /* flop rates actually 8MNK+12MN & 2MNK + 2MN, resp */
      #ifdef TCPLX
         mflop = ( ((8.0*M)*N)*K ) / (t1*1000000.0);
      #else
         mflop = ( ((2.0*M)*N)*K ) / (t1*1000000.0);
      #endif
   printf(form, itst, TA, TB, M, N, K, MALPH, MBETA, t1, mflop, 1.0, "---");

#ifdef DEBUG
   printmat("C", M, N, C, ldc);
#endif

#ifndef TIMEONLY
   /* Same seed as C above, so D is generated with the same matgen() arguments. */
   matgen(M, N, D, ldd, M*N);

   /* invalidate L2 cache */
   l2ret = ATL_flushcache( -1 );

   t0 = time00();
   test_gemm(TAc, TBc, M, N, K, alpha, A, lda, B, ldb, beta, D, ldd);

   t2 = time00() - t0;
   if (t2 <= 0.0) t2 = mflop = 0.0;
   else
      #ifdef TCPLX
         mflop = ( ((8.0*M)*N)*K ) / (t2*1000000.0);
      #else
         mflop = ( ((2.0*M)*N)*K ) / (t2*1000000.0);
      #endif
#ifdef DEBUG
   printmat("D", M, N, D, ldd);
#endif
   if (TEST)
   {
      if (feps == 0.0)
      {
         /* set once; feps is static and reused by later calls */
         feps = EPS;
#ifdef DEBUG
         printf("feps=%e\n",feps);
#endif
      }
      /* tolerance grows with K and with the magnitudes of alpha and beta */
#ifdef TREAL
      ferr = 2.0 * (Mabs(alpha) * 2.0*K*feps + Mabs(beta) * feps) + feps;
#else
      f1 = Mabs(*alpha) + Mabs(alpha[1]);
      maxval = Mabs(*beta) + Mabs(beta[1]);
      ferr = 2.0 * (f1*8.0*K*feps + maxval*feps) + feps;
#endif
      PASSED = 1;
      maxval = 0.0;
      pc = "YES";
      nerrs = ii = jj = 0;
      /* walk both matrices column by column, remembering the worst error */
      for (j=0; j != N; j++)
      {
         for (i=0; i != M SHIFT; i++)
         {
            f1 = D[i] - C[i];
            if (f1 < 0.0) f1 = -f1;
            if (f1 > ferr)
            {
               nerrs++;
               PASSED = 0;
               pc = "NO!";
               if (f1 > maxval)
               {
                  maxval=f1;
                  ii = i+1;
                  jj = j+1;
               }
            }
         }
         D += ldd SHIFT;
         C += ldc SHIFT;
      }
      if (maxval != 0.0)
         fprintf(stderr, "ERROR: nerr=%d, i=%d, j=%d, maxval=%e\n", nerrs, ii,jj, maxval);
   }
   else pc = "---";
   if (t1 == t2) t3 = 1.0;
   else if (t2 != 0.0) t3 = t1/t2;
   else t3 = 0.0;
   printf(form, itst++, TA, TB, M, N, K, MALPH, MBETA, t2, mflop, t3, pc);
#else
   itst++;
#endif
   l2ret = ATL_flushcache( 0 );
   return(PASSED);
}