示例#1
0
/*
 * Builds a padded, strided test vector holding N pseudo-random entries.
 *
 * npad   : number of padding entries placed before the first data entry (the
 *          same amount is reserved after the last one)
 * padval : value handed to vecset for the whole buffer before the data
 *          entries are written
 * N      : number of data entries; N <= 0 yields NULL
 * incX   : stride between consecutive data entries (its absolute value is used)
 *
 * Returns the start of the malloc'ed buffer (padding included), or NULL
 * when N <= 0.  Allocation failure trips the assert.
 */
static TYPE *getvec(int npad, TYPE padval, int N, int incX)
{
   TYPE *vec, *elt;
   int remaining, len;

   if (N <= 0)
      return(NULL);

   incX = Mabs(incX);
   len = 2*npad + 1+(N-1)*incX;     /* front pad + strided data + back pad */
   vec = malloc( ATL_sizeof*len );
   assert(vec);
   vecset(len, padval, vec);
/*
 * For complex builds every logical entry is two TYPE values wide, so the
 * offsets used for pointer arithmetic below are doubled
 */
   #ifdef TCPLX
      npad *= 2;
      incX *= 2;
   #endif
   elt = vec + npad;
   for (remaining = N; remaining; remaining--)
   {
      elt[0] = dumb_rand();
      #ifndef TREAL
         elt[1] = dumb_rand();
      #endif
      elt += incX;
   }
   return(vec);
}
示例#2
0
文件: ATL_gegen.c 项目: AIDman/Kaldi
/*
 * Fills the M0 x N column-major matrix A (leading dimension lda0) with
 * values drawn from dumb_rand, after seeding the generator with seed and
 * calling gefillgap on the same matrix.  Row counts and the leading
 * dimension are scaled by SHIFT before being used as TYPE offsets.
 */
void Mjoin(PATL,gegen)
   (const int M0, const int N, TYPE *A, const int lda0, const int seed)
{
   const int nrows = M0 SHIFT, colstride = lda0 SHIFT;
   int row, colsleft;

   dumb_seed(seed);
   Mjoin(PATL,gefillgap)(M0, N, A, lda0);

   colsleft = N;
   while (colsleft)
   {
      /* one column: nrows consecutive TYPE values */
      for (row=0; row != nrows; row++)
      {
         A[row] = dumb_rand();
      }
      A += colstride;
      colsleft--;
   }
}
示例#3
0
文件: gemmtst.c 项目: certik/vendor
/*
 * Seeds the generator with seed, then writes dumb_rand values into the
 * M x N column-major matrix A with leading dimension lda.  In complex
 * builds (TCPLX) both M and lda are doubled so that they count TYPE values.
 */
void matgen(int M, int N, TYPE *A, int lda, int seed)
{
   TYPE *col = A;
   int row, colsleft;

#ifdef TCPLX
   M = 2 * M;
   lda = 2 * lda;
#endif
   dumb_seed(seed);
   colsleft = N;
   while (colsleft)
   {
      for (row=0; row != M; row++)
      {
         col[row] = dumb_rand();
      }
      col += lda;
      colsleft--;
   }
}
示例#4
0
文件: ATL_trgen.c 项目: certik/vendor
/*
 * Generates an N x N triangular test matrix in column-major array A.
 *
 * Uplo  : AtlasUpper fills the upper triangle with dumb_rand values and the
 *         strictly lower part with FILLCONST; any other value does the
 *         mirror image for the lower triangle.
 * Diag  : AtlasNonUnit gives the diagonal dumb_rand values; otherwise the
 *         diagonal is set to FILLCONST, like the rest of the unused part.
 * N     : order of the matrix
 * A     : output array, written one column at a time
 * lda0  : leading dimension of A (scaled by SHIFT to get a TYPE offset)
 * seed  : value passed to dumb_seed before any values are drawn
 *
 * gefillgap is called on the full N x N matrix before the triangle is
 * written.
 */
void Mjoin(PATL,trgen)(const enum ATLAS_UPLO Uplo, const enum ATLAS_DIAG Diag,
                       const int N, TYPE *A, const int lda0, const int seed)
{
   const int M = N SHIFT, lda = lda0 SHIFT;
   int i, j;

   dumb_seed(seed);
   Mjoin(PATL,gefillgap)(N, N, A, lda0);
   if (Uplo == AtlasUpper)
   {
      for (j=0; j != N; j++)
      {
         /* entries strictly above the diagonal */
         for (i=0; i != (j SHIFT); i++) A[i] = dumb_rand();
         if (Diag == AtlasNonUnit)
         {
            A[i++] = dumb_rand();
            #ifdef TCPLX
               A[i++] = dumb_rand();
            #endif
         }
         /* diagonal (when not NonUnit) and everything below it */
         for (; i < M; i++) A[i] = FILLCONST;
         A += lda;
      }
   }
   else
   {
      for (j=0; j != N; j++)
      {
         /* entries strictly above the diagonal are outside the triangle */
         for (i=0; i != (j SHIFT); i++) A[i] = FILLCONST;
         if (Diag == AtlasNonUnit)
         {
            A[i++] = dumb_rand();
            #ifdef TCPLX
               A[i++] = dumb_rand();
            #endif
         }
         else
         {
/*
 *          Without this branch the diagonal would fall into the random
 *          fill below, unlike the upper-triangle case where it receives
 *          FILLCONST
 */
            A[i++] = FILLCONST;
            #ifdef TCPLX
               A[i++] = FILLCONST;
            #endif
         }
         /* entries strictly below the diagonal */
         for (; i != M; i++) A[i] = dumb_rand();
         A += lda;
      }
   }
}
示例#5
0
/*
 * Runs tst_mm and NBmm on identical pseudo-random operands and compares the
 * two result matrices element by element.
 *
 * Problem sizes come from MB/NB/KB (falling back to MB0/NB0/KB0 when zero),
 * leading dimensions from LDA/LDB/LDC (falling back to 2*LDx2), and the
 * scalars from ALPHA/BETA.
 *
 * Returns the number of entries whose absolute difference exceeds ErrBound;
 * each such entry is also reported on stderr.
 */
int mmtst(void)
{
   char fnam[80];
#if defined(LDA) && LDA != 0
      const int lda=LDA;
#else
      const int lda=2*LDA2;
#endif
#if defined(LDB) && LDB != 0
   const int ldb=LDB;
#else
   const int ldb=2*LDB2;
#endif
#if defined(LDC) && LDC != 0
   const int ldc=LDC;
#else
   const int ldc=2*LDC2;
#endif
   int nA, nB;
   #ifdef TCPLX
      int inca, incb, incc;
      const TYPE one=1.0, none=(-1.0);
      #if (ALPHA == 1)
         TYPE alpha[2] = {1.0, 0.0};
      #elif (ALPHA == -1)
         TYPE alpha[2] = {-1.0, 0.0};
      #else
         TYPE alpha[2] = {2.3, 0.0};
      #endif
      #if (BETA == 1)
         TYPE beta[2] = {1.0, 0.0};
      #elif (BETA == -1)
         TYPE beta[2] = {-1.0, 0.0};
      #elif (BETA == 0)
         TYPE beta[2] = {0.0, 0.0};
      #else
         TYPE beta[2] = {1.3, 0.0};
      #endif
   #else
      #ifdef ALPHA
         TYPE alpha=ALPHA;
      #else
         TYPE alpha=1.0;
      #endif
      #ifdef BETA
         TYPE beta=BETA;
      #else
         TYPE beta=1.0;
      #endif
   #endif
   const TYPE rone=1.0, rnone=(-1.0);
   void *va=NULL, *vb=NULL, *vc=NULL;
   TYPE *C0, *C1, *A, *B;
   TYPE diff, tmp;
   int i, j, k, n, nerr;
   int M=MB, N=NB, K=KB;
   TYPE ErrBound;

   if (!M) M = MB0;
   if (!N) N = NB0;
   if (!K) K = KB0;
/*
 * Tolerance scales with K and with the magnitudes of alpha and beta
 */
   #ifdef TREAL
      ErrBound = 2.0 * (Mabs(alpha) * 2.0*K*EPS + Mabs(beta) * EPS) + EPS;
   #else
      diff = Mabs(*alpha) + Mabs(alpha[1]);
      tmp = Mabs(*beta) + Mabs(beta[1]);
      ErrBound =  2.0 * (diff*8.0*K*EPS + tmp*EPS) + EPS;
   #endif
/*
 * Number of columns stored for A and B depends on the transpose setting
 */
   #ifdef NoTransA
      nA = K;
   #else
      nA = M;
   #endif
   #ifdef NoTransB
      nB = N;
   #else
      nB = K;
   #endif
   #ifdef TCPLX
      inca = lda*nA;
      incb = ldb*nB;
   #endif
   #ifdef ATL_MinMMAlign
/*
 *    A and B get their own allocations, each padded by ATL_MinMMAlign so
 *    the pointer can be rounded up to the next multiple of it; both copies
 *    of C share a third allocation
 */
      va = malloc(ATL_MinMMAlign + lda*nA*ATL_sizeof);
      vb = malloc(ATL_MinMMAlign + ldb*nB*ATL_sizeof);
      vc = C0 = malloc(2*ldc*N*ATL_sizeof);
      assert(va && vb && C0);
      C1 = C0 + (ldc * N SHIFT);
      A = (TYPE *) ( ( ((size_t) va)/ATL_MinMMAlign ) * ATL_MinMMAlign
                     + ATL_MinMMAlign );
      B = (TYPE *) ( ( ((size_t) vb)/ATL_MinMMAlign ) * ATL_MinMMAlign
                     + ATL_MinMMAlign );
   #else
/*
 *    Single allocation laid out as C0 | C1 | A | B.  C1 must be set before
 *    A is derived from it; it was previously read while still uninitialized
 */
      C0 = vc = malloc( (2*ldc*N + lda*nA + ldb*nB) * ATL_sizeof);
      assert(vc);
      C1 = C0 + (ldc * N SHIFT);
      A = C1 + (ldc * N SHIFT);
      B = A + (lda * nA SHIFT);
   #endif
   for (n=lda*nA SHIFT, i=0; i < n; i++) A[i] = dumb_rand();
   for (n=ldb*nB SHIFT, i=0; i < n; i++) B[i] = dumb_rand();
   /* both C copies start from identical contents */
   for (n=ldc*N SHIFT, i=0; i < n; i++) C0[i] = C1[i] = dumb_rand();
   tst_mm(M, N, K, alpha, A, lda, B, ldb, beta, C0, ldc);
   NBmm(M, N, K, alpha, A, lda, B, ldb, beta, C1, ldc);
   nerr = 0;
   for (j=0; j < N; j++)
   {
      for (i=0; i < M SHIFT; i++)
      {
         k = i + j*(ldc SHIFT);
         diff = C0[k] - C1[k];
         if (diff < 0.0) diff = -diff;
         if (diff > ErrBound)
         {
            fprintf(stderr, "C(%d,%d) : expected=%f, got=%f\n",
                    i, j, C0[k], C1[k]);
            nerr++;
         }
      }
   }
   free(vc);
   /* va and vb stay NULL in the single-allocation layout; free(NULL) is a no-op */
   free(va);
   free(vb);
   return(nerr);
}
示例#6
0
double GetKmmMflop
(
    CINT mb, CINT nb, CINT kb,           /* C: mbxnb, At: kbxmb, B: kbXnb */
#ifdef ATL_NEWTIME
    CINT mu, CINT nu, CINT ku,
#endif
    CINT movA, CINT movB, CINT movC,     /* which mat move in flush array? */
    int FLSIZE,                          /* min area to move in in bytes */
    CINT reps,                           /* # calls to kmm in one timing */
    CINT LDC                             /* what should ldc be set to? */
)
/*
 * Returns MFLOP rate of matmul kernel KMM, computed as
 * 2*reps*mb*nb*kb / (elapsed wall time * 1e6).
 *
 * The operands live in one malloc'ed workspace filled with dumb_rand
 * values.  Matrices flagged by movA/movB/movC advance to a fresh copy on
 * each call, cycling through nset copies; unflagged matrices are reused.
 * Supported combinations: none moving, all moving, A and B moving,
 * A and C moving.  Any other combination prints a message and exits.
 *
 * LDC: if (LDC == 0), then set ldc=MB for timings.
 *      if (LDC != 0 && movC != 0), then ldc= col length in move space
 *      else ldc = LDC;
 *
 */
{
#ifdef ATL_NEWTIME
    CINT mblks = mb/mu, nblks = nb/nu;
#endif
    const int NOMOVE = !(movA|movB|movC);
    int ldc, setsz, nset, i, j, incA, incB, incC, n, extra;
    TYPE *C, *A, *B, *a, *b, *c;
    double t0, t1, mf;
    const TYPE alpha=1.0;
    TYPE beta=1.0;
    void *vp=NULL;

    if (NOMOVE)
    {
        ldc = (LDC) ? LDC : mb;
        setsz = (ldc * nb + kb*(mb+nb));
        /*
         * Exactly one operand set exists.  nset was previously left
         * uninitialized here even though the timing loop compares against
         * it; with nset=1 every call takes the wrap-around path.
         */
        nset = 1;
        vp = malloc(ATL_Cachelen + ATL_MulBySize(setsz));
        ATL_assert(vp);
        A =  ATL_AlignPtr(vp);
        B = A + mb*kb;
        C = B + kb*nb;
        for (i=0; i < setsz; i++) A[i] = dumb_rand();
        incA = incB = incC = 0;
    }
    else
    {
        if (movA && movB && movC)         /* no reuse at all */
        {
            /* round FLSIZE up to a whole number of operand sets */
            setsz = ATL_MulBySize(mb*nb+kb*(mb+nb));
            nset = (FLSIZE+setsz-1)/setsz;
            FLSIZE = nset*setsz;
            setsz = mb*nb+kb*(mb+nb);
            /*
             * The workspace must hold all nset sets: the fill loop below
             * writes setsz*nset elements.  Allocating room for only one
             * set overran the buffer whenever nset > 1.
             */
            vp = malloc(ATL_Cachelen + ATL_MulBySize(setsz*nset));
            ATL_assert(vp);
            A = ATL_AlignPtr(vp);
            B = A + kb*mb*nset;
            C = B + kb*nb*nset;
            ldc = (LDC) ? mb*nset : mb;
            for (n=setsz*nset,i=0; i < n; i++) A[i] = dumb_rand();
            incA = mb*kb;
            incB = kb*nb;
            incC = mb*nb;
        }
        else if (movA && movB && !movC)   /* square-case ATLAS behavior */
        {
            setsz = kb*(mb+nb);
            ldc = (LDC) ? LDC : mb;
            ATL_assert(ldc >= mb);
            extra = ldc*nb;               /* the single, stationary C */
            incA = mb*kb;
            incB = kb*nb;
            incC = 0;
        }
        else if (!movB && movA && movC)   /* rank-K behavior */
        {
            setsz = mb*(kb+nb);
            extra = kb*nb;                /* the single, stationary B */
            incA = mb*kb;
            incB = 0;
            incC = mb*nb;
        }
        else
        {
            fprintf(stderr, "%s,%d: What case are you wanting?\n",
                    __FILE__, __LINE__);
            exit(-1);
        }
        if (!vp)   /* the two partial-move cases share allocation/layout */
        {
            i = ATL_MulBySize(setsz);
            nset = (FLSIZE+i-1)/i;
            FLSIZE = nset * i;
            /*
             * NOTE(review): FLSIZE has just been set from ATL_MulBySize
             * values, yet is passed through ATL_MulBySize again here, so
             * this looks larger than the setsz*nset+extra elements used
             * below.  Left unchanged -- confirm before tightening.
             */
            vp = malloc(ATL_Cachelen + ATL_MulBySize(FLSIZE+extra));
            ATL_assert(vp);
            A = ATL_AlignPtr(vp);
            if (movC)
            {
                C = A + mb*kb*nset;
                ldc = (LDC) ? mb*nset : mb;
                B = C + mb*nb*nset;
            }
            else
            {
                B = A + mb*kb*nset;
                C = B + kb*nb*nset;
            }
            for (n=setsz*nset+extra,i=0; i < n; i++) A[i] = dumb_rand();
        }
    }
    a = A;
    b = B;
    c = C;
    t0 = ATL_walltime();
    for (j=0,i=reps; i; i--)
    {
#ifdef ATL_NEWTIME
        KMM(mblks, nblks, kb, a, b, c, movA ? a+incA : a,
            movB ? b+incB : b, movC ? c+incC : c);
#else
        KMM(mb, nb, kb, alpha, a, kb, b, kb, beta, c, ldc);
#endif
        if (++j != nset)
        {
            a += incA;
            b += incB;
            c += incC;
        }
        else
        {
            /* all sets used: flip beta's sign and restart at the first set */
#ifndef ATL_NEWTIME
            beta = (beta != 0.0) ? -beta : 0.0;
#endif
            j = 0;
            a = A;
            b = B;
            c = C;
        }
    }
    t1 = ATL_walltime() - t0;
    mf = (2.0*reps*mb*nb*kb) / (t1*1000000.0);
    free(vp);
    return(mf);
}