示例#1
0
/*
 * Compares the NBmm kernel against the tst_mm reference on randomly filled
 * operands.
 *
 * Problem sizes come from MB/NB/KB, falling back to MB0/NB0/KB0 when one of
 * them is zero.  Leading dimensions come from LDA/LDB/LDC when defined and
 * non-zero, otherwise from 2*LDA2, 2*LDB2, 2*LDC2.
 *
 * Two copies of C are filled with identical random data; tst_mm updates the
 * first (C0) and NBmm updates the second (C1).  Every element whose absolute
 * difference exceeds ErrBound is reported on stderr.
 *
 * Returns the number of mismatching elements (0 means the kernel agrees
 * with the reference).  Allocation failure trips an assert.
 */
int mmtst(void)
{
#if defined(LDA) && LDA != 0
   const int lda=LDA;
#else
   const int lda=2*LDA2;
#endif
#if defined(LDB) && LDB != 0
   const int ldb=LDB;
#else
   const int ldb=2*LDB2;
#endif
#if defined(LDC) && LDC != 0
   const int ldc=LDC;
#else
   const int ldc=2*LDC2;
#endif
   int nA, nB;
   /*
    * NOTE(review): one/none/rone/rnone/inca/incb/incc are not referenced
    * directly below; they are retained because tst_mm/NBmm may be macros
    * whose expansions use them -- confirm before removing.
    */
   #ifdef TCPLX
      int inca, incb, incc;
      const TYPE one=1.0, none=(-1.0);
      /* Complex scalars are stored as {real, imaginary} pairs. */
      #if (ALPHA == 1)
         TYPE alpha[2] = {1.0, 0.0};
      #elif (ALPHA == -1)
         TYPE alpha[2] = {-1.0, 0.0};
      #else
         TYPE alpha[2] = {2.3, 0.0};
      #endif
      #if (BETA == 1)
         TYPE beta[2] = {1.0, 0.0};
      #elif (BETA == -1)
         TYPE beta[2] = {-1.0, 0.0};
      #elif (BETA == 0)
         TYPE beta[2] = {0.0, 0.0};
      #else
         TYPE beta[2] = {1.3, 0.0};
      #endif
   #else
      #ifdef ALPHA
         TYPE alpha=ALPHA;
      #else
         TYPE alpha=1.0;
      #endif
      #ifdef BETA
         TYPE beta=BETA;
      #else
         TYPE beta=1.0;
      #endif
   #endif
   const TYPE rone=1.0, rnone=(-1.0);
   void *va=NULL, *vb=NULL, *vc=NULL;
   TYPE *C0, *C1, *A, *B;
   TYPE diff, tmp;
   int i, j, k, n, nerr;
   int M=MB, N=NB, K=KB;
   TYPE ErrBound;

   if (!M) M = MB0;
   if (!N) N = NB0;
   if (!K) K = KB0;
   /* Tolerance grows with K and with the magnitudes of alpha and beta. */
   #ifdef TREAL
      ErrBound = 2.0 * (Mabs(alpha) * 2.0*K*EPS + Mabs(beta) * EPS) + EPS;
   #else
      diff = Mabs(*alpha) + Mabs(alpha[1]);
      tmp = Mabs(*beta) + Mabs(beta[1]);
      ErrBound =  2.0 * (diff*8.0*K*EPS + tmp*EPS) + EPS;
   #endif
   /* Second dimension of the A and B storage depends on the transpose mode. */
   #ifdef NoTransA
      nA = K;
   #else
      nA = M;
   #endif
   #ifdef NoTransB
      nB = N;
   #else
      nB = K;
   #endif
   #ifdef TCPLX
      inca = lda*nA;
      incb = ldb*nB;
   #endif
   #ifdef ATL_MinMMAlign
      /*
       * A and B get their own over-sized allocations so that each can be
       * rounded up to the next ATL_MinMMAlign boundary; C0/C1 share one
       * unaligned allocation.
       */
      va = malloc(ATL_MinMMAlign + lda*nA*ATL_sizeof);
      vb = malloc(ATL_MinMMAlign + ldb*nB*ATL_sizeof);
      vc = C0 = malloc(2*ldc*N*ATL_sizeof);
      assert(va && vb && C0);
      C1 = C0 + (ldc * N SHIFT);
      A = (TYPE *) ( ( ((size_t) va)/ATL_MinMMAlign ) * ATL_MinMMAlign
                     + ATL_MinMMAlign );
      B = (TYPE *) ( ( ((size_t) vb)/ATL_MinMMAlign ) * ATL_MinMMAlign
                     + ATL_MinMMAlign );
   #else
      /* Single allocation laid out as [C0 | C1 | A | B]. */
      C0 = vc = malloc( (2*ldc*N + lda*nA + ldb*nB) * ATL_sizeof);
      assert(vc);
      /*
       * C1 must be set before A is derived from it; previously A was
       * computed from an uninitialized C1.
       */
      C1 = C0 + (ldc * N SHIFT);
      A = C1 + (ldc * N SHIFT);
      B = A + (lda * nA SHIFT);
   #endif
   for (n=lda*nA SHIFT, i=0; i < n; i++) A[i] = dumb_rand();
   for (n=ldb*nB SHIFT, i=0; i < n; i++) B[i] = dumb_rand();
   /* Both C copies start from identical contents. */
   for (n=ldc*N SHIFT, i=0; i < n; i++) C0[i] = C1[i] = dumb_rand();
   tst_mm(M, N, K, alpha, A, lda, B, ldb, beta, C0, ldc);
   NBmm(M, N, K, alpha, A, lda, B, ldb, beta, C1, ldc);
   nerr = 0;
   for (j=0; j < N; j++)
   {
      for (i=0; i < M SHIFT; i++)
      {
         k = i + j*(ldc SHIFT);
         diff = C0[k] - C1[k];
         if (diff < 0.0) diff = -diff;
         if (diff > ErrBound)
         {
            fprintf(stderr, "C(%d,%d) : expected=%f, got=%f\n",
                    i, j, C0[k], C1[k]);
            nerr++;
         }
      }
   }
   /* free(NULL) is a no-op, so va/vb need no guard when they were unused. */
   free(vc);
   free(va);
   free(vb);
   return(nerr);
}
示例#2
0
文件: ATL_mmIJK.c 项目: apollos/atlas
void Mjoin(PATL,mmIJK2)(int K, int nMb, int nNb, int nKb, int ib, int jb,
                        int kb, const SCALAR alpha, const TYPE *A, int lda,
                        TYPE *pA0, int incA, MAT2BLK A2blk, const TYPE *pB0,
                        const SCALAR beta, TYPE *C, int ldc, TYPE *pC,
                        PUTBLK putblk, NBMM0 NBmm0)
/*
 * Outer three loops for matmul with outer loop over rows of A.
 *
 * Iteration structure, as visible in the body:
 *   - nMb full row panels, then one partial row panel if ib != 0;
 *   - inside each, nNb full column panels, then one partial column panel
 *     if jb != 0;
 *   - for full blocks of C, the K dimension is walked as nKb full blocks
 *     followed by a remainder of kb (skipped when kb == 0).
 *
 * Parameters:
 *   K              passed to A2blk and to the cleanup kernels; also used to
 *                  compute the per-panel stride ATL_MulByNB(K).
 *   A, lda, incA   if A is non-NULL, A2blk(K, ..., A, lda, pA, alpha) is
 *                  called once per row panel and A advances by incA.
 *                  If A is NULL, no copy is done and pA0 itself advances by
 *                  ATL_MulByNB(K) per row panel (presumably the caller has
 *                  already filled the workspace -- confirm against caller).
 *   pA0, pB0       start of the A and B workspaces read by the kernels.
 *   beta, C, ldc   scalar and output matrix with its leading dimension.
 *   pC, putblk     if putblk is non-NULL, kernels write into pC and
 *                  putblk(..., pC, c, ldc, beta) is then called for each
 *                  block; otherwise kernels write into C directly.
 *   NBmm0          kernel invoked for the first full K block of each full
 *                  block of C; it receives beta, whereas later K blocks use
 *                  NBmm with ATL_rone.
 */
{
    int i, j, ldpc;
    /* Only true when writing straight to C with a zero beta. */
    const int ZEROC = ((putblk == NULL) && SCALAR_IS_ZERO(beta));
    const int incK = ATL_MulByNB(K), incC = ATL_MulByNB(ldc);
    /* stA is the stop address for the walk over full K blocks in pA. */
    TYPE *pA=pA0, *stA=pA0+ATL_MulByNBNB(nKb);
    const TYPE *pB=pB0;
    /* beta handed to cleanup kernels: zero if putblk will apply beta itself. */
    const TYPE cubeta = ( (putblk) ? ATL_rzero : beta );
    TYPE *c;

    if (putblk)
    {
        ldpc = NB;
        /* No full K blocks but a K remainder: zero pC once up front. */
        if (!nKb && kb) Mjoin(PATL,gezero)(MB, NB, pC, MB);
    }
    else ldpc = ldc;
    for (i=nMb; i; i--)    /* loop over full row panels of A */
    {
        if (A)
        {
            A2blk(K, NB, A, lda, pA, alpha);  /* get 1 row panel of A */
            A += incA;
        }
        /* Without putblk the kernels target C itself. */
        if (!putblk) pC = C;
        /* c tracks the current block of C in this row panel; C moves on. */
        c = C;
        C += NB;
        for (j=nNb; j; j--)  /* full column panels of B */
        {
            if (nKb)
            {
                /* First full K block goes through NBmm0 with beta. */
                NBmm0(MB, NB, KB, ATL_rone, pA, KB, pB, KB, beta, pC, ldpc);
                pA += NBNB;
                pB += NBNB;
                if (nKb != 1)
                {
                    /* Remaining full K blocks accumulate into pC. */
                    do
                    {
                        NBmm(MB, NB, KB, ATL_rone, pA, KB, pB, KB, ATL_rone,
                             pC, ldpc);
                        pA += NBNB;
                        pB += NBNB;
                    }
                    while (pA != stA);
                }
                if (kb)
                {
                    /* K remainder, accumulated on top of the full blocks. */
                    KBmm(MB, NB, kb, ATL_rone, pA, kb, pB, kb, ATL_rone, pC, ldpc);
                    pB += kb*NB;
                }
            }
            else
            {
                /* No full K blocks: only the K remainder (if any) contributes. */
                if (ZEROC) Mjoin(PATL,gezero)(MB, NB, pC, ldpc);
                if (kb)
                {
                    KBmm(MB, NB, kb, ATL_rone, pA, kb, pB, kb, cubeta, pC, ldpc);
                    pB += kb*NB;
                }
            }
            /* Rewind to the start of the current A row panel. */
            pA = pA0;
            if (putblk) putblk(NB, NB, pC, c, ldc, beta);
            else pC += incC;
            c += incC;
        }
        if (jb)
        {
            /* Partial column panel; pB is wherever the loop above left it. */
            NBJBmm(jb, K, pA, pB, cubeta, pC, ldpc);
            if (putblk) putblk(NB, jb, pC, c, ldc, beta);
        }
        /* Restart B from its beginning for the next row panel. */
        pB = pB0;
        if (!A)
        {
            /* No per-panel copy: step the A workspace to the next row panel. */
            pA0 += incK;
            pA = pA0;
            stA += incK;
        }
    }
    if (ib)
    {
        /* Partial row panel of ib rows. */
        c = C;
        if (A) A2blk(K, ib, A, lda, pA, alpha);  /* get last row panel of A */
        for (j=nNb; j; j--)  /* full column panels of B */
        {
            if (putblk)
            {
                /* Compute into pC (leading dimension ib), then hand to putblk. */
                IBNBmm(ib, K, pA, pB, ATL_rzero, pC, ib);
                putblk(ib, NB, pC, c, ldc, beta);
            }
            else IBNBmm(ib, K, pA, pB, beta, c, ldc);
            pB += incK;
            c += incC;
        }
        if (jb)
        {
            /* Corner block: partial in both rows (ib) and columns (jb). */
            if (putblk)
            {
                IBJBmm(ib, jb, K, pA, pB, ATL_rzero, pC, ib);
                putblk(ib, jb, pC, c, ldc, beta);
            }
            else IBJBmm(ib, jb, K, pA, pB, beta, c, ldc);
        }
    }
}