Beispiel #1
0
/*
 * Blocked matrix-multiply loop driver: outermost loop over column panels
 * of B, then row panels of A, then blocks along K.
 *
 *   nMb, nNb, nKb  number of full blocks iterated in each dimension
 *   ib, jb, kb     sizes handed to the remainder kernels (IBNBmm, NBJBmm,
 *                  IBJBmm, KBmm); a zero value skips that remainder
 *   pA0            start of the A workspace that the kernels read
 *   B, ldb, incB   when B is non-NULL, B2blk() is called once per column
 *                  panel to fill pB0 from B, and B is advanced by incB
 *   pB0            B workspace; when B is NULL it is instead stepped
 *                  forward one panel per outer iteration
 *                  NOTE(review): presumably B was copied up front in that
 *                  case -- confirm against the caller
 *   gescal         when non-NULL, called on every block of C with beta
 *                  before the kernels run, and the kernels then receive
 *                  ATL_rone as their beta; when NULL the kernels receive
 *                  the first element of beta
 *   NBmm0          kernel used for the first full K block of each C block
 *
 * NOTE(review): the SHIFT / <<1 scaling and the *beta dereference suggest
 * an interleaved two-element-per-entry (complex) layout -- confirm.
 */
void Mjoin(PATL,mmJIK2)
(int K, int nMb, int nNb, int nKb, int ib, int jb, int kb,
 const SCALAR alpha, const TYPE *pA0, const TYPE *B, int ldb,
 TYPE *pB0, int incB, MAT2BLK B2blk, const SCALAR beta,
 TYPE *C, int ldc, MATSCAL gescal, NBMM0 NBmm0)
{
    /* Pointer step covering one K-long panel */
    const int stepK = ATL_MulByNB(K)SHIFT;
    /* Step applied to C after the row-panel loop of each column panel */
    const int stepC = ATL_MulByNB(ldc-nMb) SHIFT;
    /* No scaling routine and beta is zero: C may need explicit clearing */
    const int zeroFirst = (!gescal && SCALAR_IS_ZERO(beta));
    /* beta handed to the first kernel call on each block of C */
    const TYPE kernBeta = gescal ? ATL_rone : *beta;
    const TYPE *a = pA0;
    TYPE *b = pB0;
    /* Value b must reach to end the inner loop over full K blocks */
    TYPE *bStop = pB0 + (ATL_MulByNBNB(nKb)SHIFT);
    int rowsLeft, colsLeft;

    /* Full column panels of B */
    for (colsLeft = nNb; colsLeft; colsLeft--)
    {
        if (B)
        {
            B2blk(K, NB, B, ldb, b, alpha);
            B += incB;
        }

        /* Full row panels of A */
        for (rowsLeft = nMb; rowsLeft; rowsLeft--)
        {
            if (gescal)
            {
                gescal(NB, NB, beta, C, ldc);
            }
            if (nKb)
            {
                /* First full K block applies kernBeta to C */
                NBmm0(MB, NB, KB, ATL_rone, a, KB, b, KB, kernBeta, C, ldc);
                a += NBNB2;
                b += NBNB2;
                if (nKb != 1)
                {
                    /* Remaining full K blocks accumulate into C */
                    do
                    {
                        NBmm_b1(MB, NB, KB, ATL_rone, a, KB, b, KB, ATL_rone,
                                C, ldc);
                        a += NBNB2;
                        b += NBNB2;
                    }
                    while (b != bStop);
                }
                if (kb)
                {
                    /* Partial K block, accumulated on top */
                    KBmm(MB, NB, kb, ATL_rone, a, kb, b, kb, ATL_rone,
                         C, ldc);
                    a += ATL_MulByNB(kb)<<1;
                }
            }
            else if (kb)
            {
                /* Only a partial K block exists: it must apply kernBeta */
                if (zeroFirst)
                {
                    Mjoin(PATL,gezero)(MB, NB, C, ldc);
                }
                KBmm(MB, NB, kb, ATL_rone, a, kb, b, kb, kernBeta, C, ldc);
                a += ATL_MulByNB(kb)<<1;
            }
            /* Same B panel is reused for the next row panel of A */
            b = pB0;
            C += NB2;
        }

        /* Partial row panel of A against this column panel */
        if (ib)
        {
            if (gescal)
            {
                gescal(ib, NB, beta, C, ldc);
            }
            IBNBmm(ib, K, a, b, kernBeta, C, ldc);
        }

        /* Without a source B, step the workspace to the next panel */
        if (!B)
        {
            pB0 += stepK;
            bStop += stepK;
            b = pB0;
        }
        a = pA0;
        C += stepC;
    }

    if (!jb)
    {
        return;
    }

    /* Partial column panel of B */
    if (B)
    {
        B2blk(K, jb, B, ldb, b, alpha);
    }
    for (rowsLeft = nMb; rowsLeft; rowsLeft--)
    {
        if (gescal)
        {
            gescal(NB, jb, beta, C, ldc);
        }
        NBJBmm(jb, K, a, b, kernBeta, C, ldc);
        a += stepK;
        C += NB2;
    }
    if (ib)
    {
        /* Corner block: partial rows and partial columns */
        if (gescal)
        {
            gescal(ib, jb, beta, C, ldc);
        }
        IBJBmm(ib, jb, K, a, b, kernBeta, C, ldc);
    }
}
Beispiel #2
0
void Mjoin(PATL,mmIJK2)(int K, int nMb, int nNb, int nKb, int ib, int jb,
                        int kb, const SCALAR alpha, const TYPE *A, int lda,
                        TYPE *pA0, int incA, MAT2BLK A2blk, const TYPE *pB0,
                        const SCALAR beta, TYPE *C, int ldc, TYPE *pC,
                        PUTBLK putblk, NBMM0 NBmm0)
/*
 * Outer three loops for matmul with the outermost loop over row panels
 * of A, then column panels of B, then blocks along K.
 *
 *   nMb, nNb, nKb  number of full blocks iterated in each dimension
 *   ib, jb, kb     sizes handed to the remainder kernels; zero skips them
 *   A, lda, incA   when A is non-NULL, A2blk() is called once per row
 *                  panel to fill pA0 from A, and A is advanced by incA
 *   pA0            A workspace; when A is NULL it is instead stepped
 *                  forward one panel per outer iteration
 *   pB0            start of the B workspace that the kernels read
 *   pC, putblk     when putblk is non-NULL the kernels write into pC and
 *                  putblk() is then called with pC, the current block of
 *                  C, ldc and beta; when it is NULL the kernels are
 *                  pointed directly at C
 *   NBmm0          kernel used for the first full K block of each C block
 */
{
    /* Kernels write straight to C and beta is zero */
    const int clearC = (!putblk && SCALAR_IS_ZERO(beta));
    const int stepK = ATL_MulByNB(K);
    const int stepC = ATL_MulByNB(ldc);
    /* Leading dimension of whatever the kernels write into */
    const int ldw = putblk ? NB : ldc;
    /* beta for kernel calls that are the first to touch a block */
    const TYPE tailBeta = putblk ? ATL_rzero : beta;
    const TYPE *b = pB0;
    TYPE *a = pA0;
    /* Value a must reach to end the inner loop over full K blocks */
    TYPE *aStop = pA0 + ATL_MulByNBNB(nKb);
    TYPE *cblk;
    int m, n;

    if (putblk && !nKb && kb)
    {
        Mjoin(PATL,gezero)(MB, NB, pC, MB);
    }

    /* Full row panels of A */
    for (m = nMb; m; m--)
    {
        if (A)
        {
            A2blk(K, NB, A, lda, a, alpha);  /* get 1 row panel of A */
            A += incA;
        }
        cblk = C;
        if (!putblk)
        {
            pC = C;
        }
        C += NB;

        /* Full column panels of B */
        for (n = nNb; n; n--)
        {
            if (!nKb)
            {
                if (clearC)
                {
                    Mjoin(PATL,gezero)(MB, NB, pC, ldw);
                }
                if (kb)
                {
                    /* Only a partial K block exists for this C block */
                    KBmm(MB, NB, kb, ATL_rone, a, kb, b, kb, tailBeta,
                         pC, ldw);
                    b += kb*NB;
                }
            }
            else
            {
                /* First full K block receives the caller's beta */
                NBmm0(MB, NB, KB, ATL_rone, a, KB, b, KB, beta, pC, ldw);
                a += NBNB;
                b += NBNB;
                if (nKb != 1)
                {
                    /* Remaining full K blocks accumulate */
                    do
                    {
                        NBmm(MB, NB, KB, ATL_rone, a, KB, b, KB, ATL_rone,
                             pC, ldw);
                        a += NBNB;
                        b += NBNB;
                    }
                    while (a != aStop);
                }
                if (kb)
                {
                    KBmm(MB, NB, kb, ATL_rone, a, kb, b, kb, ATL_rone,
                         pC, ldw);
                    b += kb*NB;
                }
            }
            /* Same A panel is reused for the next column panel of B */
            a = pA0;
            if (putblk)
            {
                putblk(NB, NB, pC, cblk, ldc, beta);
            }
            else
            {
                pC += stepC;
            }
            cblk += stepC;
        }

        /* Partial column panel of B against this row panel */
        if (jb)
        {
            NBJBmm(jb, K, a, b, tailBeta, pC, ldw);
            if (putblk)
            {
                putblk(NB, jb, pC, cblk, ldc, beta);
            }
        }
        b = pB0;

        /* Without a source A, step the workspace to the next panel */
        if (!A)
        {
            pA0 += stepK;
            aStop += stepK;
            a = pA0;
        }
    }

    if (!ib)
    {
        return;
    }

    /* Partial row panel of A */
    cblk = C;
    if (A)
    {
        A2blk(K, ib, A, lda, a, alpha);  /* get last row panel of A */
    }
    for (n = nNb; n; n--)  /* full column panels of B */
    {
        if (!putblk)
        {
            IBNBmm(ib, K, a, b, beta, cblk, ldc);
        }
        else
        {
            IBNBmm(ib, K, a, b, ATL_rzero, pC, ib);
            putblk(ib, NB, pC, cblk, ldc, beta);
        }
        b += stepK;
        cblk += stepC;
    }
    if (jb)
    {
        /* Corner block: partial rows and partial columns */
        if (!putblk)
        {
            IBJBmm(ib, jb, K, a, b, beta, cblk, ldc);
        }
        else
        {
            IBJBmm(ib, jb, K, a, b, ATL_rzero, pC, ib);
            putblk(ib, jb, pC, cblk, ldc, beta);
        }
    }
}
Beispiel #3
0
/*
 * Blocked matrix-multiply loop driver: outermost loop over row panels of
 * A, then column panels of B, then blocks along K.
 *
 *   nMb, nNb, nKb  number of full blocks iterated in each dimension
 *   ib, jb, kb     sizes handed to the remainder kernels; zero skips them
 *   A, lda, incA   when A is non-NULL, A2blk() is called once per row
 *                  panel to fill pA0 from A, and A is advanced by incA
 *   pA0            A workspace; when A is NULL it is instead stepped
 *                  forward one panel per outer iteration
 *   pB0            start of the B workspace that the kernels read
 *   gescal         when non-NULL, called on every block of C with beta
 *                  before the kernels run, and the kernels then receive
 *                  ATL_rone as their beta; when NULL the kernels receive
 *                  the first element of beta
 *   NBmm0          kernel used for the first full K block of each C block
 *
 * NOTE(review): the <<1 scaling and the *beta dereference suggest an
 * interleaved two-element-per-entry (complex) layout -- confirm.
 */
void Mjoin(PATL,mmIJK2)
   (int K, int nMb, int nNb, int nKb, int ib, int jb, int kb,
    const SCALAR alpha, const TYPE *A, const int lda, TYPE *pA0, const int incA,
    MAT2BLK A2blk, TYPE *pB0, const SCALAR beta, TYPE *C, int ldc,
    MATSCAL gescal, NBMM0 NBmm0)
{
   /* Pointer step covering one K-long panel */
   const int stepK = ATL_MulByNB(K)<<1;
   /* Step in C from one column panel to the next */
   const int stepCcol = ATL_MulByNB(ldc)<<1;
   /* Undo the nNb column steps and move down to the next row panel */
   const int stepCrow = (MB<<1) - nNb*stepCcol;
   /* No scaling routine and beta is zero: C may need explicit clearing */
   const int zeroFirst = (!gescal && SCALAR_IS_ZERO(beta));
   /* beta handed to the first kernel call on each block of C */
   const TYPE kernBeta = gescal ? ATL_rone : *beta;
   const TYPE *b = pB0;
   TYPE *a = pA0;
   int m, n, kleft;

   /* Full row panels of A */
   for (m = nMb; m; m--)
   {
      if (A)
      {
         A2blk(K, NB, A, lda, a, alpha);  /* get 1 row panel of A */
         A += incA;
      }

      /* Full column panels of B */
      for (n = nNb; n; n--)
      {
         if (gescal)
         {
            gescal(MB, NB, beta, C, ldc);
         }
         if (!nKb)
         {
            if (zeroFirst)
            {
               Mjoin(PATL,gezero)(MB, NB, C, ldc);
            }
            if (kb)
            {
               /* Only a partial K block exists: it must apply kernBeta */
               KBmm(MB, NB, kb, ATL_rone, a, kb, b, kb, kernBeta, C, ldc);
               b += ATL_MulByNB(kb)<<1;
            }
         }
         else
         {
            /* First full K block applies kernBeta to C */
            NBmm0(MB, NB, KB, ATL_rone, a, KB, b, KB, kernBeta, C, ldc);
            a += NBNB2;
            b += NBNB2;

            /* Remaining nKb-1 full K blocks accumulate into C */
            kleft = nKb - 1;
            while (kleft)
            {
               NBmm_b1(MB, NB, KB, ATL_rone, a, KB, b, KB,
                       ATL_rone, C, ldc);
               a += NBNB2;
               b += NBNB2;
               kleft--;
            }
            if (kb)
            {
               /* Partial K block, accumulated on top */
               KBmm(MB, NB, kb, ATL_rone, a, kb, b, kb, ATL_rone, C, ldc);
               b += ATL_MulByNB(kb)<<1;
            }
         }
         /* Same A panel is reused for the next column panel of B */
         a = pA0;
         C += stepCcol;
      }

      /* Partial column panel of B against this row panel */
      if (jb)
      {
         if (gescal)
         {
            gescal(MB, jb, beta, C, ldc);
         }
         MBJBmm(jb, K, a, b, kernBeta, C, ldc);
      }
      b = pB0;

      /* Without a source A, step the workspace to the next panel */
      if (!A)
      {
         pA0 += stepK;
         a = pA0;
      }
      C += stepCrow;
   }

   if (!ib)
   {
      return;
   }

   /* Partial row panel of A */
   if (A)
   {
      A2blk(K, ib, A, lda, a, alpha);   /* get last row panel of A */
   }
   for (n = nNb; n; n--) /* full column panels of B */
   {
      if (gescal)
      {
         gescal(ib, NB, beta, C, ldc);
      }
      IBNBmm(ib, K, a, b, kernBeta, C, ldc);
      b += stepK;
      C += stepCcol;
   }
   if (jb)
   {
      /* Corner block: partial rows and partial columns */
      if (gescal)
      {
         gescal(ib, jb, beta, C, ldc);
      }
      IBJBmm(ib, jb, K, a, b, kernBeta, C, ldc);
   }
}