Пример #1
0
/*
 * Non-static wrapper around NBmm_b0: every argument is forwarded unchanged
 * and in the same order; nothing is returned.
 * NOTE(review): the _b0 suffix presumably marks the beta == 0 kernel variant;
 * beta is nevertheless passed through as given -- confirm with the kernel.
 */
void Mjoin(PATL,CNBmm_b0)
   (const int M, const int N, const int K, const TYPE alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const TYPE beta, TYPE *C, const int ldc)
{
   NBmm_b0(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
Пример #2
0
static void ATL_gNBmm(const int M, const int N, const int K, SCALAR alpha,
                      const TYPE *A, const int lda, const TYPE *B,
                      const int ldb, const SCALAR beta, TYPE *C,
                      const int ldc)
/*
 * Kernel dispatcher: picks one multiply kernel from which of M, N, K match
 * the block sizes MB, NB, KB, and forwards all arguments to it unchanged.
 *
 *    M != MB, N == NB, K == KB  ->  pMBmm_b1 / pMBmm_b0
 *    M == MB, N != NB, K == KB  ->  pNBmm_b1 / pNBmm_b0
 *    M == MB, N == NB, K != KB  ->  pKBmm_b1 / pKBmm_b0
 *    M == MB, N == NB, K == KB  ->  NBmm_b1  / NBmm_b0
 *    any other combination      ->  pKBmm (beta is not inspected)
 *
 * The caller guarantees BETA is either 0 or 1; the _b1 variant is taken when
 * beta == ATL_rone, the _b0 variant otherwise.
 */
{
   const int betaIsOne = (beta == ATL_rone);

   if (M != MB)
   {
      /* Partial M combined with a partial N or K: no specialised kernel */
      if (N != NB || K != KB)
      {
         Mjoin(PATL,pKBmm)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
         return;
      }
      /* Only M is partial */
      if (betaIsOne)
      {
         Mjoin(PATL,pMBmm_b1)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      }
      else
      {
         Mjoin(PATL,pMBmm_b0)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      }
      return;
   }

   /* From here on M == MB */
   if (N != NB)
   {
      /* Partial N combined with a partial K */
      if (K != KB)
      {
         Mjoin(PATL,pKBmm)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
         return;
      }
      /* Only N is partial */
      if (betaIsOne)
      {
         Mjoin(PATL,pNBmm_b1)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      }
      else
      {
         Mjoin(PATL,pNBmm_b0)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      }
      return;
   }

   /* From here on M == MB and N == NB */
   if (K != KB)
   {
      /* Only K is partial */
      if (betaIsOne)
      {
         Mjoin(PATL,pKBmm_b1)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      }
      else
      {
         Mjoin(PATL,pKBmm_b0)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      }
      return;
   }

   /* All three dimensions match the block sizes */
   if (betaIsOne)
   {
      NBmm_b1(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   else
   {
      NBmm_b0(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
}
Пример #3
0
static void ATL_gNBmm_b0
   (const int M, const int N, const int K, const TYPE alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const TYPE beta, TYPE *C, const int ldc)
/*
 * ALPHA is known to be 1 (handled by copy) and BETA is known to be 0 (the
 * actual BETA is applied in the putblk phase), so neither parameter is read:
 * every kernel call below is given ATL_rone as alpha and a fixed beta.
 *
 * Each dispatch case issues the same sequence of four kernel calls, where
 * A2/B2/C2 are A/B/C advanced by a fixed element offset:
 *    1) A , B  -> C2   with beta = ATL_rzero   (_b0 kernel)
 *    2) A , B2 -> C    with beta = ATL_rzero   (_b0 kernel)
 *    3) A2, B2 -> C2   with beta = ATL_rnone   (_bX kernel)
 *    4) A2, B  -> C    with beta = ATL_rone    (_b1 kernel)
 * The order matters: calls 3 and 4 accumulate into what calls 1 and 2 wrote.
 * NOTE(review): presumably A, B and C each hold two consecutive panels (the
 * two components of a complex block) -- confirm against the copy routines.
 *
 * The kernel family is chosen by which of M, N, K equal MB, NB, KB.
 */
{
   const int fullM = (M == MB);
   const int fullN = (N == NB);
   const int fullK = (K == KB);

   if (fullM && fullN)
   {
      if (fullK)
      {
         /* Every dimension is a full block: all offsets are NBNB */
         const TYPE *A2 = A + NBNB;
         const TYPE *B2 = B + NBNB;
         TYPE *C2 = C + NBNB;

         NBmm_b0(M, N, K, ATL_rone, A, lda, B, ldb, ATL_rzero, C2, ldc);
         NBmm_b0(M, N, K, ATL_rone, A, lda, B2, ldb, ATL_rzero, C, ldc);
         NBmm_bX(M, N, K, ATL_rone, A2, lda, B2, ldb, ATL_rnone, C2, ldc);
         NBmm_b1(M, N, K, ATL_rone, A2, lda, B, ldb, ATL_rone, C, ldc);
      }
      else
      {
         /* Only K is partial: beta-specialised pKBmm kernels */
         const TYPE *A2 = A + M*K;
         const TYPE *B2 = B + N*K;
         TYPE *C2 = C + M*N;

         Mjoin(PATLU,pKBmm_b0)(M, N, K, ATL_rone, A, lda, B, ldb,
                               ATL_rzero, C2, ldc);
         Mjoin(PATLU,pKBmm_b0)(M, N, K, ATL_rone, A, lda, B2, ldb,
                               ATL_rzero, C, ldc);
         Mjoin(PATLU,pKBmm_bX)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                               ATL_rnone, C2, ldc);
         Mjoin(PATLU,pKBmm_b1)(M, N, K, ATL_rone, A2, lda, B, ldb,
                               ATL_rone, C, ldc);
      }
   }
   else if (!fullM && fullN && fullK)
   {
      /* Only M is partial: B's second panel sits at NBNB, A's and C's
       * second panels are offset by the actual M */
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + NBNB;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pMBmm_b0)(M, N, K, ATL_rone, A, lda, B, ldb,
                            ATL_rzero, C2, ldc);
      Mjoin(PATLU,pMBmm_b0)(M, N, K, ATL_rone, A, lda, B2, ldb,
                            ATL_rzero, C, ldc);
      Mjoin(PATLU,pMBmm_bX)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                            ATL_rnone, C2, ldc);
      Mjoin(PATLU,pMBmm_b1)(M, N, K, ATL_rone, A2, lda, B, ldb,
                            ATL_rone, C, ldc);
   }
   else if (fullM && fullK)
   {
      /* Only N is partial (N == NB was handled by the first branch) */
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + N*K;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pNBmm_b0)(M, N, K, ATL_rone, A, lda, B, ldb,
                            ATL_rzero, C2, ldc);
      Mjoin(PATLU,pNBmm_b0)(M, N, K, ATL_rone, A, lda, B2, ldb,
                            ATL_rzero, C, ldc);
      Mjoin(PATLU,pNBmm_bX)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                            ATL_rnone, C2, ldc);
      Mjoin(PATLU,pNBmm_b1)(M, N, K, ATL_rone, A2, lda, B, ldb,
                            ATL_rone, C, ldc);
   }
   else
   {
      /* Two or more dimensions are partial: pKBmm, which takes beta at
       * run time, is used for all four calls */
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + N*K;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A, lda, B, ldb,
                         ATL_rzero, C2, ldc);
      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A, lda, B2, ldb,
                         ATL_rzero, C, ldc);
      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                         ATL_rnone, C2, ldc);
      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A2, lda, B, ldb,
                         ATL_rone, C, ldc);
   }
}