/*
 * Externally visible entry point that forwards every argument, unchanged and
 * in the same order, to the NBmm_bX kernel.
 */
void Mjoin(PATL,CNBmm_bX)
   (const int M, const int N, const int K, const TYPE alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const TYPE beta, TYPE *C, const int ldc)
{
   NBmm_bX(M, N, K, alpha,
           A, lda,
           B, ldb,
           beta,
           C, ldc);
}
/*
 * Block multiply driver that issues four kernel calls over two sub-blocks of
 * each operand.  With X0/X1 denoting the sub-block at the base pointer and
 * the one at the offset pointer, every path below performs, in this order:
 *
 *    kernel(A0, B0, beta = ATL_rnone) -> C1
 *    kernel(A0, B1, beta = ATL_rone ) -> C0
 *    kernel(A1, B1, beta = ATL_rnone) -> C1
 *    kernel(A1, B0, beta = ATL_rone ) -> C0
 *
 * The kernel family is picked from which of M, N, K equal the full block
 * sizes MB, NB, KB.  The alpha and beta arguments are not read here; the
 * kernels are always given ATL_rone as alpha.
 *
 * NOTE(review): the call/sign pattern looks like a complex product on split
 * storage (second half-block holding the other component) -- confirm against
 * the block-copy routines.
 */
static void ATL_gNBmm_b1
   (const int M, const int N, const int K, const TYPE alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const TYPE beta, TYPE *C, const int ldc)
/*
 * ALPHA is known to be 1 (handled by copy)
 * BETA is known to be 1; we handle actual BETA in putblk phase
 */
{
   const int fullM = (M == MB);
   const int fullN = (N == NB);
   const int fullK = (K == KB);
   const TYPE *A1, *B1;   /* second sub-blocks of A and B */
   TYPE *C1;              /* second sub-block of C */

/*
 * All three dimensions are full: fixed-size kernels, NBNB offsets
 */
   if (fullM && fullN && fullK)
   {
      A1 = A + NBNB;
      B1 = B + NBNB;
      C1 = C + NBNB;
      NBmm_bX(M, N, K, ATL_rone, A,  lda, B,  ldb, ATL_rnone, C1, ldc);
      NBmm_b1(M, N, K, ATL_rone, A,  lda, B1, ldb, ATL_rone,  C,  ldc);
      NBmm_bX(M, N, K, ATL_rone, A1, lda, B1, ldb, ATL_rnone, C1, ldc);
      NBmm_b1(M, N, K, ATL_rone, A1, lda, B,  ldb, ATL_rone,  C,  ldc);
      return;
   }

/*
 * Every remaining case offsets A and C by the actual block extents
 */
   A1 = A + M*K;
   C1 = C + M*N;

/*
 * ib and jb are full, only K is partial
 */
   if (fullM && fullN)
   {
      B1 = B + N*K;
      Mjoin(PATLU,pKBmm_bX)(M, N, K, ATL_rone, A,  lda, B,  ldb, ATL_rnone,
                            C1, ldc);
      Mjoin(PATLU,pKBmm_b1)(M, N, K, ATL_rone, A,  lda, B1, ldb, ATL_rone,
                            C,  ldc);
      Mjoin(PATLU,pKBmm_bX)(M, N, K, ATL_rone, A1, lda, B1, ldb, ATL_rnone,
                            C1, ldc);
      Mjoin(PATLU,pKBmm_b1)(M, N, K, ATL_rone, A1, lda, B,  ldb, ATL_rone,
                            C,  ldc);
      return;
   }

/*
 * Only M is partial; this path offsets B by NBNB rather than N*K
 */
   if (!fullM && fullN && fullK)
   {
      B1 = B + NBNB;
      Mjoin(PATLU,pMBmm_bX)(M, N, K, ATL_rone, A,  lda, B,  ldb, ATL_rnone,
                            C1, ldc);
      Mjoin(PATLU,pMBmm_b1)(M, N, K, ATL_rone, A,  lda, B1, ldb, ATL_rone,
                            C,  ldc);
      Mjoin(PATLU,pMBmm_bX)(M, N, K, ATL_rone, A1, lda, B1, ldb, ATL_rnone,
                            C1, ldc);
      Mjoin(PATLU,pMBmm_b1)(M, N, K, ATL_rone, A1, lda, B,  ldb, ATL_rone,
                            C,  ldc);
      return;
   }

   B1 = B + N*K;

/*
 * ib is full and K is full, so only N is partial here (the case with N
 * also full was handled at the top)
 */
   if (fullM && fullK)
   {
      Mjoin(PATLU,pNBmm_bX)(M, N, K, ATL_rone, A,  lda, B,  ldb, ATL_rnone,
                            C1, ldc);
      Mjoin(PATLU,pNBmm_b1)(M, N, K, ATL_rone, A,  lda, B1, ldb, ATL_rone,
                            C,  ldc);
      Mjoin(PATLU,pNBmm_bX)(M, N, K, ATL_rone, A1, lda, B1, ldb, ATL_rnone,
                            C1, ldc);
      Mjoin(PATLU,pNBmm_b1)(M, N, K, ATL_rone, A1, lda, B,  ldb, ATL_rone,
                            C,  ldc);
      return;
   }

/*
 * Any other mix of partial dimensions: general kernel, beta passed at runtime
 */
   Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A,  lda, B,  ldb, ATL_rnone, C1, ldc);
   Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A,  lda, B1, ldb, ATL_rone,  C,  ldc);
   Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A1, lda, B1, ldb, ATL_rnone, C1, ldc);
   Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A1, lda, B,  ldb, ATL_rone,  C,  ldc);
}