/*
 * Non-static wrapper around NBmm_b0: every argument is handed through
 * unchanged and nothing else is done.
 */
void Mjoin(PATL,CNBmm_b0)
   (const int M, const int N, const int K, const TYPE alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const TYPE beta, TYPE *C, const int ldc)
{
   NBmm_b0(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
static void ATL_gNBmm
   (const int M, const int N, const int K, SCALAR alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const SCALAR beta, TYPE *C, const int ldc)
/*
 * Selects one multiply kernel according to which of M, N, K equal the
 * blocking factors MB, NB, KB, and forwards all arguments to it unchanged.
 *
 * BETA is known to be 0 or 1: wherever a kernel comes in _b1/_b0 flavours,
 * the _b1 one is called when beta == ATL_rone and the _b0 one otherwise.
 * The plain pKBmm kernel is called without looking at beta.
 */
{
   const int fullM = (M == MB);
   const int fullN = (N == NB);
   const int fullK = (K == KB);

   if (fullM && fullN && fullK)          /* all three dimensions are full */
   {
      if (beta == ATL_rone)
         NBmm_b1(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      else
         NBmm_b0(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   else if (!fullM && fullN && fullK)    /* only M differs from MB */
   {
      if (beta == ATL_rone)
         Mjoin(PATL,pMBmm_b1)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      else
         Mjoin(PATL,pMBmm_b0)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   else if (fullM && !fullN && fullK)    /* only N differs from NB */
   {
      if (beta == ATL_rone)
         Mjoin(PATL,pNBmm_b1)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      else
         Mjoin(PATL,pNBmm_b0)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   else if (fullM && fullN)              /* ib and jb are full, K != KB */
   {
      if (beta == ATL_rone)
         Mjoin(PATL,pKBmm_b1)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
      else
         Mjoin(PATL,pKBmm_b0)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
   else                                  /* two or more dimensions differ */
   {
      Mjoin(PATL,pKBmm)(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   }
}
static void ATL_gNBmm_b0
   (const int M, const int N, const int K, const TYPE alpha,
    const TYPE *A, const int lda, const TYPE *B, const int ldb,
    const TYPE beta, TYPE *C, const int ldc)
/*
 * ALPHA is known to be 1 (handled by copy)
 * BETA is known to be 0; we handle actual BETA in putblk phase
 *
 * The alpha and beta arguments are therefore never read: every kernel call
 * below is given ATL_rone as alpha and a fixed ATL_rzero / ATL_rnone /
 * ATL_rone as beta.
 *
 * Each branch issues the same four-call sequence on two halves of A, B
 * and C (the first half at the base pointer, the second at an offset):
 *    1. C2  <-  A  * B      (beta = 0)
 *    2. C   <-  A  * B2     (beta = 0)
 *    3. C2  <-  A2 * B2 - C2 (beta = -1)
 *    4. C   <-  A2 * B  + C  (beta = 1)
 * assuming each kernel computes C <- alpha*A*B + beta*C (the kernels are
 * defined elsewhere -- TODO confirm).  Under that assumption this is the
 * split-storage complex product with the offset halves as real parts.
 * The branches differ only in the kernel family used and in how the
 * second-half offsets are computed (NBNB vs. M*K, N*K, M*N).
 */
{
   const int fullM = (M == MB);
   const int fullN = (N == NB);
   const int fullK = (K == KB);

   if (fullM && fullN && fullK)          /* all three dimensions are full */
   {
      const TYPE *A2 = A + NBNB;
      const TYPE *B2 = B + NBNB;
      TYPE *C2 = C + NBNB;

      NBmm_b0(M, N, K, ATL_rone, A, lda, B, ldb, ATL_rzero, C2, ldc);
      NBmm_b0(M, N, K, ATL_rone, A, lda, B2, ldb, ATL_rzero, C, ldc);
      NBmm_bX(M, N, K, ATL_rone, A2, lda, B2, ldb, ATL_rnone, C2, ldc);
      NBmm_b1(M, N, K, ATL_rone, A2, lda, B, ldb, ATL_rone, C, ldc);
   }
   else if (!fullM && fullN && fullK)    /* only M differs from MB */
   {
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + NBNB;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pMBmm_b0)(M, N, K, ATL_rone, A, lda, B, ldb,
                            ATL_rzero, C2, ldc);
      Mjoin(PATLU,pMBmm_b0)(M, N, K, ATL_rone, A, lda, B2, ldb,
                            ATL_rzero, C, ldc);
      Mjoin(PATLU,pMBmm_bX)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                            ATL_rnone, C2, ldc);
      Mjoin(PATLU,pMBmm_b1)(M, N, K, ATL_rone, A2, lda, B, ldb,
                            ATL_rone, C, ldc);
   }
   else if (fullM && !fullN && fullK)    /* ib is full, only N differs */
   {
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + N*K;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pNBmm_b0)(M, N, K, ATL_rone, A, lda, B, ldb,
                            ATL_rzero, C2, ldc);
      Mjoin(PATLU,pNBmm_b0)(M, N, K, ATL_rone, A, lda, B2, ldb,
                            ATL_rzero, C, ldc);
      Mjoin(PATLU,pNBmm_bX)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                            ATL_rnone, C2, ldc);
      Mjoin(PATLU,pNBmm_b1)(M, N, K, ATL_rone, A2, lda, B, ldb,
                            ATL_rone, C, ldc);
   }
   else if (fullM && fullN)              /* ib and jb are full, K != KB */
   {
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + N*K;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pKBmm_b0)(M, N, K, ATL_rone, A, lda, B, ldb,
                            ATL_rzero, C2, ldc);
      Mjoin(PATLU,pKBmm_b0)(M, N, K, ATL_rone, A, lda, B2, ldb,
                            ATL_rzero, C, ldc);
      Mjoin(PATLU,pKBmm_bX)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                            ATL_rnone, C2, ldc);
      Mjoin(PATLU,pKBmm_b1)(M, N, K, ATL_rone, A2, lda, B, ldb,
                            ATL_rone, C, ldc);
   }
   else                                  /* two or more dimensions differ */
   {
      const TYPE *A2 = A + M*K;
      const TYPE *B2 = B + N*K;
      TYPE *C2 = C + M*N;

      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A, lda, B, ldb,
                         ATL_rzero, C2, ldc);
      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A, lda, B2, ldb,
                         ATL_rzero, C, ldc);
      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A2, lda, B2, ldb,
                         ATL_rnone, C2, ldc);
      Mjoin(PATLU,pKBmm)(M, N, K, ATL_rone, A2, lda, B, ldb,
                         ATL_rone, C, ldc);
   }
}