/*
 * Left-side triangular matrix multiply driver for the Uplo_/Trans_/Unit_
 * variant selected at compile time.
 *
 * M, N    : dimensions forwarded to the kernels; the workspace is M x M.
 * valpha  : pointer to the scalar alpha (dereferenced in the real build,
 *           passed through as a pointer in the complex build).
 * A, lda  : triangular operand and its leading dimension.
 * C, ldc  : matrix updated in place, and its leading dimension.
 *
 * When N exceeds TRMM_Xover, A is copied with ATL_trcopy into a freshly
 * allocated, ATL_AlignPtr-aligned M x M workspace and the product is formed
 * by CAgemmTN, which receives C as both input operand and output.
 * Otherwise the reference TRMM is called directly on A.
 * Allocation failure is caught by ATL_assert.
 */
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      /* Complex scalars are passed by pointer; undefined again at the end */
      #define alpha valpha
   #endif
   void *va;
   TYPE *a;

   if (N > TRMM_Xover)
   {
      /*
       * Compute the byte count in size_t: the int product M*M*sizeof(TYPE)
       * overflows for large M well before the allocation itself is too big.
       */
      va = malloc(ATL_Cachelen + ATL_MulBySize((size_t)M)*M);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         /* alpha is applied during the copy, so the gemm uses alpha = 1 */
         if ( SCALAR_IS_ONE(alpha) )
            Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);
         else
            Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);
         CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);
      #else
         /* Complex copy is unscaled; alpha is applied by the gemm instead */
         ATL_trcopy(M, A, lda, a);
         CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);
      #endif
      free(va);
   }
   else
      Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);

   #ifndef TREAL
      /* Keep the function-local alias from leaking into later definitions */
      #undef alpha
   #endif
}
/*
 * Right-side triangular solve driver for the Uplo_/Trans_/Unit_ variant
 * selected at compile time.
 *
 * M, N    : dimensions forwarded to the kernels; the workspace (when used)
 *           is N x N.
 * valpha  : pointer to the scalar alpha (dereferenced in the real build,
 *           passed through as a pointer in the complex build).
 * A, lda  : triangular operand and its leading dimension.
 * C, ldc  : matrix updated in place, and its leading dimension.
 *
 * Real build:
 *   - Transpose_/ConjTrans_ variants: if M > 4*N, A is copied (with scale
 *     ATL_rone) into an aligned N x N workspace and the ATLPt flavour of the
 *     trsmKR kernel is run on that copy with leading dimension N; otherwise
 *     the reference TRSM is used.
 *   - Other variants: the trsmKR kernel is always called directly on A.
 * Complex build:
 *   - CtrsmKR is used only when M > 4*N and N <= 4; otherwise the reference
 *     TRSM is used.
 * Allocation failure is caught by ATL_assert.
 */
void Mjoin(Mjoin(PATL,trsmR),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   const TYPE *alpha=valpha;

   #ifdef TREAL
      #if defined(Transpose_) || defined(ConjTrans_)
         if ( M > (N<<2) )
         {
            void *va;
            TYPE *a;

            /*
             * Compute the byte count in size_t: the int product N*N (and its
             * scaling by the element size) overflows for large N.
             */
            va = malloc(ATL_Cachelen + ATL_MulBySize((size_t)N)*N);
            ATL_assert(va);
            a = ATL_AlignPtr(va);
            /* Only the real copy routine is reachable inside this TREAL arm */
            Mjoin(ATL_trcopy,_a1)(N, ATL_rone, A, lda, a);
            Mjoin(Mjoin(PATL,trsmKR),ATLPt)(M, N, *alpha, a, N, C, ldc);
            free(va);
         }
         else
            Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N,
                                *alpha, A, lda, C, ldc);
      #else
         Mjoin(Mjoin(PATL,trsmKR),ATLP)(M, N, *alpha, A, lda, C, ldc);
      #endif
   #else
      if (M > (N<<2) && N <= 4)
         Mjoin(PATL,CtrsmKR)(Uplo_, Trans_, Unit_, M, N, valpha,
                             A, lda, C, ldc);
      else
         Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, alpha,
                             A, lda, C, ldc);
   #endif
}