예제 #1
0
파일: ATL_trmmL.c 프로젝트: AIDman/Kaldi
/*
 * Left-side triangular matrix multiply wrapper; the result overwrites C.
 *
 *   M, N    : dimensions passed through to the GEMM / reference kernels
 *   valpha  : pointer to the scalar alpha (dereferenced for real types,
 *             passed through as a pointer for complex types)
 *   A, lda  : triangular operand and its leading dimension
 *   C, ldc  : input/output matrix and its leading dimension
 *
 * When N > TRMM_Xover, A is copied with ATL_trcopy into an aligned
 * workspace sized for M*M elements and the product is formed by CAgemmTN;
 * otherwise the reference TRMM routine is called directly.
 */
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      /* Complex scalars are passed by pointer; undefined after the function */
      #define alpha valpha
   #endif
   void *va;
   TYPE *a;

   if (N > TRMM_Xover)
   {
      /*
       * Compute the workspace size in size_t so that M*M*sizeof(element)
       * cannot overflow int arithmetic for large M.
       */
      va = malloc(ATL_Cachelen + ATL_MulBySize((size_t)M)*(size_t)M);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         /* alpha is handed to the copy routine, so GEMM is called with one */
         if ( SCALAR_IS_ONE(alpha) ) Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);
         else Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);
         CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);
      #else
         /* plain copy; alpha is handed to GEMM instead */
         ATL_trcopy(M, A, lda, a);
         CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                            A, lda, C, ldc);
}
#ifndef TREAL
   #undef alpha  /* do not let the helper macro leak past this function */
#endif
예제 #2
0
/*
 * Right-side triangular solve wrapper; the result overwrites C.
 *
 *   M, N    : dimensions passed through to the underlying kernels
 *   valpha  : pointer to the scalar alpha
 *   A, lda  : triangular operand and its leading dimension
 *   C, ldc  : input/output matrix and its leading dimension
 *
 * Real types:
 *   - Transpose_/ConjTrans_ builds: when M > 4*N, A is copied with
 *     ATL_trcopy_a1 into an aligned workspace sized for N*N elements and the
 *     ATLPt variant of the trsmKR kernel is run on that copy (leading
 *     dimension N); otherwise the reference TRSM is used.
 *   - other builds: the trsmKR kernel is called directly on A.
 * Complex types:
 *   - CtrsmKR is used when M > 4*N and N <= 4; otherwise the reference TRSM.
 */
void Mjoin(Mjoin(PATL,trsmR),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   const TYPE *alpha=valpha;
#ifdef TREAL
   #if defined(Transpose_) || defined(ConjTrans_)
      if ( M > (N<<2) )
      {
         void *va;
         TYPE *a;

         /*
          * Size the workspace in size_t: the former N*N product was
          * evaluated in int and overflowed for N > 46340.
          */
         va = malloc(ATL_Cachelen + ATL_MulBySize((size_t)N)*(size_t)N);
         ATL_assert(va);
         a = ATL_AlignPtr(va);
         /* This region is only compiled for real types (outer TREAL guard) */
         Mjoin(ATL_trcopy,_a1)(N, ATL_rone, A, lda, a);
         Mjoin(Mjoin(PATL,trsmKR),ATLPt)(M, N, *alpha, a, N, C, ldc);
         free(va);
      }
      else Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, *alpha,
                               A, lda, C, ldc);
   #else
      Mjoin(Mjoin(PATL,trsmKR),ATLP)(M, N, *alpha, A, lda, C, ldc);
   #endif
#else
   if (M > (N<<2) && N <= 4)
      Mjoin(PATL,CtrsmKR)(Uplo_, Trans_, Unit_, M, N, valpha, A, lda, C, ldc);
   else
      Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);
#endif
}