Beispiel #1
0
/*
 * ATL_USERMM -- tiled kernel driver.
 *
 * Walks the operands in steps of 4 along both the n and m extents and hands
 * each tile to the __InnerLoop_ macro.  When KB is undefined, 0 or 1, a call
 * with k == 1 is delegated entirely to ATL_myger instead.
 *
 * Parameters:
 *   m, n    Extents of the two tiling loops.  Both loops advance by 4 with
 *           no remainder handling, so this presumably expects multiples of
 *           4 -- TODO confirm against the kernel's registration.
 *   k       Forwarded to __InnerLoop_; also selects the ATL_myger shortcut.
 *   alpha   Not referenced anywhere in this function.
 *   A, LdA  Read-only operand and its stride (presumably the leading
 *           dimension); the inner loop advances A's cursor by 4*LdA.
 *   B, LdB  Read-only operand and its stride; the outer loop advances B's
 *           cursor by 4*LdB.
 *   beta    Forwarded unchanged to __InnerLoop_ / ATL_myger.
 *   C, LdC  The only non-const operand (presumably the output) and its
 *           stride.  Every offset applied to C is followed by SHIFT.
 *
 * NOTE(review): TYPE, SHIFT, KB, __InnerLoop_ and ATL_myger are all defined
 * outside this block.  SHIFT is pasted directly after an expression, so it
 * presumably expands either to nothing or to a trailing operator such as a
 * left shift -- confirm before touching any expression that uses it.
 */
void ATL_USERMM(int m, int n, int k,
		 const TYPE alpha, const TYPE *A, const int LdA,
		 const TYPE *B, const int LdB, const TYPE beta,
		 TYPE *C, const int LdC
		)
{
  /* NOTE(review): ki is not used in the visible code; it may be referenced
     from inside the __InnerLoop_ macro expansion -- verify before removing. */
  int i, j, msize, ki;
   /* Pointer advances for one 4-wide step: 4*LdA, 4*LdB and (4*LdC)SHIFT. */
   const int incA=LdA<<2, incB=LdB<<2, incC=(LdC<<2)SHIFT;

   /* Shortcut compiled in only when KB is undefined, 0 or 1: a k == 1 call
      is handed to ATL_myger and the tiled loops below are skipped. */
   #if !defined(KB) || (KB == 1) || (KB == 0)
      if (k == 1)
      {
         ATL_myger(m, n, A, B, beta, C, LdC);
         return;
      }
   #endif
  /* msize is set to the whole remaining m, so m reaches 0 after the first
     pass and this loop body executes at most once as written. */
  while (m>0) {				/* while : partition level */
    const TYPE *a, *b; TYPE *c;
    /* Restart the B and C cursors at the current base pointers. */
    c = C; b = B;
    msize = m;
    /* Outer loop: counts from -n up to 0 in steps of 4. */
    for (i=-n; i<0; i+=4) {
      TYPE *cx;
      /* Each outer step rescans A from its base while cx walks along c. */
      cx = c; a = A;
      /* Inner loop: counts from -msize up to 0 in steps of 4. */
      for (j=-msize; j<0; j+=4) {
	__InnerLoop_(k, a, LdA, b, LdB, beta, cx, (LdC SHIFT));
	a += incA; cx += 4 SHIFT;
      } /* for j */
      b += incB; c += incC;
    } /* for i */
    /* Step the base pointers past the msize units just processed. */
    A += msize*LdA; C += msize SHIFT;
    m -= msize;
  } /* while m>0 */
}
/*
 * ATL_USERMM -- thin forwarding wrapper around ATL_myger.
 *
 * Passes M, N, A, B, beta, C and ldc straight through to ATL_myger.
 * K, alpha, lda and ldb are accepted to satisfy the kernel signature but are
 * not referenced here.
 *
 * NOTE(review): because K is ignored, this variant presumably is only
 * selected for the K == 1 case -- confirm against the caller.
 */
void ATL_USERMM(const int M, const int N, const int K,
                const TYPE alpha,
                const TYPE *A, const int lda,
                const TYPE *B, const int ldb,
                const TYPE beta,
                TYPE *C, const int ldc)
{
   ATL_myger(M, N, A, B, beta, C, ldc);
}