/*
 * ATL_USERMM -- blocked driver around __InnerLoop_.
 *
 * Steps over the n extent and the m extent in strides of 4 and invokes
 * __InnerLoop_ once per (n-step, m-step) pair, handing it the current
 * positions inside A, B and C together with k, LdA, LdB, beta and
 * (LdC SHIFT).
 *
 * Pointer stepping, exactly as coded below:
 *   - A advances by LdA<<2 elements per step along m;
 *   - B advances by LdB<<2 elements per step along n;
 *   - C advances by (4 SHIFT) elements per step along m and by
 *     ((LdC<<2) SHIFT) elements per step along n.
 *
 * Notes:
 *   - TYPE, SHIFT, __InnerLoop_ and ATL_myger are defined outside this
 *     block.  SHIFT is pasted textually after its operand; presumably it
 *     expands to nothing or to a shift that scales the C offsets -- TODO
 *     confirm against its definition.
 *   - alpha is accepted but never referenced here.
 *   - Both loop counters start at the negated extent and climb by 4 until
 *     they are no longer negative, so an extent that is not a multiple of
 *     4 still gets a full final step.
 *   - When KB is undefined, 0 or 1, a call with k == 1 is handed to
 *     ATL_myger(m, n, A, B, beta, C, LdC) and nothing else is done.
 */
void ATL_USERMM(int m, int n, int k, const TYPE alpha,
                const TYPE *A, const int LdA,
                const TYPE *B, const int LdB,
                const TYPE beta, TYPE *C, const int LdC )
{
   /*
    * NOTE(review): ki is not referenced directly in this function.  It is
    * kept because __InnerLoop_ may be a macro that relies on it -- confirm
    * before removing.
    */
   int i, j, msize, ki;
   const int incA = LdA << 2;
   const int incB = LdB << 2;
   const int incC = (LdC << 2) SHIFT;

#if !defined(KB) || (KB == 1) || (KB == 0)
   /* k == 1 shortcut: delegate the whole operation to ATL_myger. */
   if (k == 1)
   {
      ATL_myger(m, n, A, B, beta, C, LdC);
      return;
   }
#endif

   /*
    * Partition level.  msize is set to all of the remaining m and m is
    * then reduced by msize, so as written this loop body runs at most once.
    */
   while (m > 0)
   {
      const TYPE *a;
      const TYPE *b = B;
      TYPE *c = C;

      msize = m;

      /* i walks the n extent, 4 at a time (counts up from -n to 0). */
      for (i = -n; i < 0; i += 4)
      {
         TYPE *cx = c;

         a = A;

         /* j walks the current m partition, 4 at a time. */
         for (j = -msize; j < 0; j += 4)
         {
            __InnerLoop_(k, a, LdA, b, LdB, beta, cx, (LdC SHIFT));
            a += incA;
            cx += 4 SHIFT;
         }

         b += incB;
         c += incC;
      }

      /* Move past the partition that was just processed. */
      A += msize*LdA;
      C += msize SHIFT;
      m -= msize;
   }
}
/*
 * ATL_USERMM -- forwarding variant.
 *
 * Hands the whole operation to ATL_myger (defined outside this block),
 * passing M, N, A, B, beta, C and ldc through unchanged.
 *
 * K, alpha, lda and ldb are part of the signature but are not used here.
 * NOTE(review): presumably this definition is selected when the K
 * dimension is fixed at 1 -- confirm against the surrounding
 * conditional compilation.
 */
void ATL_USERMM (const int M, const int N, const int K,
                 const TYPE alpha,
                 const TYPE *A, const int lda,
                 const TYPE *B, const int ldb,
                 const TYPE beta,
                 TYPE *C, const int ldc)
{
   ATL_myger(M, N, A, B, beta, C, ldc);
}