int ATL_getrfC(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Column-major LU factorization of the form
 *    A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit
 * diagonal elements (lower trapezoidal if M > N), and U is upper triangular
 * (upper trapezoidal if M < N).  This is the recursive Level 3 BLAS version.
 *
 * M, N : row and column counts of A.
 * A    : matrix to factor; it is updated in place by the calls below.
 * lda  : leading dimension of A (passed unchanged to every callee).
 * ipiv : pivot array; entries [0, min(M,N)) are written by the recursive
 *        calls and adjusted in the loop near the end of the recursive branch.
 *
 * Status handling visible here: small problems return the status of getf2
 * directly; in the recursive branch ierr keeps the first nonzero status
 * produced by the two recursive calls, with the right-half status shifted
 * by Nleft.
 */
{
   const int MN = Mmin(M, N);
   int Nleft, Nright, k, i, ierr=0;
/*
 * Scalar constants handed to trsm/gemm: two-element arrays when TCPLX is
 * defined, plain macros otherwise.
 */
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ac, *An;

/*
 * Problems with at most ATL_L1elts elements are handed straight to getf2
 * (presumably the unblocked factorization; ATL_L1elts is presumably an
 * L1-cache-derived threshold -- confirm against its definition).
 */
   if (((size_t)M)*N <= ATL_L1elts)
      return(Mjoin(PATL,getf2)(M, N, A, lda, ipiv));
/*
 * Threaded panel path, compiled only when both ATL_USEPTHREADS and
 * ATL_USEPCA are defined.
 * NOTE(review): the enclosing condition already requires N >= 16, so the
 * else arm calling tgetf2_nocp cannot execute as written -- confirm which
 * threshold was intended.
 */
   #if defined(ATL_USEPTHREADS) && defined(ATL_USEPCA)
   if (N <= (NB<<2) && N >= 16 && M-N >= ATL_PCAMin && ((size_t)ATL_MulBySize(M)*N) <= CacheEdge*ATL_NTHREADS)
   {
      if (N >= 16)
         ierr = Mjoin(PATL,tgetf2)(M, N, A, lda, ipiv);
      else
         ierr = Mjoin(PATL,tgetf2_nocp)(M, N, A, lda, ipiv);
      return(ierr);
   }
   #endif
   if (MN > ATL_luMmin)
   {
/*
 *    Split the columns roughly in half; when NB is defined and the half
 *    exceeds NB, Nleft is recomputed via ATL_MulByNB(ATL_DivByNB(Nleft))
 *    (presumably rounding down to a multiple of NB -- confirm).
 */
      Nleft = MN >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      i = ATL_getrfC(M, Nleft, A, lda, ipiv); /* factor left to L & U */
      if (i) if (!ierr) ierr = i; /* keep only the first nonzero status */
/*
 *    Update trailing submatrix.
 *    Ac points at column Nleft of A; An points Nleft rows further down that
 *    column.  SHIFT is a macro defined elsewhere (presumably scaling the
 *    element offset for complex types -- confirm).
 */
      Ac = A + (Nleft * lda SHIFT);
      An = Ac + (Nleft SHIFT);
/*
 *    Presumably applies the left panel's row interchanges ipiv[0..Nleft)
 *    to the Nright right-hand columns -- confirm against ATL_laswp.
 */
      ATL_laswp(Nright, Ac, lda, 0, Nleft, ipiv, 1);
/*
 *    Triangular solve with the unit lower-triangular Nleft x Nleft leading
 *    block of A, overwriting the top Nleft rows of the right-hand columns.
 */
      cblas_trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, Nleft, Nright, one, A, lda, Ac, lda);
/*
 *    An = An - (rows Nleft..M of the left panel) * (top of right columns):
 *    alpha is none and beta is one.
 */
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-Nleft, Nright, Nleft, none, A+(Nleft SHIFT), lda, Ac, lda, one, An, lda);

/*
 *    Factor the updated trailing block; its pivots go to ipiv+Nleft and a
 *    nonzero status is shifted by Nleft before being recorded.
 */
      i = ATL_getrfC(M-Nleft, Nright, An, lda, ipiv+Nleft);
      if (i) if (!ierr) ierr = i + Nleft;
/*
 *    The second recursive call was given An (Nleft rows into A), so its
 *    pivot entries are shifted by Nleft here before being used on A.
 */
      for (i=Nleft; i != MN; i++) ipiv[i] += Nleft;
/*
 *    Presumably applies the interchanges ipiv[Nleft..MN) to the Nleft
 *    left-hand columns -- confirm against ATL_laswp.
 */
      ATL_laswp(Nleft, A, lda, Nleft, MN, ipiv, 1);
   }
/*
 * NOTE(review): nothing further of this function is visible at this point --
 * the handling of MN <= ATL_luMmin, the final return of ierr and the closing
 * brace of the function are not shown.  Confirm against the complete file.
 */
int ATL_getrf(const enum CBLAS_ORDER Order, const int M, const int N,
              TYPE *A, const int lda, int *ipiv)
/*
 * Storage-order front end for the LU factorization.
 * When Order is CblasColMajor the work is done by ATL_getrfC; any other
 * value of Order is forwarded to ATL_getrfR.  M, N, A, lda and ipiv are
 * passed through untouched.
 * RETURNS: whatever the selected routine returns.
 */
{
   if (Order != CblasColMajor)
      return(ATL_getrfR(M, N, A, lda, ipiv));

   return(ATL_getrfC(M, N, A, lda, ipiv));
}