static int L2LU(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Complex Level 2 based left-looking LU with row pivoting.
 *
 * A is addressed as a column-major complex matrix with interleaved
 * real/imaginary parts (element (i,j) starts at A[2*i + 2*j*lda]).
 * Columns 0 .. min(M,N)-1 are factored in place; for each column j the
 * row index selected by cblas_iamax (offset by j) is stored in ipiv[j].
 *
 * RETURNS: 0 if every pivot was non-zero, otherwise the 1-based index of
 *          the first column whose pivot was exactly zero.  The factorization
 *          continues past such a column.
 */
{
   TYPE *Ac=A;                       /* active column */
   TYPE tmp[2];                      /* reciprocal of the current pivot */
   const TYPE one[2] = {ATL_rone, ATL_rzero}, none[2] = {ATL_rnone, ATL_rzero};
   const int MN=Mmin(M,N), MN_1=MN-1, lda2=lda+lda;
   int ip, ip2, j, j2, jn, jn2, iret=0;

   for (j=0, j2=0, jn=1, jn2=2; j < MN; j=jn++, j2 += 2, jn2 += 2)
   {
      ipiv[j] = ip = j + cblas_iamax(M-j, Ac+j2, 1);
      ip2 = ip + ip;
      if (Ac[ip2] != ATL_rzero || Ac[ip2+1] != ATL_rzero)
      {
/*
 *       Invert the pivot before the swap moves it to row j, then apply the
 *       row interchange across all N columns and scale the sub-diagonal
 *       part of the active column by the reciprocal
 */
         Mjoin(PATL,cplxinvert)(1, Ac+ip2, 1, tmp, 1);
         if (ip != j)
            cblas_swap(N, A+j2, lda, A+ip2, lda);
         cblas_scal(M-jn, tmp, Ac+jn2, 1);
      }
      else if (!iret) iret = jn;     /* remember first zero pivot only */
/*
 *    Bring the next column up to date using the jn columns handled so far.
 *    This must happen even when the current pivot was zero: otherwise Ac
 *    would keep pointing at column j while j advances, and the following
 *    iterations would pivot on the wrong, un-updated column.
 */
      if (j != MN_1)
      {
         Ac += lda2;
         cblas_trsv(CblasColMajor, CblasLower, CblasNoTrans, CblasUnit,
                    jn, A, lda, Ac, 1);
         cblas_gemv(CblasColMajor, CblasNoTrans, M-jn, jn, none,
                    A+jn2, lda, Ac, 1, one, Ac+jn2, 1);
      }
   }
   return(iret);
}
int ATL_getriC(const int N, TYPE *A, const int lda, const int *ipiv,
               TYPE *wrk, const int lwrk)
/*
 * Column-major, in-place inversion step operating on an N x N matrix A
 * (leading dimension lda) together with its pivot vector ipiv.
 * First the upper triangle is inverted via ATL_trtri; then, for N > 1,
 * column panels are processed from right to left using wrk (lwrk elements)
 * as scratch, and finally columns are swapped according to ipiv.
 *
 * RETURNS: the ATL_trtri return value (0 on success), or -6 when lwrk is
 *          too small to hold even a single column of workspace.
 *
 * NOTE(review): trcpzeroL is defined elsewhere; judging by its name it copies
 * the lower-triangular panel into wrk and zeroes it in A -- confirm.
 */
{
   const int lda2 = lda SHIFT;
   #ifdef TREAL
      const TYPE one=ATL_rone, none=ATL_rnone;
   #else
      const TYPE one[2]={ATL_rone,ATL_rzero}, none[2]={ATL_rnone, ATL_rzero};
   #endif
   int col, width, blk, ntrail;
   int info;

   info = ATL_trtri(CblasColMajor, CblasUpper, CblasNonUnit, N, A, lda);
   if (info || N <= 1)
      return(info);
/*
 * Pick the widest panel the supplied workspace allows, rounded down to a
 * multiple of NB when possible, else to a multiple of ATL_mmNU
 */
   width = lwrk / N;
   if (width >= NB)
      blk = ATL_MulByNB(ATL_DivByNB(width));
   else if (width >= ATL_mmNU)
      blk = (width/ATL_mmNU)*ATL_mmNU;
   else
      blk = width;
   if (!blk)
      return(-6);                    /* need at least 1 col of workspace */
/*
 * The rightmost panel is the only one that may be narrower than blk, so it
 * is handled separately before the main loop
 */
   width = N % blk;
   if (!width)
      width = blk;
   col = N - width;
   A += lda2*col;
   trcpzeroL(width, width, A+(col SHIFT), lda, wrk, width);
   cblas_trsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasUnit,
              N, width, one, wrk, width, A, lda);
/*
 * Remaining panels are all exactly blk columns wide; walk leftwards until
 * column 0 has been processed (A is back at the matrix origin afterwards)
 */
   while (col)
   {
      col -= blk;
      A -= blk*lda2;
      ntrail = N - col;
      trcpzeroL(ntrail, blk, A+(col SHIFT), lda, wrk, ntrail);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, blk,
                 ntrail-blk, none, A+blk*lda2, lda, wrk+(blk SHIFT), ntrail,
                 one, A, lda);
      cblas_trsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
                 CblasUnit, N, blk, one, wrk, ntrail, A, lda);
   }
/*
 * Apply column interchanges, last recorded pivot first
 */
   col = N - 2;
   while (col >= 0)
   {
      const int piv = ipiv[col];
      if (piv != col)
         cblas_swap(N, A+col*lda2, 1, A+piv*lda2, 1);
      col--;
   }
   return(info);
}
static int LU1(ATL_CINT M, ATL_CINT N, ATL_CINT j, TYPE *A, ATL_CINT lda,
               int *ipiv)
/*
 * Performs an LU factorization on jth column.  N is the full width of
 * column panel, A is ptr to beginning of panel.
 * The pivot row found by cblas_iamax (offset by j) is recorded in ipiv[j],
 * rows j and ipiv[j] are exchanged across all N panel columns when they
 * differ, and the entries below the diagonal of column j are divided by
 * the pivot.
 * RETURNS: 0 on success, non-zero if no non-zero pivot exists
 */
{
   #ifdef TCPLX
      ATL_CINT lda2 = lda+lda;
   #else
/*
 *    NOTE(review): these macros stay defined after this function; code
 *    further down the file may rely on them, so they are kept as-is.
 */
      #define lda2 lda
      #define none ATL_rnone
   #endif
   TYPE *Ac = A + j*lda2;  /* active column */
   TYPE pivval;
   ATL_INT ip;

   ipiv[j] = ip = j + cblas_iamax(M-j, Ac+(j SHIFT), 1);
   #ifdef TCPLX
      /* magnitude proxy: |real| + |imag| of the selected pivot */
      pivval = Mabs(Ac[ip+ip]) + Mabs(Ac[ip+ip+1]);
   #else
      pivval = Ac[ip];
   #endif
   if (pivval != ATL_rzero)
   {
      if (ip != j)
         cblas_swap(N, A+(j SHIFT), lda, A+(ip SHIFT), lda);
/*
 *    Pivots of magnitude at least ATL_laSAFMIN are applied by scaling with
 *    the reciprocal; smaller ones are applied by explicit division
 *    (presumably so the reciprocal cannot overflow -- confirm).
 */
      #ifdef TCPLX
         if (pivval >= ATL_laSAFMIN)
         {
            TYPE invs[2];
            Mjoin(PATL,cplxinvert)(1, Ac+j+j, 1, invs, 1);
            cblas_scal(M-j-1, invs, Ac+j+j+2, 1);
         }
         else
            Mjoin(PATL,cplxdivide)(M-j-1, Ac+j+j, Ac+j+j+2, 1, Ac+j+j+2, 1);
      #else
         if (Mabs(pivval) >= ATL_laSAFMIN)
            cblas_scal(M-j-1, ATL_rone/pivval, Ac+j+1, 1);
         else
         {
            ATL_INT i;
            /* divide each sub-diagonal entry (rows j+1 .. M-1) by the pivot */
            for (i=j+1; i < M; i++)
               Ac[i] /= pivval;
         }
      #endif
      return(0);
   }
   return(1);
}