static TYPE *ATL_LmulUC(const int M, const int N, const TYPE *LU, const int ldl) { const int lda = ldl SHIFT, MN = Mmin(M,N); int i, j, m; TYPE *C, *c; #ifdef TREAL const TYPE ONE=ATL_rone; #else const TYPE ONE[2] = {ATL_rone, ATL_rzero}; #endif C = c = malloc(M*ATL_MulBySize(N)); ATL_assert(c); if (M >= N) { for (j=0; j < MN; j++) { m = j SHIFT; for (i=0; i < m; i++) c[i] = ATL_rzero; #ifdef TCPLX c[i++] = ATL_rone; c[i++] = ATL_rzero; #else c[i++] = ATL_rone; #endif for (m=M SHIFT; i < m; i++) c[i] = LU[i]; c += m; LU += lda; } LU -= MN * lda; for (m=M SHIFT; j < N; j++, c += m) Mjoin(PATL,zero)(M, c, 1); cblas_trmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, M, N, ONE, LU, ldl, C, M); } else /* M < N */ { for (j=0; j < M; j++) { m = (j+1) SHIFT; for (i=0; i < m; i++) c[i] = LU[i]; for (m=M SHIFT; i < m; i++) c[i] = ATL_rzero; c += m; LU += lda; } Mjoin(PATL,gecopy)(M, N-M, LU, ldl, c, M); LU -= M * lda; cblas_trmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ONE, LU, ldl, C, M); } return(C); }
void ATL_lauumL(const int N, TYPE *A, const int lda) { int Nleft, Nright; #ifdef TREAL const TYPE one=ATL_rone; #else const TYPE one[2]={ATL_rone, ATL_rzero}; #endif TYPE *G, *U0=A, *U1; if (N > 1) { Nleft = N >> 1; #ifdef NB if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft)); #endif Nright = N - Nleft; #ifdef RowMajor_ G = A + Nleft*(lda SHIFT); U1 = G + (Nleft SHIFT); #else G = A + (Nleft SHIFT); U1 = G + Nleft*(lda SHIFT); #endif ATL_lauumL(Nleft, U0, lda); my_syrk(MyOrder, CblasLower, my_trans, Nleft, Nright, ATL_rone, G, lda, ATL_rone, U0, lda); cblas_trmm(MyOrder, CblasLeft, CblasLower, my_trans, CblasNonUnit, Nright, Nleft, one, U1, lda, G, lda); ATL_lauumL(Nright, U1, lda); }
static TYPE *ATL_LmulUR(const int M, const int N, const TYPE *LU, const int ldl) { const int lda = ldl SHIFT, ldc = N SHIFT, MN = Mmin(M,N); int i, j, m; TYPE *C, *c; #ifdef TREAL const TYPE ONE=ATL_rone; #else const TYPE ONE[2] = {ATL_rone, ATL_rzero}; #endif C = c = malloc(M*ATL_MulBySize(N)); ATL_assert(c); if (M >= N) { for (i=0; i != N; i++, LU += lda, C += ldc) { Mjoin(PATL,copy)(i+1, LU, 1, C, 1); Mjoin(PATL,zero)(N-i-1, C+((i+1)SHIFT), 1); } for(; i != M; i++, LU += lda, C += ldc) Mjoin(PATL,copy)(N, LU, 1, C, 1); LU -= lda * M; C -= ldc * M; cblas_trmm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, M, N, ONE, LU, ldl, C, N); } else /* N > M */ { for (i=0; i != M; i++, C += ldc, LU += lda) { Mjoin(PATL,zero)(i, C, 1); C[i SHIFT] = ATL_rone; #ifdef TCPLX C[(i SHIFT)+1] = ATL_rzero; #endif Mjoin(PATL,copy)(N-i-1, LU+((i+1)SHIFT), 1, C+((i+1)SHIFT), 1); } LU -= lda * M; C -= ldc * M; cblas_trmm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, M, N, ONE, LU, ldl, C, N); } return(C); }
static TYPE *ATL_LtmulL (const enum CBLAS_ORDER Order, const int N, const TYPE *L, const int ldl) { TYPE *C; #ifdef TREAL const TYPE one=ATL_rone, zero=ATL_rzero; #else const TYPE one[2] = {ATL_rone,ATL_rzero}, zero[2] = {ATL_rzero,ATL_rzero}; #endif C = malloc(N*ATL_MulBySize(N)); ATL_assert(C); ATL_L2GE(Order, N, L, ldl, C, N); cblas_trmm(Order, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit, N, N, one, L, ldl, C, N); return(C); }
static TYPE *ATL_UmulUt (const enum CBLAS_ORDER Order, const int N, const TYPE *U, const int ldu) { TYPE *C; #ifdef TREAL const TYPE one=ATL_rone, zero=ATL_rzero; #else const TYPE one[2] = {ATL_rone,ATL_rzero}, zero[2] = {ATL_rzero,ATL_rzero}; #endif C = malloc(N*ATL_MulBySize(N)); ATL_assert(C); ATL_U2GE(Order, N, U, ldu, C, N); cblas_trmm(Order, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, one, U, ldu, C, N); return(C); }
static TYPE trtritest(enum ATLAS_ORDER Order, enum ATLAS_UPLO Uplo, enum ATLAS_DIAG Diag, int CacheSize, int N, int lda, double *tim) { TYPE *A, *Acompare; int i; double t0, t1; TYPE normA, eps, resid; /*int ierr;*/ #ifdef TCPLX const TYPE one[2]={ATL_rone, ATL_rzero}; #else const TYPE one = ATL_rone; #endif eps = Mjoin(PATL,epsilon)(); A = malloc(ATL_MulBySize(lda)*N); Acompare = malloc(ATL_MulBySize(lda)*N); if (A == NULL) return(-1); if (Acompare == NULL) return(-1); t0 = ATL_flushcache(CacheSize); /* create random, diagonally dominant matrix with magic value at unused places. Last number is just the random seed. */ trigen(Order, Uplo, Diag, N, A, lda, PADVAL, N*1029+lda); /* Create backup to calculate residual. This one has to be used as a full matrix, so it has zero fills and correct diagonal. */ trigen(Order, Uplo, Diag, N, Acompare, lda, ATL_rzero, N*1029+lda); if (Diag==AtlasUnit) for (i=0; i < N; i++) Acompare[(i*(lda+1)) SHIFT] = ATL_rone; normA = trinrm1(Order,Uplo, Diag, N, A, lda); #ifdef DEBUG Mjoin(PATL,geprint)("A0", N, N, A, lda); #endif t0 = ATL_flushcache(-1); /* Calculate and time a solution */ t0 = time00(); test_trtri(Order, Uplo, Diag, N, A, lda); t1 = time00() - t0; *tim = t1; /* if (ierr != 0) { fprintf(stderr, "Return values != 0 : %d \n",ierr); return(9999.9999); }*/ t0 = ATL_flushcache(0); /* Instroduce a padding error. */ /* A[(5+5*lda)SHIFT]=114.0; */ #ifdef DEBUG Mjoin(PATL,geprint)("L", N, N, A, lda); #endif ATL_checkpad(Order, Uplo, Diag, N, A, lda); /* Calculate A^{-1}*A */ cblas_trmm(Order,CblasLeft,Uplo,AtlasNoTrans,Diag, N,N,one,A,lda,Acompare,lda); #ifdef DEBUG Mjoin(PATL,geprint)("A^{-1}*A", N, N, Acompare, N); #endif /* Subtract diagonal */ for (i=0; i < N; i++) Acompare[i*((lda+1) SHIFT)] -= ATL_rone; /* resid = trinrm1(Order, Uplo,AtlasNonUnit,N,Acompare,lda); fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); */ resid = Mjoin(PATL,genrm1)(N, N, Acompare, lda); #ifdef DEBUG if (resid/(normA*eps*N) > 10.0) fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid); #endif resid /= (normA * eps * N); free(Acompare); free(A); return(resid); }