示例#1
0
文件: lutst.c 项目: apollos/atlas
static TYPE *ATL_LmulUC(const int M, const int N, const TYPE *LU, const int ldl)
{
   const int lda = ldl SHIFT, MN = Mmin(M,N);
   int i, j, m;
   TYPE *C, *c;
   #ifdef TREAL
      const TYPE ONE=ATL_rone;
   #else
      const TYPE ONE[2] = {ATL_rone, ATL_rzero};
   #endif

   C = c = malloc(M*ATL_MulBySize(N));
   ATL_assert(c);
   if (M >= N)
   {
      for (j=0; j < MN; j++)
      {
         m = j SHIFT;
         for (i=0; i < m; i++) c[i] = ATL_rzero;
         #ifdef TCPLX
            c[i++] = ATL_rone;
            c[i++] = ATL_rzero;
         #else
            c[i++] = ATL_rone;
         #endif
         for (m=M SHIFT; i < m; i++) c[i] = LU[i];
         c += m;
         LU += lda;
      }
      LU -= MN * lda;
      for (m=M SHIFT; j < N; j++, c += m) Mjoin(PATL,zero)(M, c, 1);
      cblas_trmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
                 CblasNonUnit, M, N, ONE, LU, ldl, C, M);
   }
   else /* M < N */
   {
      for (j=0; j < M; j++)
      {
         m = (j+1) SHIFT;
         for (i=0; i < m; i++) c[i] = LU[i];
         for (m=M SHIFT; i < m; i++) c[i] = ATL_rzero;
         c += m;
         LU += lda;
      }
      Mjoin(PATL,gecopy)(M, N-M, LU, ldl, c, M);
      LU -= M * lda;
      cblas_trmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans,
                 CblasUnit, M, N, ONE, LU, ldl, C, M);
   }
   return(C);
}
示例#2
0
void ATL_lauumL(const int N, TYPE *A, const int lda)
{
   int Nleft, Nright;
   #ifdef TREAL
      const TYPE one=ATL_rone;
   #else
      const TYPE one[2]={ATL_rone, ATL_rzero};
   #endif
   TYPE *G, *U0=A, *U1;

   if (N > 1)
   {
      Nleft = N >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      #ifdef RowMajor_
         G = A + Nleft*(lda SHIFT);
         U1 = G + (Nleft SHIFT);
      #else
         G = A + (Nleft SHIFT);
         U1 = G + Nleft*(lda SHIFT);
      #endif
      ATL_lauumL(Nleft, U0, lda);
      my_syrk(MyOrder, CblasLower, my_trans, Nleft, Nright, ATL_rone,
              G, lda, ATL_rone, U0, lda);
      cblas_trmm(MyOrder, CblasLeft, CblasLower, my_trans, CblasNonUnit,
                 Nright, Nleft, one, U1, lda, G, lda);
      ATL_lauumL(Nright, U1, lda);
   }
示例#3
0
文件: lutst.c 项目: apollos/atlas
static TYPE *ATL_LmulUR(const int M, const int N, const TYPE *LU, const int ldl)
{
   const int lda = ldl SHIFT, ldc = N SHIFT, MN = Mmin(M,N);
   int i, j, m;
   TYPE *C, *c;
   #ifdef TREAL
      const TYPE ONE=ATL_rone;
   #else
      const TYPE ONE[2] = {ATL_rone, ATL_rzero};
   #endif

   C = c = malloc(M*ATL_MulBySize(N));
   ATL_assert(c);
   if (M >= N)
   {
      for (i=0; i != N; i++, LU += lda, C += ldc)
      {
         Mjoin(PATL,copy)(i+1, LU, 1, C, 1);
         Mjoin(PATL,zero)(N-i-1, C+((i+1)SHIFT), 1);
      }
      for(; i != M; i++, LU += lda, C += ldc) Mjoin(PATL,copy)(N, LU, 1, C, 1);
      LU -= lda * M;
      C -= ldc * M;
      cblas_trmm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasUnit,
                 M, N, ONE, LU, ldl, C, N);
   }
   else /* N > M */
   {
      for (i=0; i != M; i++, C += ldc, LU += lda)
      {
         Mjoin(PATL,zero)(i, C, 1);
         C[i SHIFT] = ATL_rone;
         #ifdef TCPLX
            C[(i SHIFT)+1] = ATL_rzero;
         #endif
         Mjoin(PATL,copy)(N-i-1, LU+((i+1)SHIFT), 1, C+((i+1)SHIFT), 1);
      }
      LU -= lda * M;
      C -= ldc * M;
      cblas_trmm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans,
                 CblasNonUnit, M, N, ONE, LU, ldl, C, N);
   }
   return(C);
}
示例#4
0
static TYPE *ATL_LtmulL
   (const enum CBLAS_ORDER Order, const int N, const TYPE *L, const int ldl)
{
   TYPE *C;
   #ifdef TREAL
      const TYPE one=ATL_rone, zero=ATL_rzero;
   #else
      const TYPE one[2] = {ATL_rone,ATL_rzero}, zero[2] = {ATL_rzero,ATL_rzero};
   #endif
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   ATL_L2GE(Order, N, L, ldl, C, N);
   cblas_trmm(Order, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit,
              N, N, one, L, ldl, C, N);
   return(C);
}
示例#5
0
static TYPE *ATL_UmulUt
   (const enum CBLAS_ORDER Order, const int N, const TYPE *U, const int ldu)
{
   TYPE *C;
   #ifdef TREAL
      const TYPE one=ATL_rone, zero=ATL_rzero;
   #else
      const TYPE one[2] = {ATL_rone,ATL_rzero}, zero[2] = {ATL_rzero,ATL_rzero};
   #endif
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   ATL_U2GE(Order, N, U, ldu, C, N);
   cblas_trmm(Order, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit,
              N, N, one, U, ldu, C, N);
   return(C);
}
示例#6
0
文件: trtritst.c 项目: AIDman/Kaldi
static TYPE trtritest(enum ATLAS_ORDER Order, enum ATLAS_UPLO Uplo,
                      enum ATLAS_DIAG Diag, int CacheSize, int N, int lda,
		      double *tim)
{
   TYPE *A, *Acompare;
   int i;
   double t0, t1;
   TYPE normA, eps, resid;
   /*int ierr;*/

   #ifdef TCPLX
      const TYPE one[2]={ATL_rone, ATL_rzero};
   #else
      const TYPE one = ATL_rone;
   #endif


   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*N);
   Acompare = malloc(ATL_MulBySize(lda)*N);
   if (A == NULL) return(-1);
   if (Acompare == NULL) return(-1);
   t0 = ATL_flushcache(CacheSize);

   /* create random, diagonally dominant matrix with magic value at
      unused places. Last number is just the random seed. */
   trigen(Order, Uplo, Diag, N, A, lda, PADVAL, N*1029+lda);

   /* Create backup to calculate residual. This one has to be used
      as a full matrix, so it has zero fills and correct diagonal. */
   trigen(Order, Uplo, Diag, N, Acompare, lda, ATL_rzero, N*1029+lda);
   if (Diag==AtlasUnit)
     for (i=0; i < N; i++)
       Acompare[(i*(lda+1)) SHIFT] = ATL_rone;


   normA = trinrm1(Order,Uplo, Diag, N, A, lda);
#ifdef DEBUG
   Mjoin(PATL,geprint)("A0", N, N, A, lda);
#endif

   t0 = ATL_flushcache(-1);

   /* Calculate and time a solution */
   t0 = time00();
   test_trtri(Order, Uplo, Diag, N, A, lda);
   t1 = time00() - t0;
   *tim = t1;

/*   if (ierr != 0)
   {
     fprintf(stderr, "Return values != 0 : %d \n",ierr);
     return(9999.9999);
   }*/


   t0 = ATL_flushcache(0);

   /* Instroduce a padding error. */
   /* A[(5+5*lda)SHIFT]=114.0; */

#ifdef DEBUG
   Mjoin(PATL,geprint)("L", N, N, A, lda);
#endif
   ATL_checkpad(Order, Uplo, Diag, N, A, lda);

   /* Calculate A^{-1}*A */
   cblas_trmm(Order,CblasLeft,Uplo,AtlasNoTrans,Diag,
		    N,N,one,A,lda,Acompare,lda);

#ifdef DEBUG
     Mjoin(PATL,geprint)("A^{-1}*A", N, N, Acompare, N);
#endif

   /* Subtract diagonal */
   for (i=0; i < N; i++)
     Acompare[i*((lda+1) SHIFT)] -= ATL_rone;

/*
   resid = trinrm1(Order, Uplo,AtlasNonUnit,N,Acompare,lda);
   fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid);
*/

   resid = Mjoin(PATL,genrm1)(N, N, Acompare, lda);


#ifdef DEBUG
   if (resid/(normA*eps*N) > 10.0)
     fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid);
#endif
   resid /= (normA * eps * N);

   free(Acompare);
   free(A);

   return(resid);
}