示例#1
0
文件: invtrsm.c 项目: certik/vendor
static TYPE geresid(enum CBLAS_ORDER Order, int N, TYPE *A, int lda,
                    TYPE *AI, int ldi)
/*
 * returns ||A - AI|| / (N * eps * ||A|| * ||AI||);
 * for row-major, we are not using 1-norm, since we are adding rows instead
 * of cols, but it should be an equally good norm, so don't worry about it.
 */
{
   TYPE numer, denom, eps;
   const int ldcp1 = (N+1)SHIFT;
   TYPE *C;
   int i;

   #ifdef TREAL
      TYPE one = ATL_rone, zero = ATL_rzero;
   #else
      TYPE one[2] = {ATL_rone, ATL_rzero}, zero[2] = {ATL_rzero, ATL_rzero};
   #endif

   eps = Mjoin(PATL,epsilon)();
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   cblas_gemm(Order, CblasNoTrans, CblasNoTrans, N, N, N, one, A, lda,
              AI, ldi, zero, C, N);                /* C now has A*inv(A) */
   for (i=0; i != N; i++) C[i*ldcp1] -= ATL_rone;  /* C now has A*inv(A)-I */
   numer = Mjoin(PATL,genrm1)(N, N, C, N);
   denom = Mjoin(PATL,genrm1)(N, N, A, lda) *
           Mjoin(PATL,genrm1)(N, N, AI, ldi) * N * eps;
   free(C);
   return(numer/denom);
}
示例#2
0
int ATL_getriC(const int N, TYPE *A, const int lda, const int *ipiv,
               TYPE *wrk, const int lwrk)
{
   const int lda2 = lda SHIFT;
   int J, jb, nb, nright, iret;
   TYPE *A0 = A;
   #ifdef TREAL
      const TYPE one=ATL_rone, none=ATL_rnone;
   #else
      const TYPE one[2]={ATL_rone,ATL_rzero}, none[2]={ATL_rnone, ATL_rzero};
   #endif

   iret = ATL_trtri(CblasColMajor, CblasUpper, CblasNonUnit, N, A, lda);
   if (!iret && N > 1)
   {
/*
 *    Find largest NB we can use with our provided workspace
 */
      jb = lwrk / N;
      if (jb >= NB) nb = ATL_MulByNB(ATL_DivByNB(jb));
      else if (jb >= ATL_mmNU) nb = (jb/ATL_mmNU)*ATL_mmNU;
      else nb = jb;
      if (!nb) return(-6);  /* need at least 1 col of workspace */
/*
 *    Only first iteration will have partial block, unroll it
 */
      jb = N - (N/nb)*nb;
      if (!jb) jb = nb;
      J = N - jb;
      A += lda2*J;
      trcpzeroL(jb, jb, A+(J SHIFT), lda, wrk, jb);
      cblas_trsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasUnit,
                 N, jb, one, wrk, jb, A, lda);
      if (J)
      {
         do
         {
            J -= nb;
            A -= nb*lda2;
            nright = N-J;
            trcpzeroL(nright, nb, A+(J SHIFT), lda, wrk, nright);
            cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, nb,
                       nright-nb, none, A+nb*lda2, lda, wrk+(nb SHIFT), nright,
                       one, A, lda);
            cblas_trsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
                       CblasUnit, N, nb, one, wrk, nright, A, lda);
         }
         while(J);
      }
/*
 *    Apply column interchanges
 */
      for (J=N-2; J >= 0; J--)
      {
         jb = ipiv[J];
         if (jb != J) cblas_swap(N, A+J*lda2, 1, A+jb*lda2, 1);
      }
   }
   return(iret);
}
示例#3
0
文件: ATL_getrfC.c 项目: AIDman/Kaldi
int ATL_getrfC(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Column-major factorization of form
 *   A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit diagonal
 * elements (lower trapazoidal if M > N), and U is upper triangular (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nleft, Nright, k, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ac, *An;

   if (((size_t)M)*N <= ATL_L1elts)
      return(Mjoin(PATL,getf2)(M, N, A, lda, ipiv));
   #if defined(ATL_USEPTHREADS) && defined(ATL_USEPCA)
      if (N <= (NB<<2) && N >= 16 && M-N >= ATL_PCAMin &&
          ((size_t)ATL_MulBySize(M)*N) <= CacheEdge*ATL_NTHREADS)
      {
         if (N >= 16)
            ierr = Mjoin(PATL,tgetf2)(M, N, A, lda, ipiv);
         else
            ierr = Mjoin(PATL,tgetf2_nocp)(M, N, A, lda, ipiv);
         return(ierr);
      }
   #endif
   if (MN > ATL_luMmin)
   {
      Nleft = MN >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      i = ATL_getrfC(M, Nleft, A, lda, ipiv);  /* factor left to L & U */
      if (i) if (!ierr) ierr = i;
/*
 *    Update trailing submatrix
 */
      Ac = A + (Nleft * lda SHIFT);
      An = Ac + (Nleft SHIFT);
      ATL_laswp(Nright, Ac, lda, 0, Nleft, ipiv, 1);
      cblas_trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                 Nleft, Nright, one, A, lda, Ac, lda);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-Nleft, Nright,
                 Nleft, none, A+(Nleft SHIFT), lda, Ac, lda, one, An, lda);
      i = ATL_getrfC(M-Nleft, Nright, An, lda, ipiv+Nleft);
      if (i) if (!ierr) ierr = i + Nleft;
      for (i=Nleft; i != MN; i++) ipiv[i] += Nleft;
      ATL_laswp(Nleft, A, lda, Nleft, MN, ipiv, 1);
   }
示例#4
0
int ATL_getrfR(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Row-major factorization of form
 *   A = L * U * P
 * where P is a column-permutation matrix, L is lower triangular (lower
 * trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nup, Ndown, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ar, *Ac, *An;

   if (MN > 1)
   {
      Nup = MN >> 1;
      #ifdef NB
         if (Nup > NB) Nup = ATL_MulByNB(ATL_DivByNB(Nup));
      #endif
      Ndown = M - Nup;
      i = ATL_getrfR(Nup, N, A, lda, ipiv);
      if (i) if (!ierr) ierr = i;
      Ar = A + (Nup * lda SHIFT);
      Ac = A + (Nup SHIFT);
      An = Ar + (Nup SHIFT);

      ATL_laswp(Ndown, Ar, lda, 0, Nup, ipiv, 1);  /* apply pivots */
      cblas_trsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans,
                 CblasUnit, Ndown, Nup, one, A, lda, Ar, lda);
      cblas_gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, Ndown, N-Nup, Nup,
                 none, Ar, lda, Ac, lda, one, An, lda);

      i = ATL_getrfR(Ndown, N-Nup, An, lda, ipiv+Nup);
      if (i) if (!ierr) ierr = Nup + i;
      for (i=Nup; i != MN; i++) ipiv[i] += Nup;
      ATL_laswp(Nup, A, lda, Nup, MN, ipiv, 1);  /* apply pivots */
   }