Example #1
0
int ATL_getrfC(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Column-major factorization of form
 *   A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit diagonal
 * elements (lower trapazoidal if M > N), and U is upper triangular (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nleft, Nright, k, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ac, *An;

   if (((size_t)M)*N <= ATL_L1elts)
      return(Mjoin(PATL,getf2)(M, N, A, lda, ipiv));
   #if defined(ATL_USEPTHREADS) && defined(ATL_USEPCA)
      if (N <= (NB<<2) && N >= 16 && M-N >= ATL_PCAMin &&
          ((size_t)ATL_MulBySize(M)*N) <= CacheEdge*ATL_NTHREADS)
      {
         if (N >= 16)
            ierr = Mjoin(PATL,tgetf2)(M, N, A, lda, ipiv);
         else
            ierr = Mjoin(PATL,tgetf2_nocp)(M, N, A, lda, ipiv);
         return(ierr);
      }
   #endif
   if (MN > ATL_luMmin)
   {
      Nleft = MN >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      i = ATL_getrfC(M, Nleft, A, lda, ipiv);  /* factor left to L & U */
      if (i) if (!ierr) ierr = i;
/*
 *    Update trailing submatrix
 */
      Ac = A + (Nleft * lda SHIFT);
      An = Ac + (Nleft SHIFT);
      ATL_laswp(Nright, Ac, lda, 0, Nleft, ipiv, 1);
      cblas_trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                 Nleft, Nright, one, A, lda, Ac, lda);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-Nleft, Nright,
                 Nleft, none, A+(Nleft SHIFT), lda, Ac, lda, one, An, lda);
      i = ATL_getrfC(M-Nleft, Nright, An, lda, ipiv+Nleft);
      if (i) if (!ierr) ierr = i + Nleft;
      for (i=Nleft; i != MN; i++) ipiv[i] += Nleft;
      ATL_laswp(Nleft, A, lda, Nleft, MN, ipiv, 1);
   }
Example #2
0
int ATL_getrfR(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Row-major factorization of form
 *   A = L * U * P
 * where P is a column-permutation matrix, L is lower triangular (lower
 * trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nup, Ndown, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ar, *Ac, *An;

   if (MN > 1)
   {
      Nup = MN >> 1;
      #ifdef NB
         if (Nup > NB) Nup = ATL_MulByNB(ATL_DivByNB(Nup));
      #endif
      Ndown = M - Nup;
      i = ATL_getrfR(Nup, N, A, lda, ipiv);
      if (i) if (!ierr) ierr = i;
      Ar = A + (Nup * lda SHIFT);
      Ac = A + (Nup SHIFT);
      An = Ar + (Nup SHIFT);

      ATL_laswp(Ndown, Ar, lda, 0, Nup, ipiv, 1);  /* apply pivots */
      cblas_trsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans,
                 CblasUnit, Ndown, Nup, one, A, lda, Ar, lda);
      cblas_gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, Ndown, N-Nup, Nup,
                 none, Ar, lda, Ac, lda, one, An, lda);

      i = ATL_getrfR(Ndown, N-Nup, An, lda, ipiv+Nup);
      if (i) if (!ierr) ierr = Nup + i;
      for (i=Nup; i != MN; i++) ipiv[i] += Nup;
      ATL_laswp(Nup, A, lda, Nup, MN, ipiv, 1);  /* apply pivots */
   }
Example #3
0
static TYPE lutestR(int CacheSize, int M, int N, int lda, int *npiv,
                    double *tim)
{
   TYPE *A, *LmU;
   int *ipiv;
   const int MN = Mmin(M,N);
   int i;
   double t0, t1;
   TYPE normA, eps, resid;

   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*M);
   if (A == NULL) return(-1);
   ipiv = malloc( MN * sizeof(int) );
   if (ipiv == NULL)
   {
      free(A);
      return(-1);
   }
   t0 = ATL_flushcache(CacheSize);

   Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A0", N, M, A, lda);
   #endif
   normA = Mjoin(PATL,genrm1)(N, M, A, lda); /* actually infnrm, but OK */

   t0 = ATL_flushcache(-1);

   t0 = time00();
   test_getrf(CblasRowMajor, M, N, A, lda, ipiv);
   t1 = time00() - t0;
   *tim = t1;

   t0 = ATL_flushcache(0);

   #ifdef DEBUG
      Mjoin(PATL,geprint)("LU", N, M, A, lda);
   #endif
   LmU = ATL_LmulUR(M, N, A, lda);  /* LmU contains L * U */
   #ifdef DEBUG
      Mjoin(PATL,geprint)("L*U", N, M, LmU, N);
   #endif
   Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda);  /* regenerate A, overwriting LU */
   ATL_laswp(M, A, lda, 0, MN, ipiv, 1);  /* apply swaps to A */
   resid = Mjoin(PATL,gediffnrm1)(N, M, A, lda, LmU, N);
   resid /= (normA * eps * Mmin(M,N));
   *npiv = findnpvt(MN, ipiv);

   free(LmU);
   free(A);
   free(ipiv);

   return(resid);
}
Example #4
0
void ATL_getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans,
               const int N, const int NRHS, const TYPE *A, const int lda,
               const int *ipiv, TYPE *B, const int ldb)
/*
 * OK, this pivoting crap is tricky.  The trick is, when we pivot columns
 * of the matrix, this effects X but not B, and when we pivot rows, this
 * effects B, but not X.  So, must never attempt to apply a Pr
 * (row permutation matrix) to X or a Pc to B.
 */
{
   enum CBLAS_DIAG Lunit, Uunit;
   #ifdef TREAL
      #define one ATL_rone
   #else
      const TYPE one[2] = {ATL_rone, ATL_rzero};
   #endif

   if (!N || !NRHS) return;

   if (Order == CblasColMajor)
   {
/*
 *    A*X = B.  Since we have pivoted A by Pr (PA=LU), we pivot B by Pr,
 *    **and this does not effect X at all**, so we solve
 *    X = inv(U)*inv(L)*(Pr * B)
 */
      if (Trans == CblasNoTrans)
      {
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, 1);
         cblas_trsm(Order, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                    N, NRHS, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
      }
/*
 *    trans(L*U = PA)  ==>  U' L' = A' P, so P is Pc, and does not effect B,
 *    U' L' Pc X = B  ==> Pc X = inv(L') * inv(U') * B, but we want
 *    X, not Pc X, so we apply inv(Pc) after doing these steps.
 */
      else
      {
         cblas_trsm(Order, CblasLeft, CblasUpper, Trans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasLeft, CblasLower, Trans, CblasUnit,
                    N, NRHS, one, A, lda, B, ldb);
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, -1);
      }
   }
/*
 * For row-major arrays, we actually have X^T and B^T, so must tranpose
 * both sides of equation, so what we are solving is:  X' * A' = B'
 */
   else
   {
/*
 *    A = LU*inv(Pc), X' * (LU*inv(Pc))' = B'  ==>  X' * inv(Pc) * U' * L' = B'
 *    X' * inv(Pc) = U' * L' * B', so apply inv(Pc) after solves.
 */
      if (Trans == CblasNoTrans)
      {
         cblas_trsm(Order, CblasRight, CblasLower, CblasTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasRight, CblasUpper, CblasTrans, CblasUnit,
                    NRHS, N, one, A, lda, B, ldb);
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, -1);
      }
/*
 *    A' = (LU*inv(Pc))', but Pc is on rows of non-trans matrix, so:
 *    X' * (inv(Pr)*L*U) = B'
 *    X' = (Pr * B') * inv(U) * inv(L)
 *    NOTE: this case is untested
 */
      else
      {
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, 1);
         cblas_trsm(Order, CblasRight, CblasUpper, CblasNoTrans, CblasUnit,
                    NRHS, N, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
      }
   }
}