Exemplo n.º 1
0
int ATL_potrfRL(const int N, TYPE *A, const int lda)
{
    TYPE *An, *Ar;
    int Nleft, Nright, ierr;
    static const TYPE ONE[2] = {ATL_rone, ATL_rzero};
    const int lda2=lda+lda;

    if (N > 1)
    {
        Nleft = N >> 1;
#ifdef NB
        if (Nleft > NB<<1) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
#endif
        Nright = N - Nleft;
        ierr = ATL_potrfRL(Nleft, A, lda);
        if (!ierr)
        {
            Ar = A + Nleft * lda2;
            An = Ar + Nleft+Nleft;
            cblas_trsm(CblasRowMajor, CblasRight, CblasLower, CblasConjTrans,
                       CblasNonUnit, Nright, Nleft, ONE, A, lda, Ar, lda);
            cblas_herk(CblasRowMajor, CblasLower, CblasNoTrans, Nright, Nleft,
                       ATL_rnone, Ar, lda, ATL_rone, An, lda);
            ierr = ATL_potrfRL(Nright, An, lda);
            if (ierr) return(ierr+Nleft);
        }
        else return(ierr);
    }
Exemplo n.º 2
0
int ATL_getrfC(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Column-major factorization of form
 *   A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit diagonal
 * elements (lower trapazoidal if M > N), and U is upper triangular (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nleft, Nright, k, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ac, *An;

   if (((size_t)M)*N <= ATL_L1elts)
      return(Mjoin(PATL,getf2)(M, N, A, lda, ipiv));
   #if defined(ATL_USEPTHREADS) && defined(ATL_USEPCA)
      if (N <= (NB<<2) && N >= 16 && M-N >= ATL_PCAMin &&
          ((size_t)ATL_MulBySize(M)*N) <= CacheEdge*ATL_NTHREADS)
      {
         if (N >= 16)
            ierr = Mjoin(PATL,tgetf2)(M, N, A, lda, ipiv);
         else
            ierr = Mjoin(PATL,tgetf2_nocp)(M, N, A, lda, ipiv);
         return(ierr);
      }
   #endif
   if (MN > ATL_luMmin)
   {
      Nleft = MN >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      i = ATL_getrfC(M, Nleft, A, lda, ipiv);  /* factor left to L & U */
      if (i) if (!ierr) ierr = i;
/*
 *    Update trailing submatrix
 */
      Ac = A + (Nleft * lda SHIFT);
      An = Ac + (Nleft SHIFT);
      ATL_laswp(Nright, Ac, lda, 0, Nleft, ipiv, 1);
      cblas_trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                 Nleft, Nright, one, A, lda, Ac, lda);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-Nleft, Nright,
                 Nleft, none, A+(Nleft SHIFT), lda, Ac, lda, one, An, lda);
      i = ATL_getrfC(M-Nleft, Nright, An, lda, ipiv+Nleft);
      if (i) if (!ierr) ierr = i + Nleft;
      for (i=Nleft; i != MN; i++) ipiv[i] += Nleft;
      ATL_laswp(Nleft, A, lda, Nleft, MN, ipiv, 1);
   }
Exemplo n.º 3
0
int ATL_trtriRL(const enum ATLAS_DIAG Diag, const int N, TYPE *A, const int lda)
{
  int ierr = 0;

   TYPE *Age, *Atr;
   TYPE tmp;
   int Nleft, Nright;
   #ifdef TREAL
      #define one ATL_rone
      #define mone -ATL_rone
      #define none ATL_rnone
   #else
      static const TYPE one[2] = {ATL_rone, ATL_rzero};
      static const TYPE mone[2] = {-ATL_rone, ATL_rzero};
      static const TYPE none[2] = {ATL_rnone, ATL_rzero};
   #endif

#ifdef TREAL
   if (N > REAL_RECURSE_LIMIT)
#else
   if (N > 1)
#endif
   {
      Nleft = N >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;

      Age = A + ((Nleft*lda) SHIFT);
      Atr = A + (Nleft * (lda+1) SHIFT);

      cblas_trsm(AtlasRowMajor, AtlasRight, AtlasLower, AtlasNoTrans, Diag,
                 Nright, Nleft, one, A, lda, Age, lda);

      cblas_trsm(AtlasRowMajor, AtlasLeft, AtlasLower, AtlasNoTrans, Diag,
                 Nright, Nleft, mone, Atr, lda, Age, lda);

      ierr = ATL_trtriRL(Diag, Nleft, A, lda);
      if (ierr!=0) return(ierr);
      ierr = ATL_trtriRL(Diag, Nright, Atr, lda);
      if (ierr!=0) return(ierr+Nleft);

   }
Exemplo n.º 4
0
int ATL_getrfR(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Row-major factorization of form
 *   A = L * U * P
 * where P is a column-permutation matrix, L is lower triangular (lower
 * trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nup, Ndown, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ar, *Ac, *An;

   if (MN > 1)
   {
      Nup = MN >> 1;
      #ifdef NB
         if (Nup > NB) Nup = ATL_MulByNB(ATL_DivByNB(Nup));
      #endif
      Ndown = M - Nup;
      i = ATL_getrfR(Nup, N, A, lda, ipiv);
      if (i) if (!ierr) ierr = i;
      Ar = A + (Nup * lda SHIFT);
      Ac = A + (Nup SHIFT);
      An = Ar + (Nup SHIFT);

      ATL_laswp(Ndown, Ar, lda, 0, Nup, ipiv, 1);  /* apply pivots */
      cblas_trsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans,
                 CblasUnit, Ndown, Nup, one, A, lda, Ar, lda);
      cblas_gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, Ndown, N-Nup, Nup,
                 none, Ar, lda, Ac, lda, one, An, lda);

      i = ATL_getrfR(Ndown, N-Nup, An, lda, ipiv+Nup);
      if (i) if (!ierr) ierr = Nup + i;
      for (i=Nup; i != MN; i++) ipiv[i] += Nup;
      ATL_laswp(Nup, A, lda, Nup, MN, ipiv, 1);  /* apply pivots */
   }
Exemplo n.º 5
0
int Mjoin(PATL,trtrs)
   (const enum ATLAS_UPLO Uplo, const enum ATLAS_TRANS TA,
    const enum ATLAS_DIAG Diag, ATL_CINT N, ATL_CINT NRHS,
    const TYPE *A, ATL_CINT lda, TYPE *B, ATL_CINT ldb)
/*
 * Checks for singularity, and then solves system:
 *   A * X = B or A^T * X = B
 * where A is a triangular matrix (as indicated by Uplo).
 * RETURNS :
 *   0 : successful exit
 *  <0 : argument #(-return) had illegal value (start counting from 1)
 *  >0 : diag elt # (1st elt 1) was zero, so A is singular
 */
{
   #ifdef TCPLX
      TYPE one[2] = {ATL_rone, ATL_rzero};
      ATL_CINT N2=N+N;
   #else
      #define one ATL_rone
   #endif
   ATL_CINT ldap1 = (lda+1)SHIFT;
   ATL_INT i;
/*
 * Zero on diagonal means singular triangular matrix
 */
   if (Diag != AtlasUnit)
   {
      #ifdef TCPLX
         for (i=0; i < N2; i += 2, A += ldap1)
            if (SCALAR_IS_ZERO(A))
               return((i>>1)+1);
      #else
         for (i=0; i < N; i++, A += ldap1)
            if (*A == ATL_rzero)
               return(i+1);
      #endif
      A -= ldap1*N;
   }
   cblas_trsm(CblasColMajor, AtlasLeft, Uplo, TA, Diag, N, NRHS, one,
              A, lda, B, ldb);
   return(0);
}
Exemplo n.º 6
0
int ATL_potrfL(const int N, TYPE *A, const int lda)
{
   TYPE *An, *Ar;
   const size_t lda2=(lda SHIFT);
   int Nleft, Nright, ierr;
   #ifdef TREAL
      #define lda2 lda
      #define ONE ATL_rone
   #else
      static const TYPE ONE[2] = {ATL_rone, ATL_rzero};
   #endif

#ifdef TREAL
   if (N > 4)
#else
   if (N > 1)
#endif
   {
      Nleft = N >> 1;
      #ifdef NB
         if (Nleft > NB<<1) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      ierr = ATL_potrfL(Nleft, A, lda);
      if (!ierr)
      {
         Ar = A + (Nleft SHIFT);
         An = Ar + lda2 * Nleft;
         cblas_trsm(CblasColMajor, CblasRight, CblasLower, llt_trans,
                    CblasNonUnit, Nright, Nleft, ONE, A, lda, Ar, lda);
         llt_syrk(CblasColMajor, CblasLower, CblasNoTrans, Nright, Nleft,
                  ATL_rnone, Ar, lda, ATL_rone, An, lda);
         ierr = ATL_potrfL(Nright, An, lda);
         if (ierr) return(ierr+Nleft);
      }
      else return(ierr);
   }
Exemplo n.º 7
0
void ATL_getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans,
               const int N, const int NRHS, const TYPE *A, const int lda,
               const int *ipiv, TYPE *B, const int ldb)
/*
 * OK, this pivoting crap is tricky.  The trick is, when we pivot columns
 * of the matrix, this effects X but not B, and when we pivot rows, this
 * effects B, but not X.  So, must never attempt to apply a Pr
 * (row permutation matrix) to X or a Pc to B.
 */
{
   enum CBLAS_DIAG Lunit, Uunit;
   #ifdef TREAL
      #define one ATL_rone
   #else
      const TYPE one[2] = {ATL_rone, ATL_rzero};
   #endif

   if (!N || !NRHS) return;

   if (Order == CblasColMajor)
   {
/*
 *    A*X = B.  Since we have pivoted A by Pr (PA=LU), we pivot B by Pr,
 *    **and this does not effect X at all**, so we solve
 *    X = inv(U)*inv(L)*(Pr * B)
 */
      if (Trans == CblasNoTrans)
      {
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, 1);
         cblas_trsm(Order, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                    N, NRHS, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
      }
/*
 *    trans(L*U = PA)  ==>  U' L' = A' P, so P is Pc, and does not effect B,
 *    U' L' Pc X = B  ==> Pc X = inv(L') * inv(U') * B, but we want
 *    X, not Pc X, so we apply inv(Pc) after doing these steps.
 */
      else
      {
         cblas_trsm(Order, CblasLeft, CblasUpper, Trans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasLeft, CblasLower, Trans, CblasUnit,
                    N, NRHS, one, A, lda, B, ldb);
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, -1);
      }
   }
/*
 * For row-major arrays, we actually have X^T and B^T, so must tranpose
 * both sides of equation, so what we are solving is:  X' * A' = B'
 */
   else
   {
/*
 *    A = LU*inv(Pc), X' * (LU*inv(Pc))' = B'  ==>  X' * inv(Pc) * U' * L' = B'
 *    X' * inv(Pc) = U' * L' * B', so apply inv(Pc) after solves.
 */
      if (Trans == CblasNoTrans)
      {
         cblas_trsm(Order, CblasRight, CblasLower, CblasTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasRight, CblasUpper, CblasTrans, CblasUnit,
                    NRHS, N, one, A, lda, B, ldb);
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, -1);
      }
/*
 *    A' = (LU*inv(Pc))', but Pc is on rows of non-trans matrix, so:
 *    X' * (inv(Pr)*L*U) = B'
 *    X' = (Pr * B') * inv(U) * inv(L)
 *    NOTE: this case is untested
 */
      else
      {
         ATL_laswp(NRHS, B, ldb, 0, N, ipiv, 1);
         cblas_trsm(Order, CblasRight, CblasUpper, CblasNoTrans, CblasUnit,
                    NRHS, N, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
      }
   }
}
Exemplo n.º 8
0
void ATL_potrs(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
               const int N, const int NRHS, const TYPE *A, const int lda,
               TYPE *B, const int ldb)
{
   #ifdef TCPLX
      int j;
      const int ldb2 = ldb+ldb;
      const TYPE one[2] = {ATL_rone, ATL_rzero};
   #else
      #define one ATL_rone
   #endif

   if (!N || !NRHS) return;
   if (Order == CblasColMajor)
   {
/*
 *    Solve X = inv(U) * inv(U') * B
 */
      if (Uplo == AtlasUpper)
      {
         cblas_trsm(Order, CblasLeft, CblasUpper, MyTrans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
      }
/*
 *    Solve X = inv(L') * inv(L) * B
 */
      else
      {
         cblas_trsm(Order, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasLeft, CblasLower, MyTrans, CblasNonUnit,
                    N, NRHS, one, A, lda, B, ldb);
      }
   }
/*
 * For row-major, remember we have x' and b', so we must transpose usual
 * equations
 */
   else
   {
      #ifdef TCPLX
         for (j=0; j < NRHS; j++)
            Mjoin(PATLU,scal)(N, -1.0, B+j*ldb2+1, 2);
      #endif
/*
 *    solve x^T = b^T * inv(U) * inv(U^T)
 *    conj( x^H = b^H * inv(U) * inv(U^H) )  (complex)
 */
      if (Uplo == CblasUpper)
      {
         cblas_trsm(Order, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasRight, CblasUpper, MyTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
      }
/*
 *    solve x^T = b^T * inv(L^T) * inv(L)
 *    conj( x^H = b^H * inv(L^H) * inv(L) )  (complex)
 */
      else
      {
         cblas_trsm(Order, CblasRight, CblasLower, MyTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
         cblas_trsm(Order, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit,
                    NRHS, N, one, A, lda, B, ldb);
      }
      #ifdef TCPLX
         for (j=0; j < NRHS; j++)
            Mjoin(PATLU,scal)(N, -1.0, B+j*ldb2+1, 2);
      #endif
   }
}