Exemple #1
0
static int L2LU(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Complex Level 2 based left-looking LU.
 * A is an M x N column-major complex matrix stored as interleaved
 * (real, imag) pairs with leading dimension lda (in complex elements);
 * it is overwritten in place.  ipiv[j] receives the row chosen as pivot
 * for column j, for j = 0 .. min(M,N)-1.
 * RETURNS: 0 if every pivot was non-zero, otherwise the 1-based index of
 *          the first column whose pivot was exactly zero.  The remaining
 *          columns are still processed in that case.
 */
{
   TYPE *Ac=A;                 /* active column */
   TYPE tmp[2];                /* reciprocal of the current pivot */
   const TYPE one[2] = {ATL_rone, ATL_rzero}, none[2] = {ATL_rnone, ATL_rzero};
   const int MN=Mmin(M,N), MN_1=MN-1, lda2=lda+lda;
   int ip, ip2, j, j2, jn, jn2, iret=0;

/*
 * j2/jn2 are j/jn doubled, i.e. offsets in units of TYPE rather than
 * complex elements; jn is always j+1.
 */
   for (j=0, j2=0, jn=1, jn2=2; j != MN; j=jn++, j2 += 2, jn2 += 2)
   {
      ipiv[j] = ip = j + cblas_iamax(M-j, Ac+j2, 1);
      ip2 = ip + ip;
      if (Ac[ip2] != ATL_rzero || Ac[ip2+1] != ATL_rzero)
      {
/*
 *       Invert the pivot before the swap moves it, interchange full rows
 *       j and ip of the panel, then scale the sub-diagonal of column j
 */
         Mjoin(PATL,cplxinvert)(1, Ac+ip2, 1, tmp, 1);
         if (ip != j)
            cblas_swap(N, A+j2, lda, A+ip2, lda);
         cblas_scal(M-jn, tmp, Ac+jn2, 1);
      }
      else if (!iret) iret = jn;
/*
 *    Advance to the next column and apply all previously computed columns
 *    of L to it.  This is done even after a zero pivot: otherwise Ac would
 *    stay on the singular column and every later column would be left
 *    unfactored.
 */
      if (j != MN_1)
      {
         Ac += lda2;
         cblas_trsv(CblasColMajor, CblasLower, CblasNoTrans, CblasUnit, jn,
                    A, lda, Ac, 1);
         cblas_gemv(CblasColMajor, CblasNoTrans, M-jn, jn, none,
                    A+jn2, lda, Ac, 1, one, Ac+jn2, 1);
      }
   }
   return(iret);
}
Exemple #2
0
int ATL_getriC(const int N, TYPE *A, const int lda, const int *ipiv,
               TYPE *wrk, const int lwrk)
/*
 * Column-major matrix inversion step working on an N x N matrix A (leading
 * dimension lda) and pivot vector ipiv, using wrk (lwrk elements) as
 * scratch space.  The upper triangle is first handed to ATL_trtri; the
 * result is then combined with the unit lower triangle one column panel
 * at a time, moving right to left, and finally columns are interchanged
 * according to ipiv.
 * NOTE(review): trcpzeroL is defined elsewhere; from its use here it
 * presumably copies the lower-triangular panel into wrk and zeros it in A.
 * RETURNS: value returned by ATL_trtri, or -6 if lwrk (argument 6) is too
 *          small to hold a single column of workspace.
 */
{
   #ifdef TREAL
      const TYPE one=ATL_rone, none=ATL_rnone;
   #else
      const TYPE one[2]={ATL_rone,ATL_rzero}, none[2]={ATL_rnone, ATL_rzero};
   #endif
   const int colstride = lda SHIFT;   /* distance between columns, in TYPE */
   int col, width, blk, ntrail, wrkcols, info;
   TYPE *pan;                         /* start of the current column panel */

   info = ATL_trtri(CblasColMajor, CblasUpper, CblasNonUnit, N, A, lda);
   if (info || N <= 1)
      return(info);
/*
 * Pick the widest panel the workspace allows, rounded down to a multiple
 * of NB when possible, else to a multiple of ATL_mmNU
 */
   wrkcols = lwrk / N;
   if (wrkcols >= NB)
      blk = ATL_MulByNB(ATL_DivByNB(wrkcols));
   else if (wrkcols >= ATL_mmNU)
      blk = (wrkcols/ATL_mmNU)*ATL_mmNU;
   else
      blk = wrkcols;
   if (!blk)
      return(-6);  /* need at least 1 col of workspace */
/*
 * The rightmost panel is the only one that may be narrower than blk;
 * handle it outside the loop
 */
   width = N % blk;
   if (width == 0)
      width = blk;
   col = N - width;
   pan = A + colstride*col;
   trcpzeroL(width, width, pan+(col SHIFT), lda, wrk, width);
   cblas_trsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasUnit,
              N, width, one, wrk, width, pan, lda);
/*
 * Remaining panels are all exactly blk columns wide
 */
   while (col)
   {
      col -= blk;
      pan -= blk*colstride;
      ntrail = N - col;
      trcpzeroL(ntrail, blk, pan+(col SHIFT), lda, wrk, ntrail);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, blk,
                 ntrail-blk, none, pan+blk*colstride, lda,
                 wrk+(blk SHIFT), ntrail, one, pan, lda);
      cblas_trsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
                 CblasUnit, N, blk, one, wrk, ntrail, pan, lda);
   }
/*
 * Apply column interchanges, last recorded pivot first
 */
   for (col = N-2; col >= 0; col--)
   {
      const int piv = ipiv[col];
      if (piv != col)
         cblas_swap(N, A+col*colstride, 1, A+piv*colstride, 1);
   }
   return(info);
}
Exemple #3
0
static int LU1(ATL_CINT M, ATL_CINT N, ATL_CINT j, TYPE *A, ATL_CINT lda,
               int *ipiv)
/*
 * Performs an LU factorization step on the jth column: selects the pivot
 * row among rows j..M-1, records it in ipiv[j], interchanges rows j and
 * the pivot row across all N columns of the panel, and divides the
 * entries below the diagonal of column j by the pivot.
 * N is the full width of column panel, A is ptr to beginning of panel,
 * lda is its leading dimension.
 * RETURNS: 0 on success, non-zero if no non-zero pivot exists
 */
{
/*
 * NOTE(review): in the real build lda2 and none are macros that are never
 * #undef'd, so they remain visible after this function; confirm whether
 * later code in the file relies on that before changing them.
 */
   #ifdef TCPLX
      ATL_CINT lda2 = lda+lda;
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
   #else
      #define lda2 lda
      #define none ATL_rnone
   #endif
   TYPE *Ac = A + j*lda2;  /* active column */
   TYPE pivval;            /* real: pivot value; complex: |re|+|im| of pivot */
   ATL_INT ip;

   ipiv[j] = ip = j + cblas_iamax(M-j, Ac+(j SHIFT), 1);
   #ifdef TCPLX
      pivval = Mabs(Ac[ip+ip]) + Mabs(Ac[ip+ip+1]);
   #else
      pivval = Ac[ip];
   #endif
   if (pivval != ATL_rzero)
   {
      if (ip != j)
         cblas_swap(N, A+(j SHIFT), lda, A+(ip SHIFT), lda);
/*
 *    Scale by the reciprocal when the pivot is at least ATL_laSAFMIN in
 *    magnitude; otherwise divide each element directly
 */
      #ifdef TCPLX
         if (pivval >= ATL_laSAFMIN)
         {
            TYPE invs[2];
            Mjoin(PATL,cplxinvert)(1, Ac+j+j, 1, invs, 1);
            cblas_scal(M-j-1, invs, Ac+j+j+2, 1);
         }
         else
            Mjoin(PATL,cplxdivide)(M-j-1, Ac+j+j, Ac+j+j+2, 1, Ac+j+j+2, 1);
      #else
         if (Mabs(pivval) >= ATL_laSAFMIN)
            cblas_scal(M-j-1, ATL_rone/pivval, Ac+j+1, 1);
         else
         {
            ATL_INT i;
            /* divide the sub-diagonal entries, not the pivot itself */
            for (i=j+1; i < M; i++)
               Ac[i] /= pivval;
         }
      #endif
      return(0);
   }
   return(1);
}