Beispiel #1
0
static void geinv
   (const enum CBLAS_ORDER Order, const int N, TYPE *A, const int lda)
{
   int *ipiv;
   TYPE *wrk;
   int lwrk;

   ipiv = malloc(sizeof(int)*N);
   ATL_assert(ipiv);
   #ifdef TimeF77
      lwrk = N * Mjoin(PATL,GetNB)();
      wrk = malloc(ATL_MulBySize(lwrk));
      if (Order == AtlasRowMajor) Mjoin(PATL,tstsqtran)(N, A, lda);
      ATL_assert(Mjoin(PATL,f77getrf)(AtlasColMajor, N, N, A, lda, ipiv) == 0);
      ATL_assert(Mjoin(PATL,f77getri)
         (AtlasColMajor, N, A, lda, ipiv, wrk, &lwrk) == 0);
      if (Order == AtlasRowMajor) Mjoin(PATL,tstsqtran)(N, A, lda);
      free(wrk);
   #elif defined(TimeC)
      ATL_assert(Mjoin(CLP,getrf)(Order, N, N, A, lda, ipiv) == 0);
      ATL_assert(Mjoin(CLP,getri)(Order, N, A, lda, ipiv) == 0);
  #else
      lwrk = N * Mjoin(PATL,GetNB)();
      wrk = malloc(ATL_MulBySize(lwrk));
      ATL_assert(Mjoin(PATL,getrf)(Order, N, N, A, lda, ipiv) == 0);
      ATL_assert(Mjoin(PATL,getri)(Order, N, A, lda, ipiv, wrk, &lwrk) == 0);
      free(wrk);
   #endif
   free(ipiv);
}
Beispiel #2
0
static TYPE *DupMat(enum ATLAS_ORDER Order, int M, int N, TYPE *A, int lda,
                    int ldc)
/*
 * returns a duplicate of the A matrix, with new leading dimension
 */
{
   int i, j, M2;
   const int ldc2 = (ldc SHIFT), lda2 = (lda SHIFT);
   TYPE *C;
   if (Order == CblasRowMajor)
   {
      i = M;
      M = N;
      N = i;
   }
   M2 = M SHIFT;
   ATL_assert(ldc >= M);
   C = malloc(ATL_MulBySize(ldc)*N);
   ATL_assert(C);
   #if defined(ATL_USEPTHREADS) && !defined(ATL_NONUMATOUCH)
      ATL_NumaTouchSpread(ATL_MulBySize(ldc)*N, C);
   #endif
   for (j=0; j != N; j++)
   {
      for (i=0; i != M2; i++) C[i] = A[i];
      C += ldc2;
      A += lda2;
   }
   return(C-N*ldc2);
}
Beispiel #3
0
void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                 const int N, const void *alpha,
                 const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   void *vx, *vy;
   float *x0, *y0;
   const float *x=X, *y=Y, *alp=alpha;
   const float one[2]={ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (lda < N || lda < 1)
      info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cher2", "");
      return;
   }
#endif

   if (incX < 0) x += (1-N)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_cher2(Uplo, N, alpha, x, incX, y, incY, A, lda);
   else if (alp[0] != ATL_rzero || alp[1] != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx != NULL && vy != NULL);
      x0 = ATL_AlignPtr(vx);
      y0 = ATL_AlignPtr(vy);
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);
      ATL_ccopyConj(N, x, incX, x0, 1);
      ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                N, one, y0, 1, x0, 1, A, lda);
      free(vx);
      free(vy);
   }
   else ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                  N, alpha, y, incY, x, incX, A, lda);
}
Beispiel #4
0
static TYPE uumtest(enum ATLAS_ORDER Order, enum ATLAS_UPLO Uplo,
                    int CacheSize, int N, int lda, double *tim)
{
   TYPE *A, *Ag, *LmLt;
   double t0, t1;
   TYPE normA, eps, resid;
   enum ATLAS_UPLO MyUplo = Uplo;

   if (Order == CblasRowMajor)
   {
      if (Uplo == CblasUpper) MyUplo = CblasLower;
      else MyUplo = CblasUpper;
   }
   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*N + ATL_MulBySize(N)*N);
   if (A == NULL) return(-1);
   Ag = A + lda*(N SHIFT);
   t0 = ATL_flushcache(CacheSize);
   lltgen(MyUplo, N, A, lda, N*1029+lda);
   lltgen(MyUplo, N, Ag, N, N*1029+lda);
   normA = lltnrm1(MyUplo, N, A, lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A", N, N, A, lda);
      Mjoin(PATL,geprint)("Ag", N, N, Ag, N);
   #endif

   t0 = ATL_flushcache(-1);

   t0 = time00();
   test_lauum(Order, Uplo, N, A, lda);
   t1 = time00() - t0;
   *tim = t1;

   t0 = ATL_flushcache(0);

   ATL_checkpad(MyUplo, N, A, lda);
   if (Uplo == CblasUpper) LmLt = ATL_UmulUt(Order, N, Ag, N);
   else LmLt = ATL_LtmulL(Order, N, Ag, N);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A", N, N, A, lda);
      Mjoin(PATL,geprint)("Ag", N, N, LmLt, N);
   #endif
   lltdiff(MyUplo, N, A, lda, LmLt, N);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N);
   #endif
   resid = lltnrm1(MyUplo, N, LmLt, N) / (normA * eps * N);
   if (resid > 10.0 || resid != resid)
      fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid);

   free(LmLt);
   free(A);

   return(resid);
}
Beispiel #5
0
TYPE *GetGE(int M, int N, int lda)
{
   TYPE *A;
   A = malloc(ATL_MulBySize(lda)*N);
   if (A)
   {
      #if defined(ATL_USEPTHREADS) && !defined(ATL_NONUMATOUCH)
         ATL_NumaTouchSpread(ATL_MulBySize(lda)*N, A);
      #endif
      Mjoin(PATL,gegen)(M, N, A, lda, M*N+lda);
   }
   return(A);
}
Beispiel #6
0
void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *vc;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   if (K > SYRK_Xover)
   {
      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
      ATL_assert(vc);
      c = ATL_AlignPtr(vc);
      CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, c, N);
      if ( SCALAR_IS_ONE(beta) ) Mjoin(syr_put,_b1)(N, c, beta, C, ldc);
      else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr_put,_b0)(N, c, beta, C, ldc);
      #ifdef TCPLX
         else if ( SCALAR_IS_NONE(beta) )
            Mjoin(syr_put,_bn1)(N, c, beta, C, ldc);
         else if (beta[1] == *zero) Mjoin(syr_put,_bXi0)(N, c, beta, C, ldc);
      #endif
      else Mjoin(syr_put,_bX)(N, c, beta, C, ldc);
      free(vc);
   }
   else Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda,
                            beta, C, ldc);
}
Beispiel #7
0
TYPE *GetGE(int M, int N, int lda)
{
   TYPE *A;
   A = malloc(ATL_MulBySize(lda)*N);
   if (A) Mjoin(PATL,gegen)(M, N, A, lda, M*N+lda);
   return(A);
}
Beispiel #8
0
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      #define alpha valpha
   #endif
   void *va;
   TYPE *a;

   if (N > TRMM_Xover)
   {
      va = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         if ( SCALAR_IS_ONE(alpha) ) Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);
         else Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);
         CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);
      #else
         ATL_trcopy(M, A, lda, a);
         CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                            A, lda, C, ldc);
}
Beispiel #9
0
      int Mjoin(PATL,her2kLN)
   #endif
#endif
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   int i;
   void *vc=NULL;
   TYPE *c;
   const TYPE beta =*( (const TYPE *)vbeta  );
   const TYPE zero[2]={0.0, 0.0};

   i = ATL_MulBySize(N)*N;
   if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);
   if (vc == NULL) return(1);
   c = ATL_AlignPtr(vc);
   #ifdef Transpose_
      ATL_ammm(AtlasConjTrans, AtlasNoTrans, N, N, K, valpha, A, lda, B, ldb,
   #else
      ATL_ammm(AtlasNoTrans, AtlasConjTrans, N, N, K, valpha, A, lda, B, ldb,
   #endif
               zero, c, N);
   if ( beta == 1.0 ) Mjoin(her2k_put,_b1)(N, c, vbeta, C, ldc);
   else if ( beta == 0.0 ) Mjoin(her2k_put,_b0)(N, c, vbeta, C, ldc);
   else Mjoin(her2k_put,_bXi0)(N, c, vbeta, C, ldc);
   free(vc);
   return(0);
}
Beispiel #10
0
void Mjoin(Mjoin(PATL,symmL),UploNM)
(const int M, const int N, const void *valpha, const void *A, const int lda,
 const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
#ifdef TREAL
    const SCALAR alpha=*( (const SCALAR *)valpha );
    const SCALAR beta =*( (const SCALAR *)vbeta  );
    const SCALAR one=1.0;
#else
#define alpha valpha
#define beta vbeta
#endif
    TYPE *a;
    void *va;

    if (N > SYMM_Xover)
    {
        va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
        ATL_assert(va);
        a = ATL_AlignPtr(va);
#ifdef TREAL
        if ( SCALAR_IS_ONE(alpha) )
            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(M, alpha, A, lda, a);
        else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(M, alpha, A, lda, a);
        CgemmTN(M, N, M, one, a, M, B, ldb, beta, C, ldc);
#else
        Mjoin(Mjoin(PATL,sycopy),UploNM)(M, A, lda, a);
        CgemmTN(M, N, M, valpha, a, M, B, ldb, vbeta, C, ldc);
#endif
        free(va);
    }
    else Mjoin(PATL,refsymm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                                 beta, C, ldc);
}
Beispiel #11
0
void Mjoin(Mjoin(PATL,symmR),UploNM)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0;
   #else
      #define alpha valpha
      #define beta  vbeta
   #endif
   void *va;
   TYPE *a;

   if (M > SYMM_Xover)
   {
      va = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         if ( SCALAR_IS_ONE(alpha) )
            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(N, alpha, A, lda, a);
         else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(N, alpha, A, lda, a);
         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, one, B, ldb, a, N, beta, C, ldc);
      #else
         Mjoin(Mjoin(PATL,sycopy),UploNM)(N, A, lda, a);
         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, valpha, B, ldb, a, N, vbeta, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,refsymm)(AtlasRight, Uplo_, M, N, alpha, A, lda, B, ldb,
                            beta, C, ldc);
}
Beispiel #12
0
static TYPE geresid(enum CBLAS_ORDER Order, int N, TYPE *A, int lda,
                    TYPE *AI, int ldi)
/*
 * returns ||A - AI|| / (N * eps * ||A|| * ||AI||);
 * for row-major, we are not using 1-norm, since we are adding rows instead
 * of cols, but it should be an equally good norm, so don't worry about it.
 */
{
   TYPE numer, denom, eps;
   const int ldcp1 = (N+1)SHIFT;
   TYPE *C;
   int i;

   #ifdef TREAL
      TYPE one = ATL_rone, zero = ATL_rzero;
   #else
      TYPE one[2] = {ATL_rone, ATL_rzero}, zero[2] = {ATL_rzero, ATL_rzero};
   #endif

   eps = Mjoin(PATL,epsilon)();
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   cblas_gemm(Order, CblasNoTrans, CblasNoTrans, N, N, N, one, A, lda,
              AI, ldi, zero, C, N);                /* C now has A*inv(A) */
   for (i=0; i != N; i++) C[i*ldcp1] -= ATL_rone;  /* C now has A*inv(A)-I */
   numer = Mjoin(PATL,genrm1)(N, N, C, N);
   denom = Mjoin(PATL,genrm1)(N, N, A, lda) *
           Mjoin(PATL,genrm1)(N, N, AI, ldi) * N * eps;
   free(C);
   return(numer/denom);
}
Beispiel #13
0
static TYPE *ATL_LmulLt(const int N, const TYPE *L, const int ldl)
/*
 * A = L * L^H
 */
{
   const int incA = 1 SHIFT, incL = (ldl+1) SHIFT;
   TYPE *A;
   int i, j;
   #ifdef TCPLX
      int i1, i2;
      TYPE tmp;
   #endif

   A = malloc(N*ATL_MulBySize(N));
   ATL_assert(A);
   for (j=0; j < N; j++)
   {
      for (i=j; i < N; i++)
      {
      #ifdef TREAL
         A[i+j*N] = L[i+j*ldl] * L[j+j*ldl] +
                    Mjoin(PATL,dot)(j, L+i, ldl, L+j, ldl);
      #else
         tmp = L[(j+j*ldl)<<1];
         i1 = (i + j * N)<<1;
         i2 = (i + j * ldl)<<1;
         Mjoin(PATL,dotc_sub)(j, L+(j<<1), ldl, L+(i<<1), ldl, A+i1);
         A[i1] += L[i2] * tmp;
         if (i != j) A[i1+1] += tmp * L[i2+1];
      #endif
      }
   }
   return(A);
}
Beispiel #14
0
   int Mjoin(PATL,syr2kLT)
#endif
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   int i;
   void *vc=NULL;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   i = ATL_MulBySize(N)*N;
   if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);
   if (vc == NULL) return(1);
   c = ATL_AlignPtr(vc);
   CgemmTN(N, N, K, alpha, A, lda, B, ldb, zero, c, N);
   if ( SCALAR_IS_ONE(beta) ) Mjoin(syr2k_put,_b1)(N, c, beta, C, ldc);
   else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr2k_put,_b0)(N, c, beta, C, ldc);
   #ifdef TCPLX
      else if (SCALAR_IS_NONE(beta)) Mjoin(syr2k_put,_bn1)(N, c, beta, C, ldc);
      else if (beta[1] == *zero) Mjoin(syr2k_put,_bXi0)(N, c, beta, C, ldc);
   #endif
   else Mjoin(syr2k_put,_bX)(N, c, beta, C, ldc);
   free(vc);
   return(0);
}
Beispiel #15
0
void Mjoin(Mjoin(Mjoin(PATL,herk),UploNM),N)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *vc;
   TYPE *c;
   TYPE alpha[2];
   const TYPE beta = *( (const TYPE *)vbeta  );
   const TYPE zero[2] = {0.0, 0.0};

   alpha[0] = *( (const TYPE *)valpha );
   if (K > HERK_Xover)
   {
      alpha[1] = 0.0;
      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
      ATL_assert(vc);
      c = ATL_AlignPtr(vc);
      CgemmNC(N, N, K, alpha, A, lda, A, lda, zero, c, N);
      if ( beta == 1.0 ) Mjoin(her_put,_b1)(N, c, vbeta, C, ldc);
      else if ( beta == 0.0 ) Mjoin(her_put,_b0)(N, c, vbeta, C, ldc);
      else Mjoin(her_put,_bXi0)(N, c, vbeta, C, ldc);
      free(vc);
   }
   else Mjoin(PATL,refherk)(Uplo_, AtlasNoTrans, N, K, *alpha, A, lda,
                            beta, C, ldc);
}
Beispiel #16
0
static double RunTiming
   (enum CBLAS_ORDER Order, enum TEST_UPLO Uplo, int N, int lda,
    int CacheSize, int nreps)
{
   TYPE *A, *a;
   const int incA = N*lda;
   int i, k;
   double t0, t1=0.0;

   if (nreps < 1) nreps = 1;
   i = ATL_DivBySize(2*CacheSize) ATL_PTCACHEMUL;
   k = i = (i + N*N-1) / (N*N);
   if (nreps > i) k = i = nreps;
   a = A = malloc(i * ATL_MulBySize(incA));
   if (A)
   {
      if (Uplo == TestGE)
         for (i=0; i < k; i++) Mjoin(PATL,gegen)(N, N, A+i*incA, lda, N+lda);
      else for (i=0; i < k; i++) hegen(Order, Uplo, N, A+i*incA, lda);

      t0 = time00();
      for (i=nreps; i; i--, a += incA) test_inv(Order, Uplo, N, a, lda);
      t1 = time00() - t0;
      free(A);
   }
   else fprintf(stderr, "   WARNING: not enough mem to run timings!\n");
   return(t1/nreps);
}
Beispiel #17
0
int ATL_getrfC(const int M, const int N, TYPE *A, const int lda, int *ipiv)
/*
 * Column-major factorization of form
 *   A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit diagonal
 * elements (lower trapazoidal if M > N), and U is upper triangular (upper
 * trapazoidal if M < N).  This is the recursive Level 3 BLAS version.
 */
{
   const int MN = Mmin(M, N);
   int Nleft, Nright, k, i, ierr=0;
   #ifdef TCPLX
      const TYPE one[2] = {ATL_rone, ATL_rzero};
      const TYPE none[2] = {ATL_rnone, ATL_rzero};
      TYPE inv[2], tmp[2];
   #else
      #define one ATL_rone
      #define none ATL_rnone
      TYPE tmp;
   #endif
   TYPE *Ac, *An;

   if (((size_t)M)*N <= ATL_L1elts)
      return(Mjoin(PATL,getf2)(M, N, A, lda, ipiv));
   #if defined(ATL_USEPTHREADS) && defined(ATL_USEPCA)
      if (N <= (NB<<2) && N >= 16 && M-N >= ATL_PCAMin &&
          ((size_t)ATL_MulBySize(M)*N) <= CacheEdge*ATL_NTHREADS)
      {
         if (N >= 16)
            ierr = Mjoin(PATL,tgetf2)(M, N, A, lda, ipiv);
         else
            ierr = Mjoin(PATL,tgetf2_nocp)(M, N, A, lda, ipiv);
         return(ierr);
      }
   #endif
   if (MN > ATL_luMmin)
   {
      Nleft = MN >> 1;
      #ifdef NB
         if (Nleft > NB) Nleft = ATL_MulByNB(ATL_DivByNB(Nleft));
      #endif
      Nright = N - Nleft;
      i = ATL_getrfC(M, Nleft, A, lda, ipiv);  /* factor left to L & U */
      if (i) if (!ierr) ierr = i;
/*
 *    Update trailing submatrix
 */
      Ac = A + (Nleft * lda SHIFT);
      An = Ac + (Nleft SHIFT);
      ATL_laswp(Nright, Ac, lda, 0, Nleft, ipiv, 1);
      cblas_trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit,
                 Nleft, Nright, one, A, lda, Ac, lda);
      cblas_gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-Nleft, Nright,
                 Nleft, none, A+(Nleft SHIFT), lda, Ac, lda, one, An, lda);
      i = ATL_getrfC(M-Nleft, Nright, An, lda, ipiv+Nleft);
      if (i) if (!ierr) ierr = i + Nleft;
      for (i=Nleft; i != MN; i++) ipiv[i] += Nleft;
      ATL_laswp(Nleft, A, lda, Nleft, MN, ipiv, 1);
   }
Beispiel #18
0
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
                 const void *alpha, const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   const double *x = X, *y = Y;
   void *vy;
   double *y0;
   double one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0) info = cblas_errprn(2, info,
                        "M cannot be less than zero; is set to %d.", M);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
      if (lda < N || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d M=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zgerc", "");
      return;
   }
#endif

   if (incX < 0) x += (1-M)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_zgerc(M, N, alpha, x, incX, y, incY, A, lda);
   else
   {
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      ATL_zmoveConj(N, alpha, y, incY, y0, 1);
      ATL_zgeru(N, M, one, y0, 1, x, incX, A, lda);
      free(vy);
   }
}
Beispiel #19
0
int RunCase(int CacheSize, TYPE thresh, int MFLOP, enum ATLAS_ORDER Order,
            enum ATLAS_UPLO Uplo, int N, int lda)
{
   char *Ups, *Ord;
   TYPE resid = 0.0;
   double mflop, mflops, t0, tim=0.0;
   int nreps=1, passed, i, imem;
   const int incA = lda*N;
   TYPE *a, *A;

   mflops = N;
   mflops = (mflops*mflops*mflops) / 4.0;
   #ifdef TCPLX
      mflops *= 4.0;
   #endif
   mflops /= 1000000.0;

   if (thresh > ATL_rzero) resid =
      uumtest(Order, Uplo, CacheSize, N, lda, &tim);
   else resid = -1.0;

   if (MFLOP > mflops || thresh <= ATL_rzero) /* need to time repetitively */
   {
      nreps = (mflops * 1000000);
      nreps = (MFLOP*1000000 + nreps-1) / nreps;
      if (nreps < 1) nreps = 1;
      imem = ATL_DivBySize(CacheSize) ATL_PTCACHEMUL;
      imem = (imem + 2*N*N-1) / (N*N);
      if (imem < nreps) imem = nreps;
      a = A = malloc(imem * ATL_MulBySize(incA));
      if (A != NULL)
      {
         for (i=0; i < imem; i++) lltgen(Uplo, N, A+i*incA, lda, N*1029+lda);
         t0 = time00();
         for (i=nreps; i; i--, a += incA)
            test_lauum(Order, Uplo, N, a, lda);
         tim = time00() - t0;
         tim /= nreps;
         free(A);
      }
      else fprintf(stderr, "   WARNING: not enough mem to run timings!\n");
   }

   if (tim > 0.0) mflop = mflops / tim;
   else mflop = 0.0;
   if (Uplo == AtlasUpper) Ups = "Upper";
   else Ups = "Lower";
   if (Order == CblasColMajor) Ord = "Col";
   else Ord = "Row";
   fprintf(stdout, "%5d  %3s  %5s %6d %6d  %12.5f  %12.3f  %12e\n",
           nreps, Ord, Ups, N, lda, tim, mflop, resid);
   if (resid > thresh || resid != resid) passed = 0;
   else if (resid < 0.0) passed = -1;
   else passed = 1;
   return(passed);
}
Beispiel #20
0
static TYPE lutestR(int CacheSize, int M, int N, int lda, int *npiv,
                    double *tim)
{
   TYPE *A, *LmU;
   int *ipiv;
   const int MN = Mmin(M,N);
   int i;
   double t0, t1;
   TYPE normA, eps, resid;

   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*M);
   if (A == NULL) return(-1);
   ipiv = malloc( MN * sizeof(int) );
   if (ipiv == NULL)
   {
      free(A);
      return(-1);
   }
   t0 = ATL_flushcache(CacheSize);

   Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A0", N, M, A, lda);
   #endif
   normA = Mjoin(PATL,genrm1)(N, M, A, lda); /* actually infnrm, but OK */

   t0 = ATL_flushcache(-1);

   t0 = time00();
   test_getrf(CblasRowMajor, M, N, A, lda, ipiv);
   t1 = time00() - t0;
   *tim = t1;

   t0 = ATL_flushcache(0);

   #ifdef DEBUG
      Mjoin(PATL,geprint)("LU", N, M, A, lda);
   #endif
   LmU = ATL_LmulUR(M, N, A, lda);  /* LmU contains L * U */
   #ifdef DEBUG
      Mjoin(PATL,geprint)("L*U", N, M, LmU, N);
   #endif
   Mjoin(PATL,gegen)(N, M, A, lda, M*N+lda);  /* regenerate A, overwriting LU */
   ATL_laswp(M, A, lda, 0, MN, ipiv, 1);  /* apply swaps to A */
   resid = Mjoin(PATL,gediffnrm1)(N, M, A, lda, LmU, N);
   resid /= (normA * eps * Mmin(M,N));
   *npiv = findnpvt(MN, ipiv);

   free(LmU);
   free(A);
   free(ipiv);

   return(resid);
}
Beispiel #21
0
static TYPE llttest(enum ATLAS_UPLO Uplo, int CacheSize, int N, int lda,
                    double *tim)
{
   TYPE *A, *LmLt;
   int i;
   double t0, t1;
   TYPE normA, eps, resid;

   eps = Mjoin(PATL,epsilon)();
   A = malloc(ATL_MulBySize(lda)*N);
   if (A == NULL) return(-1);
   t0 = ATL_flushcache(CacheSize);
   lltgen(Uplo, N, A, lda, N*1029+lda);
   normA = lltnrm1(Uplo, N, A, lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A0", N, N, A, lda);
   #endif

   t0 = ATL_flushcache(-1);

   t0 = time00();
   test_potrf(Uplo, N, A, lda);
   t1 = time00() - t0;
   *tim = t1;

   t0 = ATL_flushcache(0);

   #ifdef DEBUG
      Mjoin(PATL,geprint)("L", N, N, A, lda);
   #endif
   ATL_checkpad(Uplo, N, A, lda);
   if (Uplo == AtlasUpper) LmLt = ATL_UtmulU(N, A, lda);
   else LmLt = ATL_LmulLt(N, A, lda);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("L*Lt", N, N, LmLt, N);
   #endif
   lltgen(Uplo, N, A, lda, N*1029+lda);  /* regen A over LLt */
   lltdiff(Uplo, N, A, lda, LmLt, N);
   #ifdef DEBUG
      Mjoin(PATL,geprint)("A-L*Lt", N, N, LmLt, N);
   #endif
   resid = lltnrm1(Uplo, N, LmLt, N);
   #ifdef DEBUG
      if (resid/(normA*eps*N) > 10.0)
         fprintf(stderr, "normA=%e, eps=%e, num=%e\n", normA, eps, resid);
   #endif
   resid /= (normA * eps * N);

   free(LmLt);
   free(A);

   return(resid);
}
Beispiel #22
0
static TYPE *ATL_LmulUC(const int M, const int N, const TYPE *LU, const int ldl)
{
   const int lda = ldl SHIFT, MN = Mmin(M,N);
   int i, j, m;
   TYPE *C, *c;
   #ifdef TREAL
      const TYPE ONE=ATL_rone;
   #else
      const TYPE ONE[2] = {ATL_rone, ATL_rzero};
   #endif

   C = c = malloc(M*ATL_MulBySize(N));
   ATL_assert(c);
   if (M >= N)
   {
      for (j=0; j < MN; j++)
      {
         m = j SHIFT;
         for (i=0; i < m; i++) c[i] = ATL_rzero;
         #ifdef TCPLX
            c[i++] = ATL_rone;
            c[i++] = ATL_rzero;
         #else
            c[i++] = ATL_rone;
         #endif
         for (m=M SHIFT; i < m; i++) c[i] = LU[i];
         c += m;
         LU += lda;
      }
      LU -= MN * lda;
      for (m=M SHIFT; j < N; j++, c += m) Mjoin(PATL,zero)(M, c, 1);
      cblas_trmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
                 CblasNonUnit, M, N, ONE, LU, ldl, C, M);
   }
   else /* M < N */
   {
      for (j=0; j < M; j++)
      {
         m = (j+1) SHIFT;
         for (i=0; i < m; i++) c[i] = LU[i];
         for (m=M SHIFT; i < m; i++) c[i] = ATL_rzero;
         c += m;
         LU += lda;
      }
      Mjoin(PATL,gecopy)(M, N-M, LU, ldl, c, M);
      LU -= M * lda;
      cblas_trmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans,
                 CblasUnit, M, N, ONE, LU, ldl, C, M);
   }
   return(C);
}
Beispiel #23
0
void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                const int N, const double alpha,
                const void *X, const int incX, void *A, const int lda)
{
   int info = 2000;
   void *vx;
   double one[2] = {ATL_rone, ATL_rzero};
   double *x0;
   const double *x=X;

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (lda < N || lda < 1)
      info = cblas_errprn(8, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zher", "");
      return;
   }
#endif

   if (incX < 0) x += (1-N)*incX<<1;

   if (Order == CblasColMajor)
      ATL_zher(Uplo, N, alpha, x, incX, A, lda);
   else if (alpha != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx);
      x0 = ATL_AlignPtr(vx);
      ATL_zmoveConj(N, one, x, incX, x0, 1);
      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
               N, alpha, x0, 1, A, lda);
      free(vx);
   }
   else
      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
               N, ATL_rzero, x, incX, A, lda);
}
Beispiel #24
0
static TYPE *ATL_LtmulL
   (const enum CBLAS_ORDER Order, const int N, const TYPE *L, const int ldl)
{
   TYPE *C;
   #ifdef TREAL
      const TYPE one=ATL_rone, zero=ATL_rzero;
   #else
      const TYPE one[2] = {ATL_rone,ATL_rzero}, zero[2] = {ATL_rzero,ATL_rzero};
   #endif
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   ATL_L2GE(Order, N, L, ldl, C, N);
   cblas_trmm(Order, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit,
              N, N, one, L, ldl, C, N);
   return(C);
}
Beispiel #25
0
static TYPE *ATL_UmulUt
   (const enum CBLAS_ORDER Order, const int N, const TYPE *U, const int ldu)
{
   TYPE *C;
   #ifdef TREAL
      const TYPE one=ATL_rone, zero=ATL_rzero;
   #else
      const TYPE one[2] = {ATL_rone,ATL_rzero}, zero[2] = {ATL_rzero,ATL_rzero};
   #endif
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   ATL_U2GE(Order, N, U, ldu, C, N);
   cblas_trmm(Order, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit,
              N, N, one, U, ldu, C, N);
   return(C);
}
Beispiel #26
0
static TYPE *ATL_LmulUR(const int M, const int N, const TYPE *LU, const int ldl)
{
   const int lda = ldl SHIFT, ldc = N SHIFT, MN = Mmin(M,N);
   int i, j, m;
   TYPE *C, *c;
   #ifdef TREAL
      const TYPE ONE=ATL_rone;
   #else
      const TYPE ONE[2] = {ATL_rone, ATL_rzero};
   #endif

   C = c = malloc(M*ATL_MulBySize(N));
   ATL_assert(c);
   if (M >= N)
   {
      for (i=0; i != N; i++, LU += lda, C += ldc)
      {
         Mjoin(PATL,copy)(i+1, LU, 1, C, 1);
         Mjoin(PATL,zero)(N-i-1, C+((i+1)SHIFT), 1);
      }
      for(; i != M; i++, LU += lda, C += ldc) Mjoin(PATL,copy)(N, LU, 1, C, 1);
      LU -= lda * M;
      C -= ldc * M;
      cblas_trmm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasUnit,
                 M, N, ONE, LU, ldl, C, N);
   }
   else /* N > M */
   {
      for (i=0; i != M; i++, C += ldc, LU += lda)
      {
         Mjoin(PATL,zero)(i, C, 1);
         C[i SHIFT] = ATL_rone;
         #ifdef TCPLX
            C[(i SHIFT)+1] = ATL_rzero;
         #endif
         Mjoin(PATL,copy)(N-i-1, LU+((i+1)SHIFT), 1, C+((i+1)SHIFT), 1);
      }
      LU -= lda * M;
      C -= ldc * M;
      cblas_trmm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans,
                 CblasNonUnit, M, N, ONE, LU, ldl, C, N);
   }
   return(C);
}
Beispiel #27
0
int Mjoin(PC2F,gels)(const enum CBLAS_TRANSPOSE TA, ATL_CINT M, ATL_CINT N,
                     ATL_CINT NRHS, TYPE *A, ATL_CINT lda,
                     TYPE *B, ATL_CINT ldb)
{
   TYPE work[2];
   TYPE *wrk;
   ATL_INT lwrk;
   int iret;
/*
 * Query routine for optimal workspace, allocate it, and call routine with it
 */
   ATL_assert(!Mjoin(PC2F,gels_wrk)(TA, M, N, NRHS, A, lda, B, ldb, work, -1));
   lwrk = work[0];
   wrk = malloc(ATL_MulBySize(lwrk));
   ATL_assert(wrk);
   iret = Mjoin(PC2F,gels_wrk)(TA, M, N, NRHS, A, lda, B, ldb, wrk, lwrk);
   free(wrk);
   return(iret);
}
Beispiel #28
0
static TYPE poresid(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
                    int N, TYPE *A, int lda, TYPE *AI, int ldi)
/*
 * returns ||A - AI|| / (N * eps * ||A|| * ||AI||);
 */
{
   enum CBLAS_UPLO uplo=Uplo;
   TYPE numer, denom, eps;
   const int ldcp1 = (N+1)SHIFT;
   int i;
   #ifdef TREAL
      TYPE one = ATL_rone, zero = ATL_rzero;
   #else
      TYPE one[2] = {ATL_rone, ATL_rzero}, zero[2] = {ATL_rzero, ATL_rzero};
   #endif
   TYPE *C, *B;

   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   B = DupMat(Order, N, N, AI, ldi, N);
   ReflectHE(Order, Uplo, N, B, N);
   #ifdef TREAL
      cblas_symm(Order, CblasRight, Uplo, N, N, one, A, lda, B, N, zero, C, N);
   #else
      cblas_hemm(Order, CblasRight, Uplo, N, N, one, A, lda, B, N, zero, C, N);
   #endif
   free(B);
   eps = Mjoin(PATL,epsilon)();
   if (Order == CblasRowMajor)
      uplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper;
   for (i=0; i != N; i++) C[i*ldcp1] -= ATL_rone;  /* C now has A*inv(A)-I */
   numer = Mjoin(PATL,genrm1)(N, N, C, N);
   #ifdef TREAL
      denom = Mjoin(PATL,synrm)(uplo, N, A, lda) *
              Mjoin(PATL,synrm)(uplo, N, AI, ldi) * N * eps;
   #else
      denom = Mjoin(PATL,henrm)(uplo, N, A, lda) *
              Mjoin(PATL,henrm)(uplo, N, AI, ldi) * N * eps;
   #endif
   free(C);
   return(numer/denom);
}
Beispiel #29
0
void Mjoin(Mjoin(PATL,hemmL),UploNM)
   (const int M, const int N, const void *alpha, const void *A, const int lda,
    const void *B, const int ldb, const void *beta, void *C, const int ldc)
{
   TYPE *a;
   void *va;

   if (N > HEMM_Xover)
   {
      va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      Mjoin(Mjoin(PATL,hecopy),UploNM)(M, A, lda, a);
      ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, M, alpha, a, M, B, ldb,
               beta, C, ldc);
      free(va);
   }
   else Mjoin(PATL,refhemm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                            beta, C, ldc);
}
Beispiel #30
0
static TYPE *ATL_UtmulU(const int N, const TYPE *U, const int ldu)
{
   TYPE *A;
   int i, j;
   #ifdef TCPLX
      const int ldu2 = ldu<<1;
      int i1, i2;
      TYPE tmp;
   #endif

   A = malloc(N*ATL_MulBySize(N));
   ATL_assert(A);
   for (j=0; j < N; j++)
   {
   #ifdef TREAL
      for (i=0; i <= j; i++)
         A[i+j*N] = Mjoin(PATL,dot)(i+1, U+i*ldu, 1, U+ldu*j, 1);
   #else
      for (i=0; i <= j; i++)
      {
         i1 = (i+j*N)<<1;
         i2 = (i+j*ldu)<<1;
         tmp = U[(i+i*ldu)<<1];
         Mjoin(PATL,dotc_sub)(i, U+i*ldu2, 1, U+j*ldu2, 1, A+i1);
         if (i != j)
         {
            A[i1] += U[i2] * tmp;
            A[i1+1] += U[i2+1] * tmp;
         }
         else
         {
            A[i1] += tmp * tmp;
            A[i1+1] += ATL_rzero;
         }
      }
   #endif
   }
   return(A);
}