Esempio n. 1
0
void ATL_signal_tree
(
   PT_TREE_T                  ROOT
)
{
/*
 * Purpose
 * =======
 *
 * ATL_signal_tree signals the end of the node function to its peer.
 *
 * Arguments
 * =========
 *
 * ROOT    (input)                       PT_TREE_T
 *         On entry, ROOT specifies the node emitting the signal.
 *
 * ---------------------------------------------------------------------
 */
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_assert(!pthread_mutex_lock  ( &(ROOT->mutex) )); ROOT->count++;
   ATL_assert(!pthread_cond_signal ( &(ROOT->cond)  ));
   ATL_assert(!pthread_mutex_unlock( &(ROOT->mutex) ));
/*
 * End of ATL_signal_tree
 */
}
Esempio n. 2
0
static TYPE *DupMat(enum ATLAS_ORDER Order, int M, int N, TYPE *A, int lda,
                    int ldc)
/*
 * returns a duplicate of the A matrix, with new leading dimension
 */
{
   int i, j, M2;
   const int ldc2 = (ldc SHIFT), lda2 = (lda SHIFT);
   TYPE *C;
   if (Order == CblasRowMajor)
   {
      i = M;
      M = N;
      N = i;
   }
   M2 = M SHIFT;
   ATL_assert(ldc >= M);
   C = malloc(ATL_MulBySize(ldc)*N);
   ATL_assert(C);
   for (j=0; j != N; j++)
   {
      for (i=0; i != M2; i++) C[i] = A[i];
      C += ldc2;
      A += lda2;
   }
   return(C-N*ldc2);
}
Esempio n. 3
0
int f77getrf(const enum ATLAS_ORDER Order, const int M, const int N,
             TYPE *A, const int lda, int *ipiv)
{
   int i;
   const int MN=Mmin(M,N);
   #ifdef ATL_FunkyInts
      const F77_INTEGER F77M=M, F77N=N, F77lda=lda;
      F77_INTEGER info, *F77ipiv;
   #else
      int info;
      #define F77M M
      #define F77N N
      #define F77lda lda
      #define F77ipiv ipiv
   #endif
   #ifdef ATL_FunkyInts
      F77ipiv = malloc(MN * sizeof(F77_INTEGER));
      ATL_assert(F77ipiv);
   #endif
   ATL_assert(Order == AtlasColMajor);

   F77GETRF(&F77M, &F77N, A, &F77lda, F77ipiv, &info);

   #ifdef ATL_FunkyInts
      for (i=0; i < MN; i++) ipiv[i] = F77ipiv[i] - 1;
      free(F77ipiv);
   #else
      for (i=0; i < MN; i++) ipiv[i]--;
   #endif
   return(info);
}
Esempio n. 4
0
int *RoutNames2IntList(int nargs, char **args, int i)
{
   int n, *iarr, k;

   if (++i >= nargs)
      PrintUsage(args[0], i, NULL);
   n = atoi(args[i]);
   ATL_assert(n > 0);
   iarr = malloc(sizeof(int)*(n+1));
   ATL_assert(iarr);

   iarr[0] = n;
   for (k=0; k < n; k++)
   {
      if (++i >= nargs)
         PrintUsage(args[0], i, NULL);
      if (!strcmp(args[i], "getrf") || !strcmp(args[i], "GETRF"))
         iarr[k+1] = LAgetrf;
      else if (!strcmp(args[i], "potrf") || !strcmp(args[i], "POTRF"))
         iarr[k+1] = LApotrf;
      else if (!strcmp(args[i], "geqrf") || !strcmp(args[i], "GEQRF"))
         iarr[k+1] = LAgeqrf;
      else if (!strcmp(args[i], "geqlf") || !strcmp(args[i], "GEQLF"))
         iarr[k+1] = LAgeqrf;
      else if (!strcmp(args[i], "gerqf") || !strcmp(args[i], "GERQF"))
         iarr[k+1] = LAgeqrf;
      else if (!strcmp(args[i], "gelqf") || !strcmp(args[i], "GELQF"))
         iarr[k+1] = LAgeqrf;
      else
         PrintUsage(args[0], i, args[i]);
   }
   return(iarr);
}
Esempio n. 5
0
double GetTimeWithReps_LU
   (int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB)
{
   double mflop, t0, t1, drep;
   char *wrksets;       /* working sets for kernel calls */
#ifdef TCPLX
   const int lda2 = lda+lda;
#else
   const int lda2 = lda;
#endif
   size_t setsz, setszT;   /* work set size in memory, and amnt of it touched */
   size_t nrep;            /* # of reps required to force mflopF flops */
   size_t nset;            /* # of working sets allocated */
   int i;
/*
 * Keep setsz a multiple of TYPE size for alignment reasons.  LU only accesses
 * M*N of matrix and all of IPIV.
 */
   setsz = lda*N*ATL_sizeof +
           ((M*sizeof(int)+ATL_sizeof-1)/ATL_sizeof)*ATL_sizeof;
   setszT = M*N*ATL_sizeof + M*sizeof(int);
   mflop = GetFlopCount(LAgetrf, 0, M, N, 0, 0, CAN_NB);
/*
 * Cannot reuse matrices (bogus to factor an already factored matrix), so we
 * must take as our total memspace MAX(nrep,nset)*setsz
 */
   ATL_assert(mflop > 0.0);
   drep = (mflopF*1.0e6) / mflop;
   nrep = (int)(drep+0.999999);
/*
 * If cacheline flush doesn't work, then we must use this method
 */
   #if ATL_LINEFLUSH
      if (nrep < 2)
         return(-1.0);                                /* do wt normal timer */
   #else
      nrep = (nrep >= 1) ? nrep : 1;
   #endif

   nset = (flsizeKB*1024+setszT-1)/setszT;
   if (nset < nrep)
      nset = nrep;
   wrksets = malloc(nset * setsz);
   ATL_assert(wrksets);

   for (i=0; i < nset; i++)
      Mjoin(PATL,gegen)(M, N, (TYPE*)(wrksets+i*setsz), lda, M*N+lda);

   t0 = time00();
   for (i=0; i < nrep; i++)
   {
      test_getrf(CblasColMajor, M, N, (TYPE*)(wrksets+i*setsz), lda,
                 (int*)(wrksets+i*setsz+lda*N*ATL_sizeof));
   }
   t1 = time00();
   free(wrksets);

   return((t1-t0)/((double)nrep));
}
Esempio n. 6
0
/*
 * computes (i,j) non-diagonal block of C
 */
static void DoCblk(const int rank, ATL_tsyrk_ammN_t *pd, TYPE *wC, int i, int j)
{
   const ammkern_t amm = pd->amm_b1;
   const unsigned int nkblks=pd->nkblks, bs=pd->blkszA, kb=pd->kb, NB=pd->nb;
   unsigned int nmu, nnu, mb, nb;
   const TYPE *wA, *wB, *wAn, *wBn;
   TYPE *c;
   int k;

   if (!(pd->LOWER))
   {
      k = i;
      i = j;
      j = k;
   }
   if (j != pd->ndiag-1)
   {
      nnu = pd->nnu;
      nb = pd->nb;
   }
   else
   {
      nnu = pd->nnuf;
      nb = pd->nbf;
   }
   if (i != pd->ndiag-1)
   {
      nmu = pd->nmu;
      mb = pd->nb;
   }
   else
   {
      nmu = pd->nmuf;
      mb = pd->nbf;
   }
   wA = pd->wA + i*pd->panszA;
   wB = pd->wAt + j*pd->panszA;
   wA = pd->wA + i*pd->panszA;
   wB = pd->wAt + j*pd->panszA;
   wAn = wA + bs;
   wBn = wB + bs;
   #ifdef DEBUG2
      if (!ATL_IsBitSetBV(pd->cpydonBV, i) || !ATL_IsBitSetBV(pd->cpydonBV, j))
          fprintf(stderr, "%d: ndiag=%d, i=%d, j=%d\n", rank, pd->ndiag, i, j);
      ATL_assert(ATL_IsBitSetBV(pd->cpydonBV, i));
      ATL_assert(ATL_IsBitSetBV(pd->cpydonBV, j));
   #endif
   pd->ammK(nmu, nnu, pd->KB0, wA, wB, wC, wAn, wBn, wC);
   for (k=1; k < nkblks; k++)
   {
      wA = wAn;
      wB = wBn;
      wAn += bs;
      wBn += bs;
      amm(nmu, nnu, kb, wA, wB, wC, wAn, wBn, wC);
   }
   pd->blk2c(mb, nb, *(pd->alpha), wC, *(pd->beta),
             pd->C+ NB*(j*(size_t)(pd->ldc) + i), pd->ldc);
}
Esempio n. 7
0
double GetTimeWithReps_LLT
   (int mflopF, int lda, int M, int N, int nb, int Uplo, int Side, int flsizeKB)
{
   double mflop, t0, t1, drep;
   char *wrksets;       /* working sets for kernel calls */
#ifdef TCPLX
   const int lda2 = lda+lda;
#else
   const int lda2 = lda;
#endif
   size_t setsz, setszT;   /* work set size in memory, and amnt of it touched */
   size_t nrep;            /* # of reps required to force mflopF flops */
   size_t nset;            /* # of working sets allocated */
   int i;
   setsz=lda*N*ATL_sizeof;   /* matrix is entire working set of LLt */
   setszT=N*N*ATL_sizeof;    /* only touch N*N portion */
   mflop = GetFlopCount(LApotrf, Uplo, M, N, 0, 0, CAN_NB);
/*
 * Cannot reuse matrices (bogus to factor an already factored matrix), so we
 * must take as our total memspace MAX(nrep,nset)*setsz
 */
   ATL_assert(mflop > 0.0);
   drep = (mflopF*1.0e6) / mflop;
   nrep = (int)(drep+0.999999);
/*
 * If cacheline flush doesn't work, then we must use this method
 */
   #if ATL_LINEFLUSH
      if (nrep < 2)
         return(-1.0);                                /* do wt normal timer */
   #else
      nrep = (nrep >= 1) ? nrep : 1;
   #endif

   nset = (flsizeKB*1024+setszT-1)/setszT;
   if (nset < nrep)
      nset = nrep;
   wrksets = malloc(nset * setsz);
   ATL_assert(wrksets);

   for (i=0; i < nset; i++)
      PosDefGen(CblasColMajor, Uplo_LA2ATL(Uplo), N,
                (TYPE*)(wrksets+i*setsz), lda);

   t0 = time00();
   for (i=0; i < nrep; i++)
   {
      test_potrf(Uplo, N, (TYPE*)(wrksets+i*setsz), lda);
   }
   t1 = time00();
   free(wrksets);

   return((t1-t0)/((double)nrep));
}
Esempio n. 8
0
int ATL_thread_join(ATL_thread_t *thr)   /* waits on completion of thread */
{
#ifdef ATL_WINTHREADS
   ATL_assert(WaitForSingleObject(thr->thrH, INFINITE) != WAIT_FAILED);
   ATL_assert(CloseHandle(thr->thrH));
#elif defined(ATL_OMP_THREADS)
   fprintf(stderr, "Cannot call thread_join using OpenMP!!\n");
   ATL_assert(0);  /* should never enter this rout when using OMP */
#else
   ATL_assert(!pthread_join(thr->thrH, NULL));
#endif
   return(0);
}
Esempio n. 9
0
void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *vc;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   if (K > SYRK_Xover)
   {
      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
      ATL_assert(vc);
      c = ATL_AlignPtr(vc);
      CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, c, N);
      if ( SCALAR_IS_ONE(beta) ) Mjoin(syr_put,_b1)(N, c, beta, C, ldc);
      else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr_put,_b0)(N, c, beta, C, ldc);
      #ifdef TCPLX
         else if ( SCALAR_IS_NONE(beta) )
            Mjoin(syr_put,_bn1)(N, c, beta, C, ldc);
         else if (beta[1] == *zero) Mjoin(syr_put,_bXi0)(N, c, beta, C, ldc);
      #endif
      else Mjoin(syr_put,_bX)(N, c, beta, C, ldc);
      free(vc);
   }
   else Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda,
                            beta, C, ldc);
}
Esempio n. 10
0
double *TimeOnCores(struct kmm_struct *kb)
{
    struct kmm_struct *kp;
    pthread_t *threads;
    pthread_attr_t *attr;
    cpu_set_t cpuset;
    double *mflops;
    int i, p;

    p = kb->p;
    kp = malloc(sizeof(struct kmm_struct)*p);
    threads = malloc(sizeof(pthread_t)*p);
    attr = malloc(sizeof(pthread_attr_t)*p);
    mflops = malloc(sizeof(double)*p);
    ATL_assert(kp && threads && attr && mflops);
    for (i=0; i < p; i++)
    {
        memcpy(kp+i, kb, sizeof(struct kmm_struct));
        kp[i].iam = i;
        CPU_ZERO(&cpuset);
        CPU_SET(kp->pids[i], &cpuset);
        assert(!pthread_attr_setaffinity_np(attr+i, sizeof(cpuset), &cpuset));
        pthread_create(threads+i, attr+i, TimeOnCore, kp+i);
    }
    for (i=0; i < p; i++)
    {
        pthread_join(threads[i], NULL);
        mflops[i] = kp[i].mf;
    }
    free(kp->pids);
    free(kp);
    free(threads);
    free(attr);
    return(mflops);
}
Esempio n. 11
0
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      #define alpha valpha
   #endif
   void *va;
   TYPE *a;

   if (N > TRMM_Xover)
   {
      va = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         if ( SCALAR_IS_ONE(alpha) ) Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);
         else Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);
         CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);
      #else
         ATL_trcopy(M, A, lda, a);
         CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                            A, lda, C, ldc);
}
Esempio n. 12
0
void Mjoin(Mjoin(PATL,symmR),UploNM)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0;
   #else
      #define alpha valpha
      #define beta  vbeta
   #endif
   void *va;
   TYPE *a;

   if (M > SYMM_Xover)
   {
      va = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         if ( SCALAR_IS_ONE(alpha) )
            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(N, alpha, A, lda, a);
         else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(N, alpha, A, lda, a);
         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, one, B, ldb, a, N, beta, C, ldc);
      #else
         Mjoin(Mjoin(PATL,sycopy),UploNM)(N, A, lda, a);
         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, valpha, B, ldb, a, N, vbeta, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,refsymm)(AtlasRight, Uplo_, M, N, alpha, A, lda, B, ldb,
                            beta, C, ldc);
}
Esempio n. 13
0
void Mjoin(Mjoin(Mjoin(PATL,herk),UploNM),N)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *vc;
   TYPE *c;
   TYPE alpha[2];
   const TYPE beta = *( (const TYPE *)vbeta  );
   const TYPE zero[2] = {0.0, 0.0};

   alpha[0] = *( (const TYPE *)valpha );
   if (K > HERK_Xover)
   {
      alpha[1] = 0.0;
      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
      ATL_assert(vc);
      c = ATL_AlignPtr(vc);
      CgemmNC(N, N, K, alpha, A, lda, A, lda, zero, c, N);
      if ( beta == 1.0 ) Mjoin(her_put,_b1)(N, c, vbeta, C, ldc);
      else if ( beta == 0.0 ) Mjoin(her_put,_b0)(N, c, vbeta, C, ldc);
      else Mjoin(her_put,_bXi0)(N, c, vbeta, C, ldc);
      free(vc);
   }
   else Mjoin(PATL,refherk)(Uplo_, AtlasNoTrans, N, K, *alpha, A, lda,
                            beta, C, ldc);
}
Esempio n. 14
0
void Mjoin(PATL,gpmm)
   (const enum PACK_UPLO UA, const enum PACK_TRANS TA,
    const enum PACK_UPLO UB, const enum PACK_TRANS TB, const enum PACK_UPLO UC,
    const int M, const int N, const int K, const SCALAR alpha,
    const TYPE *A, const int IA, const int JA, const int lda,
    const TYPE *B, const int IB, const int JB, const int ldb,
    const SCALAR beta, TYPE *C, const int IC, const int JC, const int ldc)
{
   int j;
   #ifdef CacheEdge
      static const int CE_K = ((ATL_DivBySize(CacheEdge)-(NBNB SHIFT)) /
                                           (NB*(NB+NB)))*NB;
   #else
      #define CE_K K
   #endif
   if (!M || !N) return;
   if (!K || SCALAR_IS_ZERO(alpha))
   {
      for (j=0; j != N; j++)
         Mjoin(PATL,scal)(M, beta, C+MindexP(UC,IC,JC+j,ldc), 1);
      return;
   }
/*
 * Packed gpmm not yet implemented for complex,
 * so die if not really a dense gemm
 */
   #ifdef TCPLX
      ATL_assert (UA == PackGen && UB == PackGen && UC == PackGen);
      Mjoin(PATL,gemm)(TA, TB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
   #else
   Mjoin(PATL,prankK)(UA, TA, UB, TB, M, N, K, CE_K, alpha,
          A+MindexP(UA,IA,JA,lda), Mpld(UA,JA,lda), B+MindexP(UB,IB,JB,ldb),
          Mpld(UB,JB,ldb), beta, UC, C+MindexP(UC,IB,JB,ldc), Mpld(UC,JC,ldc));
   #endif
}
Esempio n. 15
0
int f77gesv(const int N, const int NRHS, TYPE *A, const int lda,
            int *ipiv, TYPE *B, const int ldb)
{
   #ifdef ATL_FunkyInts
      const F77_INTEGER F77N=N, F77lda=lda, F77ldb=ldb, F77NRHS=NRHS;
      F77_INTEGER info;
      F77_INTEGER *F77ipiv;
   #else
      int info;
      #define F77N N
      #define F77NRHS NRHS
      #define F77lda lda
      #define F77ldb ldb
      #define F77ipiv ipiv
   #endif
   int i;
   #ifdef ATL_FunkyInts
      F77ipiv = malloc(N*sizeof(F77_INTEGER));
      ATL_assert(F77ipiv);
   #endif
   F77GESV(&F77N, &F77NRHS, A, &F77lda, F77ipiv, B, &F77ldb, &info);
   #ifdef ATL_FunkyInts
      for (i=0; i < N; i++) ipiv[i] = F77ipiv[i] - 1;
      free(F77ipiv);
   #else
      for (i=0; i < N; i++) ipiv[i]--;
   #endif
   return(info);
}
Esempio n. 16
0
static TYPE *ATL_LmulLt(const int N, const TYPE *L, const int ldl)
/*
 * A = L * L^H
 */
{
   const int incA = 1 SHIFT, incL = (ldl+1) SHIFT;
   TYPE *A;
   int i, j;
   #ifdef TCPLX
      int i1, i2;
      TYPE tmp;
   #endif

   A = malloc(N*ATL_MulBySize(N));
   ATL_assert(A);
   for (j=0; j < N; j++)
   {
      for (i=j; i < N; i++)
      {
      #ifdef TREAL
         A[i+j*N] = L[i+j*ldl] * L[j+j*ldl] +
                    Mjoin(PATL,dot)(j, L+i, ldl, L+j, ldl);
      #else
         tmp = L[(j+j*ldl)<<1];
         i1 = (i + j * N)<<1;
         i2 = (i + j * ldl)<<1;
         Mjoin(PATL,dotc_sub)(j, L+(j<<1), ldl, L+(i<<1), ldl, A+i1);
         A[i1] += L[i2] * tmp;
         if (i != j) A[i1+1] += tmp * L[i2+1];
      #endif
      }
   }
   return(A);
}
Esempio n. 17
0
static void row2blkT_NB(const int M, const int N, const TYPE *A, const int lda,
                        TYPE *vr, TYPE *vi, const SCALAR alpha)
{
   const int incA = lda<<2, incv = 2 - NBNB;
   const TYPE *pA0 = A, *pA1 = A + (lda<<1);
   int i, j;
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      const register TYPE ralpha = *alpha, ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

   #if ((NB/2)*2 != NB)  /* ATLAS should ensure NB divisible by 2 */
      ATL_assert((NB/2)*2 == NB);
   #endif
   for (j=(NB>>1); j; j --, pA0 += incA, pA1 += incA, vr += incv, vi += incv)
   {
      for (i=0; i != NB2; i += 2, vr += NB, vi += NB)
      {
         scalcp(pA0+i, vr, vi);
         scalcp(pA1+i, vr+1, vi+1);
      }
   }
}
Esempio n. 18
0
static TYPE geresid(enum CBLAS_ORDER Order, int N, TYPE *A, int lda,
                    TYPE *AI, int ldi)
/*
 * returns ||A - AI|| / (N * eps * ||A|| * ||AI||);
 * for row-major, we are not using 1-norm, since we are adding rows instead
 * of cols, but it should be an equally good norm, so don't worry about it.
 */
{
   TYPE numer, denom, eps;
   const int ldcp1 = (N+1)SHIFT;
   TYPE *C;
   int i;

   #ifdef TREAL
      TYPE one = ATL_rone, zero = ATL_rzero;
   #else
      TYPE one[2] = {ATL_rone, ATL_rzero}, zero[2] = {ATL_rzero, ATL_rzero};
   #endif

   eps = Mjoin(PATL,epsilon)();
   C = malloc(N*ATL_MulBySize(N));
   ATL_assert(C);
   cblas_gemm(Order, CblasNoTrans, CblasNoTrans, N, N, N, one, A, lda,
              AI, ldi, zero, C, N);                /* C now has A*inv(A) */
   for (i=0; i != N; i++) C[i*ldcp1] -= ATL_rone;  /* C now has A*inv(A)-I */
   numer = Mjoin(PATL,genrm1)(N, N, C, N);
   denom = Mjoin(PATL,genrm1)(N, N, A, lda) *
           Mjoin(PATL,genrm1)(N, N, AI, ldi) * N * eps;
   free(C);
   return(numer/denom);
}
Esempio n. 19
0
void Mjoin(Mjoin(PATL,symmL),UploNM)
(const int M, const int N, const void *valpha, const void *A, const int lda,
 const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
#ifdef TREAL
    const SCALAR alpha=*( (const SCALAR *)valpha );
    const SCALAR beta =*( (const SCALAR *)vbeta  );
    const SCALAR one=1.0;
#else
#define alpha valpha
#define beta vbeta
#endif
    TYPE *a;
    void *va;

    if (N > SYMM_Xover)
    {
        va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
        ATL_assert(va);
        a = ATL_AlignPtr(va);
#ifdef TREAL
        if ( SCALAR_IS_ONE(alpha) )
            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(M, alpha, A, lda, a);
        else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(M, alpha, A, lda, a);
        CgemmTN(M, N, M, one, a, M, B, ldb, beta, C, ldc);
#else
        Mjoin(Mjoin(PATL,sycopy),UploNM)(M, A, lda, a);
        CgemmTN(M, N, M, valpha, a, M, B, ldb, vbeta, C, ldc);
#endif
        free(va);
    }
    else Mjoin(PATL,refsymm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                                 beta, C, ldc);
}
Esempio n. 20
0
static void geinv
   (const enum CBLAS_ORDER Order, const int N, TYPE *A, const int lda)
{
   int *ipiv;
   TYPE *wrk;
   int lwrk;

   ipiv = malloc(sizeof(int)*N);
   ATL_assert(ipiv);
   #ifdef TimeF77
      lwrk = N * Mjoin(PATL,GetNB)();
      wrk = malloc(ATL_MulBySize(lwrk));
      if (Order == AtlasRowMajor) Mjoin(PATL,tstsqtran)(N, A, lda);
      ATL_assert(Mjoin(PATL,f77getrf)(AtlasColMajor, N, N, A, lda, ipiv) == 0);
      ATL_assert(Mjoin(PATL,f77getri)
         (AtlasColMajor, N, A, lda, ipiv, wrk, &lwrk) == 0);
      if (Order == AtlasRowMajor) Mjoin(PATL,tstsqtran)(N, A, lda);
      free(wrk);
   #elif defined(TimeC)
      ATL_assert(Mjoin(CLP,getrf)(Order, N, N, A, lda, ipiv) == 0);
      ATL_assert(Mjoin(CLP,getri)(Order, N, A, lda, ipiv) == 0);
  #else
      lwrk = N * Mjoin(PATL,GetNB)();
      wrk = malloc(ATL_MulBySize(lwrk));
      ATL_assert(Mjoin(PATL,getrf)(Order, N, N, A, lda, ipiv) == 0);
      ATL_assert(Mjoin(PATL,getri)(Order, N, A, lda, ipiv, wrk, &lwrk) == 0);
      free(wrk);
   #endif
   free(ipiv);
}
Esempio n. 21
0
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
                 const void *alpha, const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   const double *x = X, *y = Y;
   void *vy;
   double *y0;
   double one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0) info = cblas_errprn(2, info,
                        "M cannot be less than zero; is set to %d.", M);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
      if (lda < N || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d M=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zgerc", "");
      return;
   }
#endif

   if (incX < 0) x += (1-M)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_zgerc(M, N, alpha, x, incX, y, incY, A, lda);
   else
   {
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      ATL_zmoveConj(N, alpha, y, incY, y0, 1);
      ATL_zgeru(N, M, one, y0, 1, x, incX, A, lda);
      free(vy);
   }
}
Esempio n. 22
0
void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                 const int N, const void *alpha,
                 const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   void *vx, *vy;
   float *x0, *y0;
   const float *x=X, *y=Y, *alp=alpha;
   const float one[2]={ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (lda < N || lda < 1)
      info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cher2", "");
      return;
   }
#endif

   if (incX < 0) x += (1-N)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_cher2(Uplo, N, alpha, x, incX, y, incY, A, lda);
   else if (alp[0] != ATL_rzero || alp[1] != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx != NULL && vy != NULL);
      x0 = ATL_AlignPtr(vx);
      y0 = ATL_AlignPtr(vy);
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);
      ATL_ccopyConj(N, x, incX, x0, 1);
      ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                N, one, y0, 1, x0, 1, A, lda);
      free(vx);
      free(vy);
   }
   else ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                  N, alpha, y, incY, x, incX, A, lda);
}
Esempio n. 23
0
int Mjoin(PC2F,gels)(const enum CBLAS_TRANSPOSE TA, ATL_CINT M, ATL_CINT N,
                     ATL_CINT NRHS, TYPE *A, ATL_CINT lda,
                     TYPE *B, ATL_CINT ldb)
{
   TYPE work[2];
   TYPE *wrk;
   ATL_INT lwrk;
   int iret;
/*
 * Query routine for optimal workspace, allocate it, and call routine with it
 */
   ATL_assert(!Mjoin(PC2F,gels_wrk)(TA, M, N, NRHS, A, lda, B, ldb, work, -1));
   lwrk = work[0];
   wrk = malloc(ATL_MulBySize(lwrk));
   ATL_assert(wrk);
   iret = Mjoin(PC2F,gels_wrk)(TA, M, N, NRHS, A, lda, B, ldb, wrk, lwrk);
   free(wrk);
   return(iret);
}
Esempio n. 24
0
FLSTRUCT *ATL_GetFlushStruct(void *p, int length, FLSTRUCT *next)
{
    FLSTRUCT *fp;

    fp = malloc(sizeof(FLSTRUCT));
    ATL_assert(fp);
    fp->p = p;
    fp->length = length;
    fp->next = next;

    return(fp);
}
Esempio n. 25
0
void ATL_UGER2K
   (ATL_CINT M, ATL_CINT N, const TYPE *X0, const TYPE *Y0,
    const TYPE *X1, const TYPE *Y1, TYPE *A, ATL_CINT lda)
{
   const TYPE *x0, *x1;
   register ATL_INT i, j;
   ATL_CINT incA = lda+lda + (lda<<2);

   ATL_assert((N/3)*3 == N);
   for (j=0; j < N; j += 3, A += incA, Y0 += 6, Y1 += 6)
      ATL_rk2(M, X0, X1, Y0, Y1, A, lda);
}
Esempio n. 26
0
int *GetIntList1(int ival)
/*
 * returns integer array with iarr[0] = 1, iarr[1] = ival
 */
{
   int *iarr;
   iarr = malloc(2*sizeof(int));
   ATL_assert(iarr);
   iarr[0] = 1;
   iarr[1] = ival;
   return(iarr);
}
Esempio n. 27
0
double ATL_ptflushcache(long long size)
/*
 * flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers
 */
{
    static void *vp=NULL;
    static double *cache=NULL;
    double dret=0.0;
    static long long i, N = 0;
    ATL_FC fct[ATL_NTHREADS];

    if (size < 0) /* flush cache */
    {
        ATL_assert(cache);
        for (i=0; i < ATL_NTHREADS; i++)
        {
            fct[i].N = N;
            fct[i].dp = cache+i*N;
        }
        ATL_goparallel(ATL_NTHREADS, ATL_DoWorkFC, fct, NULL);
    }
    else if (size > 0) /* initialize */
    {
        vp = malloc(ATL_Cachelen + (size * ATL_NTHREADS));
        ATL_assert(vp);
        cache = ATL_AlignPtr(vp);
        N = size / sizeof(double);
        ATL_dzero(N*ATL_NTHREADS, cache, 1);
    }
    else if (size == 0) /* free cache */
    {
        if (vp) free(vp);
        vp = cache = NULL;
        N = 0;
    }
    return(dret);
}
Esempio n. 28
0
main(int nargs, char **args)
{
   char pre, fnam[128], cta;
   enum ATLAS_TRANS TA;
   int MFLOP, M, N, lda, i, l2size;
   double mf, mfs[3];
   #ifdef TREAL
      TYPE alpha, beta;
   #else
      TYPE alpha[2], beta[2];
   #endif
   FILE *fp;

   GetFlags(nargs, args, &pre, &l2size, &MFLOP, &cta, &M, &N, SADD alpha, &lda,
            SADD beta, fnam);

   if (cta == 'N' || cta == 'n') TA = AtlasNoTrans;
   else TA = AtlasTrans;
   if (!FileExists(fnam))
   {
      fp = fopen(fnam, "w");
      ATL_assert(fp);
      for (i=0; i < 3; i++)
      {
         mf = gemvcase(MFLOP, TA, l2size, M, N, alpha, lda, beta);
         fprintf(stdout, "      %s : %f MFLOPS\n", fnam, mf);
         fprintf(fp, "%lf\n", mf);
         mfs[i] = mf;
      }
   }
   else
   {
      fp = fopen(fnam, "r");
      for (i=0; i < 3; i++) ATL_assert(fscanf(fp, " %lf", &mfs[i]) == 1);
   }
   fclose(fp);
   mf = (mfs[0] + mfs[1] + mfs[2]) / 3.0;
   fprintf(stdout, "   %s : %.2f MFLOPS\n", fnam, mf);
   exit(0);
}
Esempio n. 29
0
int *IntRange2IntList(int N0, int NN, int incN)
{
   int i, n;
   int *iarr;

   for (i=N0, n=0; i <= NN; i += incN) n++;
   iarr = malloc(sizeof(int)*(n+1));
   ATL_assert(iarr);
   iarr[0] = n;
   for (i=N0, n=1 ; i <= NN; i += incN, n++)
      iarr[n] = i;
   return(iarr);
}
Esempio n. 30
0
int *GetIntList2(int ival1, int ival2)
/*
 * returns integer array with iarr[0] = 1, iarr[1] = ival1, ival[2] = ival2
 */
{
   int *iarr;
   iarr = malloc(3*sizeof(int));
   ATL_assert(iarr);
   iarr[0] = 1;
   iarr[1] = ival1;
   iarr[2] = ival2;
   return(iarr);
}