Ejemplo n.º 1
0
void col2blk(const int M, const int N, const TYPE *A, const int lda, TYPE *V,
             const SCALAR alpha)
/*
 * Copies an M x N array A into the workspace V.  A is read as N runs of M
 * consecutive two-TYPE elements, successive runs being lda elements apart.
 * Every element is handed to the scalcp macro (defined outside this routine;
 * presumably it applies alpha and splits the pair into real and imaginary
 * parts -- confirm against its definition).
 *
 * Elements of a run are consumed NB at a time.  Each group of NB has its own
 * region of V: the values stored through scalcp's third argument start at
 * the region base, those stored through its second argument start
 * ATL_MulByNB(N) entries later.  The M - ATL_MulByNB(ATL_DivByNB(M))
 * leftover elements of each run are appended to a trailing region that
 * begins at V + (M-leftover)*2*N and uses leftover*N entries per half.
 */
{
   const int nFull = ATL_DivByNB(M);           /* # of complete NB groups   */
   const int nTail = M - ATL_MulByNB(nFull);   /* leftover elements per run */
   const int runGap = (lda - M) << 1;          /* TYPEs from run end to next */
   const int halfLen = ATL_MulByNB(N);         /* offset between the halves */
   const int grpSkip = (halfLen << 1) - NB;    /* hop to next group region  */
   int elemsLeft, grpsLeft, runsLeft;
   TYPE *re, *im, *tailRe, *tailIm;
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      const register TYPE ralpha = *alpha, ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

/*
 * The trailing-region cursors are set once and keep advancing across runs
 */
   tailIm = V + (M - nTail) * (N << 1);
   tailRe = tailIm + nTail * N;

   runsLeft = N;
   while (runsLeft != 0)
   {
      im = V;
      re = V + halfLen;
      grpsLeft = nFull;
      while (grpsLeft != 0)
      {
         elemsLeft = NB;
         while (elemsLeft != 0)
         {
            scalcp(A, re, im);
            A += 2;
            re++;
            im++;
            elemsLeft--;
         }
         re += grpSkip;
         im += grpSkip;
         grpsLeft--;
      }
      elemsLeft = nTail;
      while (elemsLeft != 0)
      {
         scalcp(A, tailRe, tailIm);
         A += 2;
         tailRe++;
         tailIm++;
         elemsLeft--;
      }
      V += NB;        /* next run starts NB entries further in each region */
      A += runGap;
      runsLeft--;
   }
}
Ejemplo n.º 2
0
static void row2blkT_NB(const int M, const int N, const TYPE *A, const int lda,
                        TYPE *vr, TYPE *vi, const SCALAR alpha)
/*
 * Copies NB runs of A, two runs at a time, into the destinations vr and vi
 * through the scalcp macro (defined outside this routine; presumably it
 * applies alpha and splits each two-TYPE element -- confirm against its
 * definition).  Successive runs of A are lda elements (2*lda TYPEs) apart.
 * Within a pair of runs, consecutive source elements land NB entries apart
 * in vr/vi, the second run of the pair being stored one entry after the
 * first; the next pair then starts 2 - NBNB entries after the position
 * reached at the end of the previous pair.
 * M and N are not used; the extent is fixed by the NB, NB2 and NBNB macros.
 */
{
   const int pairStride = lda << 2;         /* TYPEs spanned by two runs */
   const int rewind = 2 - NBNB;
   const TYPE *runA = A;
   const TYPE *runB = A + (lda << 1);
   int off, pairsLeft;
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      const register TYPE ralpha = *alpha, ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

   #if ((NB/2)*2 != NB)  /* ATLAS should ensure NB divisible by 2 */
      ATL_assert((NB/2)*2 == NB);
   #endif
   pairsLeft = NB >> 1;
   while (pairsLeft != 0)
   {
      off = 0;
      while (off != NB2)
      {
         scalcp(runA + off, vr, vi);
         scalcp(runB + off, vr + 1, vi + 1);
         off += 2;
         vr += NB;
         vi += NB;
      }
      runA += pairStride;
      runB += pairStride;
      vr += rewind;
      vi += rewind;
      pairsLeft--;
   }
}
Ejemplo n.º 3
0
void Mjoin(prow2blkT,_blk)(const int blk, const int M, const int N,
                           const SCALAR alpha, const TYPE *A, int lda,
                           const int ldainc, TYPE *V)
/*
 * Given a packed Upper matrix A, copies & transposes M rows starting at A into
 * block-major row panel
 *    ldainc =  0 : General rectangular
 *    ldainc =  1 : Upper
 *    ldainc = -1 : Lower
 *
 * A is read as N runs of M consecutive two-TYPE elements.  The runs are
 * handled kb = min(blk,N) at a time, with the N % kb leftover runs forming a
 * final narrower block.  Inside a block holding w runs, element i of run j is
 * passed to scalcp (macro defined outside this routine) with destination
 * offsets j + i*w (third argument) and j + i*w + w*M (second argument).
 *
 * The distance between successive runs starts at lda elements (lda-1 when
 * ldainc == -1) and changes by ldainc elements after every run.  Because lda
 * is converted to TYPE units (doubled) below, that per-run change must be
 * applied as 2*ldainc; adding plain ldainc would leave A pointing at the
 * second half of an element for packed (ldainc = +/-1) input.
 */
{
   const int kb = Mmin(blk,N);
   /* kb is 0 when N is 0: avoid dividing by it, nothing is copied then */
   const int ncb = (kb > 0) ? N / kb : 0, nr = N - ncb*kb;
   const int incV = kb*M - kb;
   const int VN = kb*M, vn = nr*M;
   const int ldainc2 = ldainc+ldainc;  /* run-stride change in TYPE units */
   int jb, i, j;
   TYPE *v;
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      register const TYPE ralpha=(*alpha), ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

/*
 * From here on lda is the gap, in TYPE units, between the end of one run
 * and the start of the next
 */
   if (ldainc == -1) lda--;
   lda -= M;
   lda += lda;
   for (jb=ncb; jb; jb--)
   {
      for (j=kb; j; j--)
      {
         v = V++;
         for (i=0; i != M; i++, v += kb, A += 2) scalcp(A, v+VN, v);
         A += lda;
         lda += ldainc2;
      }
      V += incV;  /* V moved kb inside the block; skip the rest of its 2*kb*M */
   }
   for (j=nr; j; j--)
   {
      v = V++;
      for (i=0; i != M; i++, v += nr, A += 2) scalcp(A, v+vn, v);
      A += lda;
      lda += ldainc2;
   }
}
Ejemplo n.º 4
0
void Mjoin(pcol2blk,_blk)(const int blk, const int M, const int N,
                          const SCALAR alpha, const TYPE *A, int lda,
                          const int ldainc, TYPE *V)
/*
 * Given a packed matrix A, copies N columns starting at A into
 * block-major column panel
 *    ldainc =  0 : General
 *    ldainc =  1 : Upper
 *    ldainc = -1 : Lower
 * NOTE: specialize to alpha cases after it works!
 *
 * Each column supplies M consecutive two-TYPE elements.  They are split into
 * M / kb groups of kb = min(M,blk) elements plus M % kb leftovers.  Every
 * full group owns a region of 2*kb*N entries of V (third scalcp argument at
 * the region base, second argument kb*N entries later); the leftovers of all
 * columns are appended to one trailing region with mr*N entries per half.
 * scalcp is a macro defined outside this routine.
 *
 * The distance between successive columns starts at lda elements (lda-1 when
 * ldainc == -1) and changes by ldainc elements after every column.
 * Requires N <= blk (asserted).  M == 0 copies nothing.
 */
{
   const int kb = Mmin(M,blk);
   /* kb is 0 when M is 0: avoid dividing by it */
   const int nrb = (kb > 0) ? M / kb : 0, mr = M - nrb*kb;
   const int nv = kb*N, nvv = mr*N;
   const int NN = nv+nv - kb;
   const int ldainc2 = ldainc+ldainc, M2 = M+M;
   int i, ib, j;
   TYPE *v = V + nrb*(NN+kb);   /* trailing region for the mr leftovers */
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      const register TYPE ralpha=(*alpha), ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

   if (ldainc == -1) lda--;
   lda += lda;                  /* column stride in TYPE units */
   ATL_assert(N <= blk);
   for (j=0; j != N; j++)
   {
      for (ib=nrb; ib; ib--)
      {
         for (i=0; i < kb; i++, A += 2, V++) scalcp(A, V+nv, V);
         V += NN;               /* kb + NN == 2*kb*N: start of next region */
      }
      if (mr)
      {
         for (i=0; i < mr; i++, A += 2, v++) scalcp(A, v+nvv, v);
      }
      V += kb - nrb*(NN+kb);    /* back to first region, one column further */
      A += lda - M2;
      lda += ldainc2;
   }
}
Ejemplo n.º 5
0
void prow2blk_KB(const int mb, const int nb, const SCALAR alpha, const TYPE *A,
                 int lda, const int ldainc, TYPE *V)
/*
 * Used by the full copy to move one mb x nb block of A into block-major
 * nb x mb storage, transposing as it goes.
 *
 * A is read as nb runs of mb consecutive two-TYPE elements.  Element i of
 * run j is passed to scalcp (macro defined outside this routine) with
 * destination offsets j + i*nb (third argument) and j + i*nb + mb*nb
 * (second argument).  The distance between successive runs starts at lda
 * elements (lda-1 when ldainc == -1) and changes by ldainc elements after
 * every run.
 */
{
   const int halfLen = mb * nb;
   const int gapStep = ldainc + ldainc;   /* stride change in TYPE units */
   TYPE *dst;
   int gap, k, runsLeft;
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      register const TYPE ralpha=(*alpha), ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

/*
 * gap = TYPEs between the end of one run and the start of the next
 */
   gap = lda;
   if (ldainc == -1)
   {
      gap -= 1;
   }
   gap = (gap - mb) * 2;

   runsLeft = nb;
   while (runsLeft != 0)
   {
      dst = V;
      V++;
      k = 0;
      while (k != mb)
      {
         scalcp(A, dst + halfLen, dst);
         k++;
         dst += nb;
         A += 2;
      }
      A += gap;
      gap += gapStep;
      runsLeft--;
   }
}
Ejemplo n.º 6
0
static void row2blkT_KB(const int M, const int N, const TYPE *A, const int lda,
                        TYPE *vr, TYPE *vi, const SCALAR alpha)
/*
 * Copies N runs of M consecutive two-TYPE elements from A (successive runs
 * lda elements apart) into vr and vi through the scalcp macro (defined
 * outside this routine; presumably it applies alpha and splits each element
 * into real and imaginary parts -- confirm against its definition).
 * Element i of run j is stored at offset j + i*N of vr and vi, i.e. the
 * block is transposed during the copy.
 */
{
   const int runLen = M << 1;          /* TYPEs per run                   */
   const int runStride = lda << 1;     /* TYPEs between run starts        */
   const int rewind = 1 - M * N;       /* undo M steps of N, advance by 1 */
   int off, runsLeft;
   #ifdef ALPHAXI0
      #ifdef Conj_
         const register TYPE ralpha = *alpha, calpha = -ralpha;
      #else
         const register TYPE ralpha = *alpha;
      #endif
   #elif defined(ALPHAX)
      const register TYPE ralpha = *alpha, ialpha = alpha[1];
      register TYPE ra, ia;
   #endif

   runsLeft = N;
   while (runsLeft != 0)
   {
      off = 0;
      while (off != runLen)
      {
         scalcp(A + off, vr, vi);
         off += 2;
         vr += N;
         vi += N;
      }
      A += runStride;
      vr += rewind;
      vi += rewind;
      runsLeft--;
   }
}