Exemplo n.º 1
0
   int Mjoin(PATL,syr2kLT)
#endif
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
/*
 * Forms an N x N product with CgemmTN (alpha, A, B and K are passed through,
 * zero is used as the gemm beta) in a temporary workspace, then hands that
 * workspace to the syr2k_put variant selected by the value of beta so it can
 * be merged into C.
 * RETURNS: 0 on success;
 *          1 if the workspace exceeds ATL_MaxMalloc or malloc fails, in which
 *            case nothing has been written to C.
 */
{
   size_t wrkBytes;
   void *vc=NULL;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

/*
 * Size the N x N workspace in size_t: the product done in int overflows for
 * large N and could wrap to a small value that passes the limit check below
 * while being far too small for the gemm output.
 */
   wrkBytes = ATL_MulBySize((size_t)N)*N;
/*
 * ATL_Cachelen extra bytes leave room for ATL_AlignPtr to adjust the pointer
 */
   if (wrkBytes <= (size_t)(ATL_MaxMalloc)) vc = malloc(ATL_Cachelen+wrkBytes);
   if (vc == NULL) return(1);
   c = ATL_AlignPtr(vc);
   CgemmTN(N, N, K, alpha, A, lda, B, ldb, zero, c, N);
/*
 * Pick the put routine specialized for the value of beta
 */
   if ( SCALAR_IS_ONE(beta) ) Mjoin(syr2k_put,_b1)(N, c, beta, C, ldc);
   else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr2k_put,_b0)(N, c, beta, C, ldc);
   #ifdef TCPLX
      else if (SCALAR_IS_NONE(beta)) Mjoin(syr2k_put,_bn1)(N, c, beta, C, ldc);
      else if (beta[1] == *zero) Mjoin(syr2k_put,_bXi0)(N, c, beta, C, ldc);
   #endif
   else Mjoin(syr2k_put,_bX)(N, c, beta, C, ldc);
   free(vc);
   return(0);
}
Exemplo n.º 2
0
void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
/*
 * When K exceeds SYRK_Xover, forms an N x N product with CgemmTN (A is passed
 * as both operands, zero as the gemm beta) in a temporary workspace and then
 * merges it into C with the syr_put variant selected by the value of beta.
 * Otherwise the work is delegated to the reference syrk with AtlasTrans.
 * A failed workspace allocation is caught by ATL_assert.
 */
{
   void *vc;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   if (K > SYRK_Xover)
   {
/*
 *    Widen to size_t before multiplying: in int arithmetic the N x N byte
 *    count overflows for large N and malloc could be handed a wrapped,
 *    undersized request.
 */
      vc = malloc(ATL_Cachelen+ATL_MulBySize((size_t)N)*N);
      ATL_assert(vc);
      c = ATL_AlignPtr(vc);
      CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, c, N);
/*
 *    Pick the put routine specialized for the value of beta
 */
      if ( SCALAR_IS_ONE(beta) ) Mjoin(syr_put,_b1)(N, c, beta, C, ldc);
      else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr_put,_b0)(N, c, beta, C, ldc);
      #ifdef TCPLX
         else if ( SCALAR_IS_NONE(beta) )
            Mjoin(syr_put,_bn1)(N, c, beta, C, ldc);
         else if (beta[1] == *zero) Mjoin(syr_put,_bXi0)(N, c, beta, C, ldc);
      #endif
      else Mjoin(syr_put,_bX)(N, c, beta, C, ldc);
      free(vc);
   }
   else Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda,
                            beta, C, ldc);
}
Exemplo n.º 3
0
int Mjoin(PATL,mmIJK)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,
                      const int M, const int N0, const int K,
                      const SCALAR alpha, const TYPE *A, const int lda0,
                      const TYPE *B, const int ldb0, const SCALAR beta,
                      TYPE *C, const int ldc0)
/*
 * Blocked matrix multiply driver that selects copy routines and workspace,
 * then runs mmIJK2 over B/C in column panels.
 *
 * Outline:
 *  1. M, N and K are split into full NB blocks (nMb, nNb, nKb) plus
 *     remainders (ib, jb, kb).
 *  2. With fewer than 12 K-blocks results go straight into C; otherwise a
 *     separate NBNB-element block is allocated and written back via putblk.
 *  3. If B meets the no-copy conditions (K == NB, no transpose, ldb == NB,
 *     minimally aligned) it is used in place, and A is too when it meets the
 *     matching conditions; a single mmIJK2 call then finishes the job.
 *  4. Otherwise workspace for a copy of B (N*K elements) plus one panel of A
 *     (incK elements) is requested.  If that is not available, progressively
 *     narrower column panels of B are tried.
 *
 * RETURNS: 0 on success;
 *         -1 if a required allocation failed;
 *          1 if the full workspace was unavailable and both TA and TB are
 *            AtlasNoTrans (no panelled retry is attempted in that case).
 */
{
    size_t incA, incB, incC;
    size_t wrkBytes;   /* workspace request; size_t so large N*K cannot wrap */
    const size_t lda=lda0, ldb=ldb0, ldc=ldc0;
    const size_t incK = ATL_MulByNB((size_t)K);
    int N = N0;
    int nMb, nNb, nKb, ib, jb, kb, jb2, j, k, n;
    void *vA=NULL, *vC=NULL;
    TYPE *pA, *pB, *pC;
    MAT2BLK A2blk, B2blk;
    PUTBLK putblk;
    NBMM0 NBmm0;

    nMb = ATL_DivByNB(M);
    nNb = ATL_DivByNB(N);
    nKb = ATL_DivByNB(K);
    ib = M - ATL_MulByNB(nMb);
    jb = N - ATL_MulByNB(nNb);
    kb = K - ATL_MulByNB(nKb);

    /*
     * If K sufficiently large, write to temporary C as safety measure;  otherwise
     * write directly to C
     */
    if (nKb < 12)
    {
        putblk = NULL;
        pC = C;
        if ( SCALAR_IS_ONE(beta) ) NBmm0 = NBmm_b1;
        else if ( SCALAR_IS_ZERO(beta) ) NBmm0 = NBmm_b0;
        else NBmm0 = NBmm_bX;
    }
    else
    {
        NBmm0 = NBmm_b0;
        vC = malloc(ATL_Cachelen + ATL_MulBySize(NBNB));
        if (!vC) return(-1);
        pC = ATL_AlignPtr(vC);
        if ( SCALAR_IS_ONE(beta) ) putblk = Mjoin(PATL,putblk_b1);
        else if ( SCALAR_IS_ZERO(beta) ) putblk = Mjoin(PATL,putblk_b0);
        else if ( SCALAR_IS_NONE(beta) ) putblk = Mjoin(PATL,putblk_bn1);
        else putblk = Mjoin(PATL,putblk_bX);
    }
    /*
     * Special case if we don't need to copy one or more input matrix
     */
    if (K == NB && TB == AtlasNoTrans && ldb == NB && ATL_DataIsMinAligned(B))
    {
        if (lda == NB && TA == AtlasTrans && SCALAR_IS_ONE(alpha) &&
                ATL_DataIsMinAligned(A))
        {
            /*
             * A is used in place: no copy routine, no workspace, no stride
             */
            pA = (TYPE *) A;
            A = NULL;
            A2blk = NULL;
            incA = 0;
        }
        else
        {
            vA = malloc(ATL_Cachelen + ATL_MulBySize(incK));
            if (!vA)
            {
                free(vC);
                return(-1);
            }
            pA = ATL_AlignPtr(vA);
            if (TA == AtlasNoTrans)
            {
                incA = NB;
                if ( SCALAR_IS_ONE(alpha) ) A2blk = Mjoin(PATL,row2blkT_a1);
                else A2blk = Mjoin(PATL,row2blkT_aX);
            }
            else
            {
                incA = ATL_MulByNB(lda);
                if ( SCALAR_IS_ONE(alpha) ) A2blk = Mjoin(PATL,col2blk_a1);
                else A2blk = Mjoin(PATL,col2blk_aX);
            }
        }
        Mjoin(PATL,mmIJK2)(K, nMb, nNb, nKb, ib, jb, kb, alpha, A, lda, pA,
                           incA, A2blk, B, beta, C, ldc, pC, putblk, NBmm0);
        free(vA);
        free(vC);
        return(0);
    }
    /*
     * Workspace for all of B (N*K) plus one panel of A (incK).  The element
     * count is formed in size_t so that N*K cannot overflow int and slip a
     * wrapped, undersized request past the ATL_MaxMalloc check.
     */
    wrkBytes = ATL_Cachelen + ATL_MulBySize((size_t)N*K + incK);
    if (wrkBytes <= (size_t)(ATL_MaxMalloc)) vA = malloc(wrkBytes);
    if (!vA)
    {
        if (TA == AtlasNoTrans && TB == AtlasNoTrans)
        {
            free(vC);
            return(1);
        }
        /*
         * Retry with B split into j column panels of k NB-blocks each,
         * increasing j until an allocation succeeds or a panel would be
         * narrower than one block
         */
        if (jb) n = nNb + 1;
        else n = nNb;
        for (j=2; !vA; j++)
        {
            k = n / j;
            if (k < 1) break;
            if (k*j < n) k++;
            wrkBytes = ATL_Cachelen + ATL_MulBySize((k+1)*incK);
            if (wrkBytes <= (size_t)(ATL_MaxMalloc)) vA = malloc(wrkBytes);
        }
        if (!vA)
        {
            free(vC);
            return(-1);
        }
        n = ATL_MulByNB(k);
        jb2 = 0;
    }
    else
    {
        jb2 = jb;
        k = nNb;
        n = N;
    }
    pA = ATL_AlignPtr(vA);
    if (TB == AtlasNoTrans)
    {
        incB = ldb*n;
        if ( SCALAR_IS_ONE(alpha) ) B2blk = Mjoin(PATL,col2blk2_a1);
        else B2blk = Mjoin(PATL,col2blk2_aX);
    }
    else
    {
        incB = n;
        if ( SCALAR_IS_ONE(alpha) ) B2blk = Mjoin(PATL,row2blkT2_a1);
        else B2blk = Mjoin(PATL,row2blkT2_aX);
    }
    /*
     * On this path the B copy routine is the one chosen by alpha, so the A
     * copy always uses the _a1 variant
     */
    if (TA == AtlasNoTrans)
    {
        incA = NB;
        A2blk = Mjoin(PATL,row2blkT_a1);
    }
    else
    {
        incA = ATL_MulByNB(lda);
        A2blk = Mjoin(PATL,col2blk_a1);
    }
    incC = ldc*n;
    pB = pA + incK;   /* B copy is stored after the incK-element A panel */

    /*
     * One iteration per column panel: copy n columns of B, multiply, advance
     */
    do
    {
        if (TB == AtlasNoTrans) B2blk(K, n, B, ldb, pB, alpha);
        else B2blk(n, K, B, ldb, pB, alpha);
        Mjoin(PATL,mmIJK2)(K, nMb, k, nKb, ib, jb2, kb, alpha, A, lda, pA,
                           incA, A2blk, pB, beta, C, ldc, pC, putblk, NBmm0);
        N -= n;
        nNb -= k;
        if (N < n)
        {
            /* last (narrower) panel: takes the partial column block too */
            jb2 = jb;
            n = N;
            k = nNb;
        }
        C += incC;
        B += incB;
        if (!putblk) pC = C;   /* writing directly into C: track the panel */
    }
    while (N);

    free(vC);
    free(vA);
    return(0);
}
Exemplo n.º 4
0
void Mjoin(PATL,pputblk_diag)
   (const int M, const int N, const TYPE *V, const enum ATLAS_UPLO UC,
    TYPE *C, int ldc, int ldcinc, const SCALAR alpha, const SCALAR beta)
/*
 * Applies C <- beta*C + alpha*V to one triangle of C only.  V is read with a
 * fixed column stride of M.
 *    UC == AtlasUpper : column j touches rows 0..j
 *    otherwise        : column j touches rows j..M-1
 * The column stride of C is not constant: ldc changes by ldcinc at every
 * column.  In the upper case C is advanced by the current ldc and ldc is
 * adjusted afterwards; in the lower case ldc is adjusted first and C is then
 * advanced by the new value.
 * alpha equal to 1 or -1 and beta equal to 0 or 1 have dedicated loops; when
 * beta is 0, C is only written, never read.
 */
{
   const int upper = (UC == AtlasUpper);
   int row, col, aSel, bSel;

/*
 * Sweeps the columns of the requested triangle, executing stmt_ once for
 * every (row, col) entry in it and stepping C, V and ldc between columns.
 */
#define PPUT_TRI_(stmt_) \
   if (upper) \
   { \
      for (col=0; col < N; col++) \
      { \
         for (row=0; row <= col; row++) { stmt_; } \
         C += ldc; \
         V += M; \
         ldc += ldcinc; \
      } \
   } \
   else \
   { \
      for (col=0; col < N; col++) \
      { \
         ldc += ldcinc; \
         for (row=col; row < M; row++) { stmt_; } \
         C += ldc; \
         V += M; \
      } \
   }

/*
 * Classify the scalars once: aSel in {0,1,2} for alpha = 1, -1, other;
 * bSel in {0,3,6} for beta = 0, 1, other.  Their sum indexes the loop to run.
 */
   if (SCALAR_IS_ONE(alpha)) aSel = 0;
   else if (SCALAR_IS_NONE(alpha)) aSel = 1;
   else aSel = 2;

   if (SCALAR_IS_ZERO(beta)) bSel = 0;
   else if (SCALAR_IS_ONE(beta)) bSel = 3;
   else bSel = 6;

   switch (bSel + aSel)
   {
   case 0:   /* beta = 0, alpha = 1 */
      PPUT_TRI_(C[row] = V[row])
      break;
   case 1:   /* beta = 0, alpha = -1 */
      PPUT_TRI_(C[row] = -V[row])
      break;
   case 2:   /* beta = 0, alpha = X */
      PPUT_TRI_(C[row] = alpha * V[row])
      break;
   case 3:   /* beta = 1, alpha = 1 */
      PPUT_TRI_(C[row] += V[row])
      break;
   case 4:   /* beta = 1, alpha = -1 */
      PPUT_TRI_(C[row] -= V[row])
      break;
   case 5:   /* beta = 1, alpha = X */
      PPUT_TRI_(C[row] += alpha * V[row])
      break;
   case 6:   /* beta = X, alpha = 1 */
      PPUT_TRI_(C[row] = beta*C[row] + V[row])
      break;
   case 7:   /* beta = X, alpha = -1 */
      PPUT_TRI_(C[row] = beta*C[row] - V[row])
      break;
   default:  /* beta = X, alpha = X */
      PPUT_TRI_(C[row] = beta*C[row] + alpha * V[row])
      break;
   }
#undef PPUT_TRI_
}