Пример #1
0
void Mjoin( PATL, hpmv )
(
   const enum ATLAS_UPLO      UPLO,
   const int                  N,
   const SCALAR               ALPHA,
   const TYPE                 * A,
   const TYPE                 * X,
   const int                  INCX,
   const SCALAR               BETA,
   TYPE                       * Y,
   const int                  INCY
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, hpmv ) performs the matrix-vector operation
 *
 *    y := alpha * A * x + beta * y,
 *
 * where alpha and beta are scalars, x and y are n-element vectors and A
 * is an n by n Hermitian matrix, supplied in packed form.
 *
 * This is a blocked version of the algorithm.  For a more detailed des-
 * cription of  the arguments of this function, see the reference imple-
 * mentation in the ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*gpmv0)( const int, const int, const SCALAR,
                              const TYPE *, const int, const TYPE *, const int,
                              const SCALAR, TYPE *, const int );
   void                       (*gpmv1)( const int, const int, const SCALAR,
                              const TYPE *, const int, const TYPE *, const int,
                              const SCALAR, TYPE *, const int );
   void                       (*gpmvN)( const int, const int, const SCALAR,
                              const TYPE *, const int, const TYPE *, const int,
                              const SCALAR, TYPE *, const int );
#ifdef TREAL
   TYPE                       alphaY, beta0;
#define one                   ATL_rone
#define zero                  ATL_rzero
#else
   const TYPE                 * alphaY, * beta0;
   const TYPE                 one [2] = { ATL_rone,  ATL_rzero },
                              zero[2] = { ATL_rzero, ATL_rzero };
#endif
   void                       * vx = NULL, * vy = NULL;
   TYPE                       * A0, * A1, * x, * x0, * x1, * y, * y00, * y0,
                              * y1;
   int                        incXY, incXY1, j, jb, lda, lda0, lda1, mb, mb1,
                              n, nb;
/* ..
 * .. Executable Statements ..
 *
 */
   if( N == 0 ) return;

   if( SCALAR_IS_ZERO( ALPHA ) )
   {
      if( !( SCALAR_IS_ONE( BETA ) ) ) Mjoin( PATL, scal )( N, BETA, Y, INCY );
      return;
   }

   if( ( INCX != 1 ) || ( ( INCY == 1 ) && !( SCALAR_IS_ONE( ALPHA ) ) ) )
   {
      vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) );
      ATL_assert( vx ); x = ATL_AlignPtr( vx );
      Mjoin( PATL, cpsc )( N, ALPHA, X, INCX, x, 1 );
      alphaY = one;
   }
   else { x = (TYPE *)(X); alphaY = ALPHA; }

   if( ( INCY != 1 ) || !( SCALAR_IS_ONE( alphaY ) ) )
   {
      vy = malloc( ATL_Cachelen + ATL_MulBySize( N ) );
      ATL_assert( vy ); y00 = y = ATL_AlignPtr( vy );
      beta0 = zero;
   }
   else { y00 = y = (TYPE *)(Y); beta0 = BETA; }

   ATL_GetPartSPMV( A, N, &mb, &nb );

   mb1 = N - ( ( N - 1 ) / mb ) * mb; incXY1 = (nb SHIFT);

   if( UPLO == AtlasUpper )
   {
      if(      SCALAR_IS_ZERO( beta0 ) )
         gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b0_y1 );
      else if( SCALAR_IS_ONE ( beta0 ) )
         gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 );
      else
         gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_bX_y1 );
      gpmv1 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 );
      gpmvN = Mjoin( PATL, gpmvUN_a1_x1_b1_y1 );

      lda = 1; lda0 = lda; A0 = (TYPE *)(A); MUpnext( mb, A0, lda0 );
      incXY = (mb SHIFT); x0 = x + incXY; y0 = y + incXY;

      for( n = N - mb; n > 0; n -= mb, x0 += incXY, x += incXY,
           y0 += incXY, y += incXY )
      {
         Mjoin( PATL, hpmvU )( mb, A, lda, x, beta0, y );

         for( j = 0, lda1 = lda0, A1 = A0 - (mb SHIFT), x1 = x0, y1 = y0; j < n;
              j += nb, x1 += incXY1, y1 += incXY1 )
         {
            jb = n - j; jb = Mmin( jb, nb );
            gpmv0( jb, mb, one, A1, lda1, x,  1, beta0, y1, 1 );
            gpmvN( mb, jb, one, A1, lda1, x1, 1, one,   y,  1 );
            MUpnext( jb, A1, lda1 ); A1 -= (jb SHIFT);
         }
         beta0 = one; gpmv0 = gpmv1; lda = lda0; A = A0; MUpnext( mb, A0, lda0 );
      }
      Mjoin( PATL, hpmvU )( mb1, A, lda, x, beta0, y );
   }
   else
   {
      if(      SCALAR_IS_ZERO( beta0 ) )
         gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_b0_y1 );
      else if( SCALAR_IS_ONE ( beta0 ) )
         gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_b1_y1 );
      else
         gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_bX_y1 );
      gpmv1 = Mjoin( PATL, gpmvLC_a1_x1_b1_y1 );
      gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 );

      lda = N; lda0 = lda; A0 = (TYPE *)(A); MLpnext( N, A, lda );
      incXY = (mb SHIFT); x0 = x; y0 = y;

      for( n  = N - mb, x += ((N-mb) SHIFT), y += ((N-mb) SHIFT); n > 0;
           n -= mb, x -= incXY, y -= incXY )
      {
         MLpprev( mb, A, lda );
         Mjoin( PATL, hpmvL )( mb, A, lda, x, beta0, y );

         for( j = 0, lda1 = lda0, A1 = A0 + (n SHIFT), x1 = x0, y1 = y0; j < n;
              j += nb, x1 += incXY1, y1 += incXY1 )
         {
            jb = n - j; jb = Mmin( jb, nb );
            gpmv0( jb, mb, one, A1, lda1, x,  1, beta0, y1, 1 );
            gpmvN( mb, jb, one, A1, lda1, x1, 1, one,   y,  1 );
            MLpnext( jb, A1, lda1 ); A1 -= (jb SHIFT);
         }
         beta0 = one; gpmv0 = gpmv1;
      }
      Mjoin( PATL, hpmvL )( mb1, A0, lda0, x0, beta0, y0 );
   }

   if( vx ) free( vx );
   if( vy )
   { Mjoin( PATL, axpby )( N, alphaY, y00, 1, BETA, Y, INCY ); free( vy ); }
/*
 * End of Mjoin( PATL, hpmv )
 */
}
Пример #2
0
void Mjoin( PATL, spr2L )
(
   const int                  N,
   const TYPE                 * X,
   const TYPE                 * Y,
   TYPE                       * A,
   const int                  LDA
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, spr2L ) performs the symmetric rank 2 operation
 *
 *    A := alpha * x * y' + alpha * y * x' + A,
 *
 * where  alpha is a scalar, x and y are n-element vectors and A is an n
 * by n symmetric matrix, supplied in packed form.
 *
 * This is a  recursive  version of the  algorithm.  For a more detailed
 * description of  the arguments of this function, see the reference im-
 * plementation in the  ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef TREAL
#define    one                ATL_rone
#else
   const TYPE                 one[2] = { ATL_rone, ATL_rzero };
#endif
   TYPE                       * A0, * x0, * y0;
   int                        j, jb, jbs, lda = LDA, m, mb, nb;
#ifdef TREAL
#define   gpr      Mjoin( PATL, gpr1L_a1_x1_yX  )
#else
#define   gpr      Mjoin( PATL, gpr1uL_a1_x1_yX )
#endif
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartP1( A, LDA, mb, nb );

   x0 = (TYPE *)(X); y0 = (TYPE *)(Y);

   for( j = 0; j < N; j += nb )
   {
      jb = N - j; jb = Mmin( jb, nb );
      Mjoin( PATL, refspr2L )( jb, one, X, 1, Y, 1, A, lda );
      if( ( m = N-j-jb ) != 0 )
      {
         jbs = (jb SHIFT); A0 = (TYPE *)(A) + jbs; X += jbs; Y += jbs;
         gpr( m, jb, one, X, 1, y0, 1, A0, lda );
         gpr( m, jb, one, Y, 1, x0, 1, A0, lda );
         MLpnext( jb, A, lda ); x0 = (TYPE *)(X); y0 = (TYPE *)(Y);
      }
   }
/*
 * End of Mjoin( PATL, spr2L )
 */
}
Пример #3
0
void Mjoin( PATL, tpmvLC )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,         /* N > 0 assumed */
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, tpmvLC ) performs the following matrix-vector operation
 *
 *    x := conjg( A  ) * x,
 *
 * where x is an n-element vector and  A is an n by n unit or  non-unit,
 * lower triangular matrix supplied in packed form.
 *
 * This is a blocked version of the algorithm.  For a more detailed des-
 * cription of  the arguments of this function, see the reference imple-
 * mentation in the  ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef TREAL
#define    one                ATL_rone
#else
   TYPE                       one[2] = { ATL_rone, ATL_rzero };
#endif
#ifdef ATL_AXPYMV
   TYPE                       * A0, * x0;
   int                        incX, lda, lda0, mb, mb1, n, nb;
#else
   TYPE                       * x0;
   int                        incX, lda, m, mb, n1, nb, nb1;
#endif
   void                       (*tpmv0)( const int, const TYPE *, const int,
                              TYPE * );
#define    gpmv0              Mjoin( PATL, gpmvLNc_a1_x1_b1_y1  )
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit ) tpmv0 = Mjoin( PATL, tpmvLCN );
   else                       tpmv0 = Mjoin( PATL, tpmvLCU );

#ifdef ATL_AXPYMV
   mb1  = N - ( ( N - 1 ) / mb ) * mb; incX = (mb SHIFT); x0 = X;
   lda = lda0 = LDA; A0 = (TYPE *)(A); MLpnext( N, A, lda );

   for( n = N - mb, X += ((N-mb) SHIFT); n > 0; n -= mb, X -= incX )
   {
      MLpprev( mb, A, lda ); tpmv0( mb, A, lda, X );
      gpmv0( mb, n, one, A0 + (n SHIFT), lda0, x0, 1, one, X, 1 );
   }
   tpmv0( mb1, A0, lda0, x0 );
#else
   nb1  = N - ( n1  = ( ( N - 1 ) / nb ) * nb );
   incX = (nb SHIFT); x0 = ( X += (n1 SHIFT) );
   lda = LDA; MLpnext( n1, A, lda ); tpmv0( nb1, A, lda, X ); x0 = X; X -= incX;
   for( m = nb1; m < N; m += nb, X -= incX, x0 -= incX )
   {
      MLpprev( nb, A, lda );
      gpmv0( m, nb, one, A + (nb SHIFT), lda, X, 1, one, x0, 1 );
      tpmv0( nb, A, lda, X );
   }
#endif
/*
 * End of Mjoin( PATL, tpmvLC )
 */
}
Пример #4
0
void Mjoin( PATL, tpsvLH )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,         /* N > 0 assumed */
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, tpsvLH ) solves the following triangular system of equations
 *
 *    conjg( A' ) * x = b,
 *
 * where b and x are n-element vectors and  A  is an n by n unit or non-
 * unit, lower triangular matrix supplied in packed form.
 *
 * No test for  singularity  or  near-singularity  is included  in  this
 * routine. Such tests must be performed before calling this routine.
 *
 * This is a blocked version of the algorithm.  For a more detailed des-
 * cription of  the arguments of this function, see the reference imple-
 * mentation in the  ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void  (*tpsv0)( const int, const TYPE *, const int, TYPE * );
#ifdef TREAL
#define    none               ATL_rnone
#define    one                ATL_rone
#else
   TYPE                       none[2] = { ATL_rnone, ATL_rzero },
                              one [2] = { ATL_rone,  ATL_rzero };
#endif
   TYPE                       * A0, * x0;
   int                        incX, lda = LDA, mb, mb1, n, nb;
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVT( A, N, &mb, &nb );

   if( DIAG == AtlasNonUnit ) tpsv0 = Mjoin( PATL, tpsvLHN );
   else                       tpsv0 = Mjoin( PATL, tpsvLHU );

   mb1  = N - ( ( N - 1 ) / mb ) * mb; incX = (mb SHIFT); x0 = X;
   A0 = (TYPE *)(A); MLpnext( N-mb, A, lda );

   for( n  = N - mb, X += ((N-mb) SHIFT); n > 0; n -= mb, X -= incX )
   {
      tpsv0( mb, A, lda, X ); MLpprev( mb, A, lda );
      Mjoin( PATL, gpmv )( AtlasLower, AtlasConjTrans, n, mb, none,
                           A0 + (n SHIFT), LDA, X, 1, one, x0, 1 );
   }
   tpsv0( mb1, A0, LDA, x0 );
/*
 * End of Mjoin( PATL, tpsvLH )
 */
}
Пример #5
0
void Mjoin( PATL, spmvL )
(
   const int                  N,
   const TYPE                 * A,
   const int                  LDA,
   const TYPE                 * X,
   const SCALAR               BETA,
   TYPE                       * Y
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, spmvL ) performs the matrix-vector operation
 *
 *    y := alpha * A * x + beta * y,
 *
 * where alpha and beta are scalars, x and y are n-element vectors and A
 * is an n by n symmetric matrix, supplied in packed form.
 *
 * This is a  recursive  version of the  algorithm.  For a more detailed
 * description of  the arguments of this function, see the reference im-
 * plementation in the  ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*gpmvT)( const int, const int, const SCALAR,
                              const TYPE *, const int, const TYPE *, const int,
                              const SCALAR, TYPE *, const int );
   void                       (*gpmvN)( const int, const int, const SCALAR,
                              const TYPE *, const int, const TYPE *, const int,
                              const SCALAR, TYPE *, const int );
#ifdef TREAL
#define    one                ATL_rone
   TYPE                       beta0;
#else
   const TYPE                 * beta0, one[2] = { ATL_rone, ATL_rzero };
#endif
   TYPE                       * A0, * x0, * y0;
   int                        j, jb, jbs, lda = LDA, m, mb, nb;
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartSPMV( A, LDA, &mb, &nb );

   beta0 = BETA; x0 = (TYPE *)(X); y0 = (TYPE *)(Y);
   gpmvT = Mjoin( PATL, gpmvLT_a1_x1_b1_y1 );
   if(      SCALAR_IS_ZERO( beta0 ) ) gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b0_y1 );
   else if( SCALAR_IS_ONE ( beta0 ) ) gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 );
   else                               gpmvN = Mjoin( PATL, gpmvLN_a1_x1_bX_y1 );

   for( j = 0; j < N; j += nb )
   {
      jb = N - j; jb = Mmin( jb, nb );
      Mjoin( PATL, refspmvL )( jb, one, A, lda, X, 1, beta0, Y, 1 );

      if( ( m = N-j-jb ) != 0 )
      {
         jbs = (jb SHIFT); A0 = (TYPE *)(A) + jbs; X += jbs; Y += jbs;
         gpmvT( jb, m, one, A0, lda, X,  1, one,   y0, 1 );
         gpmvN( m, jb, one, A0, lda, x0, 1, beta0, Y,  1 );
         beta0 = one; gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 );
         MLpnext( jb, A, lda ); x0 = (TYPE *)(X); y0 = (TYPE *)(Y);
      }
   }
/*
 * End of Mjoin( PATL, spmvL )
 */
}