void Mjoin( PATL, hpmv ) ( const enum ATLAS_UPLO UPLO, const int N, const SCALAR ALPHA, const TYPE * A, const TYPE * X, const int INCX, const SCALAR BETA, TYPE * Y, const int INCY ) { /* * Purpose * ======= * * Mjoin( PATL, hpmv ) performs the matrix-vector operation * * y := alpha * A * x + beta * y, * * where alpha and beta are scalars, x and y are n-element vectors and A * is an n by n Hermitian matrix, supplied in packed form. * * This is a blocked version of the algorithm. For a more detailed des- * cription of the arguments of this function, see the reference imple- * mentation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ void (*gpmv0)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); void (*gpmv1)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); void (*gpmvN)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); #ifdef TREAL TYPE alphaY, beta0; #define one ATL_rone #define zero ATL_rzero #else const TYPE * alphaY, * beta0; const TYPE one [2] = { ATL_rone, ATL_rzero }, zero[2] = { ATL_rzero, ATL_rzero }; #endif void * vx = NULL, * vy = NULL; TYPE * A0, * A1, * x, * x0, * x1, * y, * y00, * y0, * y1; int incXY, incXY1, j, jb, lda, lda0, lda1, mb, mb1, n, nb; /* .. * .. Executable Statements .. * */ if( N == 0 ) return; if( SCALAR_IS_ZERO( ALPHA ) ) { if( !( SCALAR_IS_ONE( BETA ) ) ) Mjoin( PATL, scal )( N, BETA, Y, INCY ); return; } if( ( INCX != 1 ) || ( ( INCY == 1 ) && !( SCALAR_IS_ONE( ALPHA ) ) ) ) { vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) ); ATL_assert( vx ); x = ATL_AlignPtr( vx ); Mjoin( PATL, cpsc )( N, ALPHA, X, INCX, x, 1 ); alphaY = one; } else { x = (TYPE *)(X); alphaY = ALPHA; } if( ( INCY != 1 ) || !( SCALAR_IS_ONE( alphaY ) ) ) { vy = malloc( ATL_Cachelen + ATL_MulBySize( N ) ); ATL_assert( vy ); y00 = y = ATL_AlignPtr( vy ); beta0 = zero; } else { y00 = y = (TYPE *)(Y); beta0 = BETA; } ATL_GetPartSPMV( A, N, &mb, &nb ); mb1 = N - ( ( N - 1 ) / mb ) * mb; incXY1 = (nb SHIFT); if( UPLO == AtlasUpper ) { if( SCALAR_IS_ZERO( beta0 ) ) gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b0_y1 ); else if( SCALAR_IS_ONE ( beta0 ) ) gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 ); else gpmv0 = Mjoin( PATL, gpmvUC_a1_x1_bX_y1 ); gpmv1 = Mjoin( PATL, gpmvUC_a1_x1_b1_y1 ); gpmvN = Mjoin( PATL, gpmvUN_a1_x1_b1_y1 ); lda = 1; lda0 = lda; A0 = (TYPE *)(A); MUpnext( mb, A0, lda0 ); incXY = (mb SHIFT); x0 = x + incXY; y0 = y + incXY; for( n = N - mb; n > 0; n -= mb, x0 += incXY, x += incXY, y0 += incXY, y += incXY ) { Mjoin( PATL, hpmvU )( mb, A, lda, x, beta0, y ); for( j = 0, lda1 = lda0, A1 = A0 - (mb SHIFT), x1 = x0, y1 = y0; j < n; j += nb, x1 += incXY1, y1 += incXY1 ) { jb = n - j; jb = Mmin( jb, nb ); gpmv0( jb, mb, one, A1, lda1, x, 1, beta0, y1, 1 ); gpmvN( mb, jb, one, A1, lda1, x1, 1, one, y, 1 ); MUpnext( jb, A1, lda1 ); A1 -= (jb SHIFT); } beta0 = one; gpmv0 = gpmv1; lda = lda0; A = A0; MUpnext( mb, A0, lda0 ); } Mjoin( PATL, hpmvU )( mb1, A, lda, x, beta0, y ); } else { if( SCALAR_IS_ZERO( beta0 ) ) gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_b0_y1 ); else if( SCALAR_IS_ONE ( beta0 ) ) gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_b1_y1 ); else gpmv0 = Mjoin( PATL, gpmvLC_a1_x1_bX_y1 ); gpmv1 = Mjoin( PATL, gpmvLC_a1_x1_b1_y1 ); gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 ); lda = N; lda0 = lda; A0 = (TYPE *)(A); MLpnext( N, A, lda ); incXY = (mb SHIFT); x0 = x; y0 = y; for( n = N - mb, x += ((N-mb) SHIFT), y += ((N-mb) SHIFT); n > 0; n -= mb, x -= incXY, y -= incXY ) { MLpprev( mb, A, lda ); Mjoin( PATL, hpmvL )( mb, A, lda, x, beta0, y ); for( j = 0, lda1 = lda0, A1 = A0 + (n SHIFT), x1 = x0, y1 = y0; j < n; j += nb, x1 += incXY1, y1 += incXY1 ) { jb = n - j; jb = Mmin( jb, nb ); gpmv0( jb, mb, one, A1, lda1, x, 1, beta0, y1, 1 ); gpmvN( mb, jb, one, A1, lda1, x1, 1, one, y, 1 ); MLpnext( jb, A1, lda1 ); A1 -= (jb SHIFT); } beta0 = one; gpmv0 = gpmv1; } Mjoin( PATL, hpmvL )( mb1, A0, lda0, x0, beta0, y0 ); } if( vx ) free( vx ); if( vy ) { Mjoin( PATL, axpby )( N, alphaY, y00, 1, BETA, Y, INCY ); free( vy ); } /* * End of Mjoin( PATL, hpmv ) */ }
void Mjoin( PATL, spr2L ) ( const int N, const TYPE * X, const TYPE * Y, TYPE * A, const int LDA ) { /* * Purpose * ======= * * Mjoin( PATL, spr2L ) performs the symmetric rank 2 operation * * A := alpha * x * y' + alpha * y * x' + A, * * where alpha is a scalar, x and y are n-element vectors and A is an n * by n symmetric matrix, supplied in packed form. * * This is a recursive version of the algorithm. For a more detailed * description of the arguments of this function, see the reference im- * plementation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ #ifdef TREAL #define one ATL_rone #else const TYPE one[2] = { ATL_rone, ATL_rzero }; #endif TYPE * A0, * x0, * y0; int j, jb, jbs, lda = LDA, m, mb, nb; #ifdef TREAL #define gpr Mjoin( PATL, gpr1L_a1_x1_yX ) #else #define gpr Mjoin( PATL, gpr1uL_a1_x1_yX ) #endif /* .. * .. Executable Statements .. * */ ATL_GetPartP1( A, LDA, mb, nb ); x0 = (TYPE *)(X); y0 = (TYPE *)(Y); for( j = 0; j < N; j += nb ) { jb = N - j; jb = Mmin( jb, nb ); Mjoin( PATL, refspr2L )( jb, one, X, 1, Y, 1, A, lda ); if( ( m = N-j-jb ) != 0 ) { jbs = (jb SHIFT); A0 = (TYPE *)(A) + jbs; X += jbs; Y += jbs; gpr( m, jb, one, X, 1, y0, 1, A0, lda ); gpr( m, jb, one, Y, 1, x0, 1, A0, lda ); MLpnext( jb, A, lda ); x0 = (TYPE *)(X); y0 = (TYPE *)(Y); } } /* * End of Mjoin( PATL, spr2L ) */ }
void Mjoin( PATL, tpmvLC ) ( const enum ATLAS_DIAG DIAG, const int N, /* N > 0 assumed */ const TYPE * A, const int LDA, TYPE * X ) { /* * Purpose * ======= * * Mjoin( PATL, tpmvLC ) performs the following matrix-vector operation * * x := conjg( A ) * x, * * where x is an n-element vector and A is an n by n unit or non-unit, * lower triangular matrix supplied in packed form. * * This is a blocked version of the algorithm. For a more detailed des- * cription of the arguments of this function, see the reference imple- * mentation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ #ifdef TREAL #define one ATL_rone #else TYPE one[2] = { ATL_rone, ATL_rzero }; #endif #ifdef ATL_AXPYMV TYPE * A0, * x0; int incX, lda, lda0, mb, mb1, n, nb; #else TYPE * x0; int incX, lda, m, mb, n1, nb, nb1; #endif void (*tpmv0)( const int, const TYPE *, const int, TYPE * ); #define gpmv0 Mjoin( PATL, gpmvLNc_a1_x1_b1_y1 ) /* .. * .. Executable Statements .. * */ ATL_GetPartMVN( A, LDA, &mb, &nb ); if( DIAG == AtlasNonUnit ) tpmv0 = Mjoin( PATL, tpmvLCN ); else tpmv0 = Mjoin( PATL, tpmvLCU ); #ifdef ATL_AXPYMV mb1 = N - ( ( N - 1 ) / mb ) * mb; incX = (mb SHIFT); x0 = X; lda = lda0 = LDA; A0 = (TYPE *)(A); MLpnext( N, A, lda ); for( n = N - mb, X += ((N-mb) SHIFT); n > 0; n -= mb, X -= incX ) { MLpprev( mb, A, lda ); tpmv0( mb, A, lda, X ); gpmv0( mb, n, one, A0 + (n SHIFT), lda0, x0, 1, one, X, 1 ); } tpmv0( mb1, A0, lda0, x0 ); #else nb1 = N - ( n1 = ( ( N - 1 ) / nb ) * nb ); incX = (nb SHIFT); x0 = ( X += (n1 SHIFT) ); lda = LDA; MLpnext( n1, A, lda ); tpmv0( nb1, A, lda, X ); x0 = X; X -= incX; for( m = nb1; m < N; m += nb, X -= incX, x0 -= incX ) { MLpprev( nb, A, lda ); gpmv0( m, nb, one, A + (nb SHIFT), lda, X, 1, one, x0, 1 ); tpmv0( nb, A, lda, X ); } #endif /* * End of Mjoin( PATL, tpmvLC ) */ }
void Mjoin( PATL, tpsvLH ) ( const enum ATLAS_DIAG DIAG, const int N, /* N > 0 assumed */ const TYPE * A, const int LDA, TYPE * X ) { /* * Purpose * ======= * * Mjoin( PATL, tpsvLH ) solves the following triangular system of equations * * conjg( A' ) * x = b, * * where b and x are n-element vectors and A is an n by n unit or non- * unit, lower triangular matrix supplied in packed form. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * This is a blocked version of the algorithm. For a more detailed des- * cription of the arguments of this function, see the reference imple- * mentation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ void (*tpsv0)( const int, const TYPE *, const int, TYPE * ); #ifdef TREAL #define none ATL_rnone #define one ATL_rone #else TYPE none[2] = { ATL_rnone, ATL_rzero }, one [2] = { ATL_rone, ATL_rzero }; #endif TYPE * A0, * x0; int incX, lda = LDA, mb, mb1, n, nb; /* .. * .. Executable Statements .. * */ ATL_GetPartMVT( A, N, &mb, &nb ); if( DIAG == AtlasNonUnit ) tpsv0 = Mjoin( PATL, tpsvLHN ); else tpsv0 = Mjoin( PATL, tpsvLHU ); mb1 = N - ( ( N - 1 ) / mb ) * mb; incX = (mb SHIFT); x0 = X; A0 = (TYPE *)(A); MLpnext( N-mb, A, lda ); for( n = N - mb, X += ((N-mb) SHIFT); n > 0; n -= mb, X -= incX ) { tpsv0( mb, A, lda, X ); MLpprev( mb, A, lda ); Mjoin( PATL, gpmv )( AtlasLower, AtlasConjTrans, n, mb, none, A0 + (n SHIFT), LDA, X, 1, one, x0, 1 ); } tpsv0( mb1, A0, LDA, x0 ); /* * End of Mjoin( PATL, tpsvLH ) */ }
void Mjoin( PATL, spmvL ) ( const int N, const TYPE * A, const int LDA, const TYPE * X, const SCALAR BETA, TYPE * Y ) { /* * Purpose * ======= * * Mjoin( PATL, spmvL ) performs the matrix-vector operation * * y := alpha * A * x + beta * y, * * where alpha and beta are scalars, x and y are n-element vectors and A * is an n by n symmetric matrix, supplied in packed form. * * This is a recursive version of the algorithm. For a more detailed * description of the arguments of this function, see the reference im- * plementation in the ATLAS/src/blas/reference directory. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ void (*gpmvT)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); void (*gpmvN)( const int, const int, const SCALAR, const TYPE *, const int, const TYPE *, const int, const SCALAR, TYPE *, const int ); #ifdef TREAL #define one ATL_rone TYPE beta0; #else const TYPE * beta0, one[2] = { ATL_rone, ATL_rzero }; #endif TYPE * A0, * x0, * y0; int j, jb, jbs, lda = LDA, m, mb, nb; /* .. * .. Executable Statements .. * */ ATL_GetPartSPMV( A, LDA, &mb, &nb ); beta0 = BETA; x0 = (TYPE *)(X); y0 = (TYPE *)(Y); gpmvT = Mjoin( PATL, gpmvLT_a1_x1_b1_y1 ); if( SCALAR_IS_ZERO( beta0 ) ) gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b0_y1 ); else if( SCALAR_IS_ONE ( beta0 ) ) gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 ); else gpmvN = Mjoin( PATL, gpmvLN_a1_x1_bX_y1 ); for( j = 0; j < N; j += nb ) { jb = N - j; jb = Mmin( jb, nb ); Mjoin( PATL, refspmvL )( jb, one, A, lda, X, 1, beta0, Y, 1 ); if( ( m = N-j-jb ) != 0 ) { jbs = (jb SHIFT); A0 = (TYPE *)(A) + jbs; X += jbs; Y += jbs; gpmvT( jb, m, one, A0, lda, X, 1, one, y0, 1 ); gpmvN( m, jb, one, A0, lda, x0, 1, beta0, Y, 1 ); beta0 = one; gpmvN = Mjoin( PATL, gpmvLN_a1_x1_b1_y1 ); MLpnext( jb, A, lda ); x0 = (TYPE *)(X); y0 = (TYPE *)(Y); } } /* * End of Mjoin( PATL, spmvL ) */ }