int main(int nargs, char **args) { int m, n, lda=4; TYPE a[4]; printf("\nGEMM: NB=%d, lat=%d, mu=%d, nu=%d, ku=%d\n\n", NB, ATL_mmLAT, ATL_mmMU, ATL_mmNU, ATL_mmKU); ATL_GetPartMVN(a, lda, &m, &n); printf("mvN block = %d x %d; mu=%d, nu=%d\n", m, n, ATL_mvNMU, ATL_mvNNU); ATL_GetPartMVT(a, lda, &m, &n); printf("mvT block = %d x %d; mu=%d, nu=%d\n\n", m, n, ATL_mvTMU, ATL_mvTNU); ATL_GetPartSYMV(a, lda, &m, &n); printf("symv block = %d x %d\n\n", m, n); ATL_GetPartR1(a, lda, m, n); printf("ger block = %d x %d; mu=%d, nu=%d\n\n", m, n, ATL_r1MU, ATL_r1NU); return(0); }
void Mjoin( PATL, tbsvUC )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const int                  K,
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Blocked driver for the triangular band solve
 *
 *    conjg( A ) * x = b,
 *
 * with A an n by n upper triangular band matrix (unit or non-unit
 * diagonal, chosen by DIAG) and b, x n-element vectors.  X is updated
 * in place by the kernels invoked below.
 *
 * Nothing here checks for a singular or nearly singular A; the caller
 * is responsible for that.
 *
 * Diagonal blocks go to the unblocked tbsvUCN / tbsvUCU kernel and the
 * coupling between blocks is applied through gbmv using the none / one
 * scalars.  Argument details follow the reference implementation in
 * ATLAS/src/blas/reference.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*diag_solve)( const int, const int,
                                 const TYPE *, const int, TYPE * );
#ifdef TREAL
#define    lda2               LDA
#define    one                ATL_rone
#define    none               ATL_rnone
#else
   TYPE                       none[2] = { ATL_rnone, ATL_rzero },
                              one [2] = { ATL_rone,  ATL_rzero };
   const int                  lda2 = ( LDA SHIFT );
#endif
   int                        mb, nb;          /* partition sizes        */
   int                        row0, col0;      /* start of updated rows /
                                                  start of coupling cols */
   int                        nrow, ncol;      /* gbmv panel extent      */
   int                        sub, sup;        /* gbmv kl / ku           */
#ifdef ATL_AXPYMV
   int                        blk, done, tail, tail_len;
#else
   int                        head_len;
#endif
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit )
   {
      diag_solve = Mjoin( PATL, tbsvUCN );
   }
   else
   {
      diag_solve = Mjoin( PATL, tbsvUCU );
   }

#ifdef ATL_AXPYMV
/*
 * The trailing (possibly short) block is solved first, then the blocks
 * above it are handled bottom-up in steps of mb.
 */
   tail     = ( ( N - 1 ) / mb ) * mb;
   tail_len = N - tail;

   diag_solve( tail_len, K, A+tail*lda2, LDA, X+(tail SHIFT) );

   blk  = tail - mb;
   done = tail_len;

   while( done < N )
   {
      col0 = blk + mb;
      row0 = mb - K;
      row0 = blk + Mmax( row0, 0 );
      nrow = col0 - row0;
      ncol = N - col0;
      ncol = Mmin( ncol, K );
      sub  = nrow - 1;
      sub  = Mmax( sub, 0 );
      sup  = K - 1 - sub;
      sup  = Mmax( sup, 0 );

      Mjoin( PATL, gbmv )( AtlasConj, nrow, ncol, sub, sup, none,
                           A+col0*lda2, LDA, X+(col0 SHIFT), 1, one,
                           X+(row0 SHIFT), 1 );
      diag_solve( mb, K, A+blk*lda2, LDA, X+(blk SHIFT) );

      done += mb;
      blk  -= mb;
   }
#else
/*
 * Column blocks of width nb are processed from the right; the leading
 * (possibly short) block is solved last.
 */
   head_len = N - ( ( N - 1 ) / nb ) * nb;

   col0 = N - nb;

   while( col0 > 0 )
   {
      row0 = col0 - K;
      row0 = Mmax( row0, 0 );
      nrow = col0 - row0;
      ncol = Mmin( nb, K );
      sub  = nrow - 1;
      sub  = Mmax( sub, 0 );
      sup  = K - 1 - sub;
      sup  = Mmax( sup, 0 );

      diag_solve( nb, K, A+col0*lda2, LDA, X+(col0 SHIFT) );
      Mjoin( PATL, gbmv )( AtlasConj, nrow, ncol, sub, sup, none,
                           A+col0*lda2, LDA, X+(col0 SHIFT), 1, one,
                           X+(row0 SHIFT), 1 );

      col0 -= nb;
   }

   diag_solve( head_len, K, A, LDA, X );
#endif
/*
 * End of Mjoin( PATL, tbsvUC )
 */
}
void Mjoin( PATL, tpsvUC )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, tpsvUC ) solves the following triangular system of equations
 *
 *    conjg( A ) * x = b,
 *
 * where b and x are n-element vectors and A is an n by n unit or non-
 * unit, upper triangular matrix supplied in packed form.  X is updated
 * in place by the kernels invoked below.
 *
 * No test for singularity or near-singularity is included in this
 * routine. Such tests must be performed before calling this routine.
 *
 * This is a blocked version of the algorithm: diagonal blocks are
 * handed to the unblocked tpsvUCN / tpsvUCU kernel (selected by DIAG)
 * and the coupling between blocks is applied with gpmv using the
 * none / one scalars.  For a more detailed description of the
 * arguments of this function, see the reference implementation in the
 * ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*tpsv0)( const int, const TYPE *,
                                 const int, TYPE * );
#ifdef TREAL
#define    none               ATL_rnone
#define    one                ATL_rone
#else
   TYPE                       none[2] = { ATL_rnone, ATL_rzero },
                              one [2] = { ATL_rone,  ATL_rzero };
#endif
#ifdef ATL_AXPYMV
   TYPE                       * x0;
   int                        incX, lda = LDA, m1, mb, mb1, n, nb;
#else
   TYPE                       * A0, * x0;
   int                        incX, lda = LDA, m, mb, nb, nb1;
#endif
/* ..
 * .. Executable Statements ..
 *
 */
/*
 * The second argument of the partition query is the leading dimension,
 * so LDA is passed here (not the order N).
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit ) tpsv0 = Mjoin( PATL, tpsvUCN );
   else                       tpsv0 = Mjoin( PATL, tpsvUCU );

#ifdef ATL_AXPYMV
/*
 * Solve the trailing (possibly short) block first, then walk upwards in
 * steps of mb.  MUpnext / MUpprev are project macros that update A and
 * lda in place -- presumably stepping the packed-storage position
 * forward / backward; confirm against their definition.
 */
   m1   = ( ( N - 1 ) / mb ) * mb;
   mb1  = N - m1;
   incX = (mb SHIFT);

   X += (m1 SHIFT);
   x0 = X;
   MUpnext( m1, A, lda );

   tpsv0( mb1, A, lda, X );
   X -= incX;

   for( n = mb1; n < N; n += mb, X -= incX, x0 -= incX )
   {
      Mjoin( PATL, gpmv )( AtlasUpper, AtlasConj, mb, n, none, A - incX,
                           lda, x0, 1, one, X, 1 );
      MUpprev( mb, A, lda );
      tpsv0( mb, A, lda, X );
   }
#else
/*
 * Column blocks of width nb are processed from the right; the leading
 * (possibly short) block is solved last from the saved A0 / x0 / LDA.
 */
   nb1  = N - ( ( N - 1 ) / nb ) * nb;
   incX = (nb SHIFT);

   x0 = X;
   A0 = (TYPE *)(A);
   MUpnext( N-nb, A, lda );

   for( m = N - nb, X += ((N-nb) SHIFT); m > 0; m -= nb, X -= incX )
   {
      tpsv0( nb, A, lda, X );
      Mjoin( PATL, gpmv )( AtlasUpper, AtlasConj, m, nb, none,
                           A - (m SHIFT), lda, X, 1, one, x0, 1 );
      MUpprev( nb, A, lda );
   }

   tpsv0( nb1, A0, LDA, x0 );
#endif
/*
 * End of Mjoin( PATL, tpsvUC )
 */
}
void Mjoin( PATL, trsvUN )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Blocked driver for the triangular solve
 *
 *    A * x = b,
 *
 * with A an n by n upper triangular matrix (unit or non-unit diagonal,
 * chosen by DIAG) and b, x n-element vectors.  X is updated in place by
 * the kernels invoked below.
 *
 * Nothing here checks for a singular or nearly singular A; the caller
 * is responsible for that.
 *
 * Diagonal blocks go to the unblocked trsvUNN / trsvUNU kernel and the
 * off-diagonal panels are applied through gemv using the none / one
 * scalars.  Argument details follow the reference implementation in
 * ATLAS/src/blas/reference.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*diag_solve)( const int, const TYPE *,
                                 const int, TYPE * );
#ifdef TREAL
#define    none               ATL_rnone
#define    one                ATL_rone
#define    lda2               LDA
#else
   TYPE                       none[2] = { ATL_rnone, ATL_rzero },
                              one [2] = { ATL_rone,  ATL_rzero };
   const int                  lda2 = ( LDA SHIFT );
#endif
   TYPE                       * a_aux, * x_aux;
   int                        stepA, stepX, mb, nb;
#ifdef ATL_AXPYMV
   int                        tail, tail_len, done;
#else
   int                        left, head_len;
#endif
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit )
   {
      diag_solve = Mjoin( PATL, trsvUNN );
   }
   else
   {
      diag_solve = Mjoin( PATL, trsvUNU );
   }

#ifdef ATL_AXPYMV
/*
 * Start at the trailing (possibly short) diagonal block and move up the
 * diagonal in steps of mb.  a_aux tracks the panel to the right of the
 * current diagonal block, x_aux the part of X already solved.
 */
   tail     = ( ( N - 1 ) / mb ) * mb;
   tail_len = N - tail;
   stepX    = (mb SHIFT);
   stepA    = stepX + mb * lda2;

   A += (tail SHIFT) + tail * lda2;
   X += (tail SHIFT);

   a_aux = (TYPE *)(A) - stepX;
   x_aux = X;

   diag_solve( tail_len, A, LDA, X );

   X -= stepX;
   A -= stepA;

   done = tail_len;

   while( done < N )
   {
      Mjoin( PATL, gemv )( AtlasNoTrans, mb, done, none, a_aux, LDA,
                           x_aux, 1, one, X, 1 );
      diag_solve( mb, A, LDA, X );

      done  += mb;
      a_aux -= stepA;
      A     -= stepA;
      X     -= stepX;
      x_aux -= stepX;
   }
#else
/*
 * Column blocks of width nb are processed from the right: solve the
 * diagonal block, then push its contribution into the rows above it.
 * The leading (possibly short) block is solved last.
 */
   head_len = N - ( ( N - 1 ) / nb ) * nb;
   stepA    = nb * lda2;
   stepX    = (nb SHIFT);

   a_aux = (TYPE *)(A);
   x_aux = X;

   left = N - nb;
   A   += (N-nb) * lda2;
   X   += ((N-nb) SHIFT);

   while( left > 0 )
   {
      diag_solve( nb, A+(left SHIFT), LDA, X );
      Mjoin( PATL, gemv )( AtlasNoTrans, left, nb, none, A, LDA, X, 1,
                           one, x_aux, 1 );

      left -= nb;
      A    -= stepA;
      X    -= stepX;
   }

   diag_solve( head_len, a_aux, LDA, x_aux );
#endif
/*
 * End of Mjoin( PATL, trsvUN )
 */
}
void Mjoin( PATL, tbmvUC )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const int                  K,
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Blocked driver for the triangular band matrix-vector product
 *
 *    x := conjg( A ) * x,
 *
 * with x an n-element vector and A an n by n upper triangular band
 * matrix (unit or non-unit diagonal, chosen by DIAG).
 *
 * Diagonal blocks go to the unblocked tbmvUCN / tbmvUCU kernel and the
 * coupling between blocks is accumulated with the gbmv0 kernel.
 * Argument details follow the reference implementation in
 * ATLAS/src/blas/reference.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*diag_mult)( const int, const int,
                                 const TYPE *, const int, TYPE * );
#ifdef TREAL
#define    one                ATL_rone
#define    lda2               LDA
#else
   const int                  lda2 = ( LDA SHIFT );
   TYPE                       one[2] = { ATL_rone, ATL_rzero };
#endif
   int                        mb, nb;          /* partition sizes        */
   int                        row0, col0;      /* start of updated rows /
                                                  start of coupling cols */
   int                        nrow, ncol;      /* gbmv0 panel extent     */
   int                        sub, sup;        /* gbmv0 kl / ku          */
#ifdef ATL_AXPYMV
   int                        blk, left, tail_len;
#else
   int                        head_len;
#endif
#define    gbmv0              Mjoin( PATL, gbmvNc_a1_x1_b1_y1 )
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit )
   {
      diag_mult = Mjoin( PATL, tbmvUCN );
   }
   else
   {
      diag_mult = Mjoin( PATL, tbmvUCU );
   }

#ifdef ATL_AXPYMV
/*
 * Row blocks of height mb are processed top-down; blk is still needed
 * after the loop to locate the trailing (possibly short) block.
 */
   tail_len = N - ( ( N - 1 ) / mb ) * mb;

   blk  = 0;
   left = N - mb;

   while( left > 0 )
   {
      col0 = blk + mb;
      row0 = mb - K;
      row0 = blk + Mmax( row0, 0 );
      nrow = col0 - row0;
      ncol = N - col0;
      ncol = Mmin( ncol, K );
      sub  = nrow - 1;
      sub  = Mmax( sub, 0 );
      sup  = K - 1 - sub;
      sup  = Mmax( sup, 0 );

      diag_mult( mb, K, A+blk*lda2, LDA, X+(blk SHIFT) );
      gbmv0( nrow, ncol, sub, sup, one, A+col0*lda2, LDA,
             X+(col0 SHIFT), 1, one, X+(row0 SHIFT), 1 );

      left -= mb;
      blk  += mb;
   }

   diag_mult( tail_len, K, A+blk*lda2, LDA, X+(blk SHIFT) );
#else
/*
 * The leading (possibly short) block is multiplied first; the remaining
 * column blocks of width nb follow left to right, each contributing to
 * the rows above it before its own diagonal block is applied.
 */
   head_len = N - ( ( N - 1 ) / nb ) * nb;

   diag_mult( head_len, K, A, LDA, X );

   col0 = head_len;

   while( col0 < N )
   {
      row0 = col0 - K;
      row0 = Mmax( row0, 0 );
      nrow = col0 - row0;
      ncol = Mmin( nb, K );
      sub  = nrow - 1;
      sub  = Mmax( sub, 0 );
      sup  = K - 1 - sub;
      sup  = Mmax( sup, 0 );

      gbmv0( nrow, ncol, sub, sup, one, A+col0*lda2, LDA,
             X+(col0 SHIFT), 1, one, X+(row0 SHIFT), 1 );
      diag_mult( nb, K, A+col0*lda2, LDA, X+(col0 SHIFT) );

      col0 += nb;
   }
#endif
/*
 * End of Mjoin( PATL, tbmvUC )
 */
}
void Mjoin( PATL, tpmvLC )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Blocked driver for the packed triangular matrix-vector product
 *
 *    x := conjg( A ) * x,
 *
 * with x an n-element vector and A an n by n lower triangular matrix
 * (unit or non-unit diagonal, chosen by DIAG) supplied in packed form.
 *
 * Diagonal blocks go to the unblocked tpmvLCN / tpmvLCU kernel and the
 * off-diagonal panels are accumulated with the gpmv0 kernel.  MLpnext /
 * MLpprev are project macros that update A and the running lda in
 * place -- presumably stepping the packed-storage position forward /
 * backward; confirm against their definition.  Argument details follow
 * the reference implementation in ATLAS/src/blas/reference.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef TREAL
#define    one                ATL_rone
#else
   TYPE                       one[2] = { ATL_rone, ATL_rzero };
#endif
#ifdef ATL_AXPYMV
   TYPE                       * a_head, * x_aux;
   int                        stepX, ld, ld_head, mb, head_len, left, nb;
#else
   TYPE                       * x_aux;
   int                        stepX, ld, done, mb, tail, nb, tail_len;
#endif
   void                       (*diag_mult)( const int, const TYPE *,
                                 const int, TYPE * );
#define    gpmv0              Mjoin( PATL, gpmvLNc_a1_x1_b1_y1 )
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit )
   {
      diag_mult = Mjoin( PATL, tpmvLCN );
   }
   else
   {
      diag_mult = Mjoin( PATL, tpmvLCU );
   }

#ifdef ATL_AXPYMV
/*
 * Row blocks of height mb are processed bottom-up; the leading
 * (possibly short) block is handled last from the saved start of A / X.
 */
   head_len = N - ( ( N - 1 ) / mb ) * mb;
   stepX    = (mb SHIFT);

   x_aux   = X;
   ld_head = LDA;
   ld      = ld_head;
   a_head  = (TYPE *)(A);

   MLpnext( N, A, ld );

   left = N - mb;
   X   += ((N-mb) SHIFT);

   while( left > 0 )
   {
      MLpprev( mb, A, ld );
      diag_mult( mb, A, ld, X );
      gpmv0( mb, left, one, a_head + (left SHIFT), ld_head, x_aux, 1,
             one, X, 1 );

      left -= mb;
      X    -= stepX;
   }

   diag_mult( head_len, a_head, ld_head, x_aux );
#else
/*
 * The trailing (possibly short) block is multiplied first; the column
 * blocks of width nb to its left follow right to left, each
 * contributing to the rows below it before its own diagonal block.
 */
   tail     = ( ( N - 1 ) / nb ) * nb;
   tail_len = N - tail;
   stepX    = (nb SHIFT);

   X += (tail SHIFT);
   ld = LDA;
   MLpnext( tail, A, ld );

   diag_mult( tail_len, A, ld, X );

   x_aux = X;
   X    -= stepX;

   done = tail_len;

   while( done < N )
   {
      MLpprev( nb, A, ld );
      gpmv0( done, nb, one, A + (nb SHIFT), ld, X, 1, one, x_aux, 1 );
      diag_mult( nb, A, ld, X );

      done  += nb;
      X     -= stepX;
      x_aux -= stepX;
   }
#endif
/*
 * End of Mjoin( PATL, tpmvLC )
 */
}
void Mjoin( PATL, tbmvLN )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const int                  K,
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Blocked driver for the triangular band matrix-vector product
 *
 *    x := A * x,
 *
 * with x an n-element vector and A an n by n lower triangular band
 * matrix (unit or non-unit diagonal, chosen by DIAG).
 *
 * Diagonal blocks go to the unblocked tbmvLNN / tbmvLNU kernel and the
 * coupling between blocks is accumulated with the gbmv0 kernel.
 * Argument details follow the reference implementation in
 * ATLAS/src/blas/reference.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   void                       (*diag_mult)( const int, const int,
                                 const TYPE *, const int, TYPE * );
#ifdef TREAL
#define    lda2               LDA
#define    one                ATL_rone
#else
   TYPE                       one[2] = { ATL_rone, ATL_rzero };
   const int                  lda2 = ( LDA SHIFT );
#endif
   int                        mb, nb;          /* partition sizes        */
   int                        col0, row0;      /* start of coupling cols /
                                                  start of updated rows  */
   int                        nrow, ncol;      /* gbmv0 panel extent     */
   int                        sub, sup;        /* gbmv0 kl / ku          */
#ifdef ATL_AXPYMV
   int                        head_len;
#else
   int                        done, tail, tail_len;
#endif
#define    gbmv0              Mjoin( PATL, gbmvN_a1_x1_b1_y1 )
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit )
   {
      diag_mult = Mjoin( PATL, tbmvLNN );
   }
   else
   {
      diag_mult = Mjoin( PATL, tbmvLNU );
   }

#ifdef ATL_AXPYMV
/*
 * Row blocks of height mb are processed bottom-up; the leading
 * (possibly short) block is multiplied last.
 */
   head_len = N - ( ( N - 1 ) / mb ) * mb;

   row0 = N - mb;

   while( row0 > 0 )
   {
      col0 = row0 - K;
      col0 = Mmax( col0, 0 );
      nrow = Mmin( mb, K );
      ncol = row0 - col0;
      sup  = ncol;
      sub  = K - sup;
      sub  = Mmax( sub, 0 );

      diag_mult( mb, K, A+row0*lda2, LDA, X+(row0 SHIFT) );
      gbmv0( nrow, ncol, sub, sup, one, A+col0*lda2, LDA,
             X+(col0 SHIFT), 1, one, X+(row0 SHIFT), 1 );

      row0 -= mb;
   }

   diag_mult( head_len, K, A, LDA, X );
#else
/*
 * The trailing (possibly short) block is multiplied first; the column
 * blocks of width nb to its left follow right to left, each
 * contributing to the rows below it before its own diagonal block.
 */
   tail     = ( ( N - 1 ) / nb ) * nb;
   tail_len = N - tail;

   diag_mult( tail_len, K, A + tail*lda2, LDA, X+(tail SHIFT) );

   done = tail_len;
   row0 = tail;

   while( done < N )
   {
      col0 = nb - K;
      col0 = row0 - nb + Mmax( col0, 0 );
      nrow = Mmin( done, K );
      ncol = row0 - col0;
      sup  = ncol;
      sub  = K - sup;
      sub  = Mmax( sub, 0 );

      gbmv0( nrow, ncol, sub, sup, one, A+col0*lda2, LDA,
             X+(col0 SHIFT), 1, one, X+(row0 SHIFT), 1 );

      row0 -= nb;
      diag_mult( nb, K, A+row0*lda2, LDA, X+(row0 SHIFT) );

      done += nb;
   }
#endif
/*
 * End of Mjoin( PATL, tbmvLN )
 */
}
void Mjoin( PATL, trmvLC )
(
   const enum ATLAS_DIAG      DIAG,
   const int                  N,       /* N > 0 assumed */
   const TYPE                 * A,
   const int                  LDA,
   TYPE                       * X
)
{
/*
 * Purpose
 * =======
 *
 * Blocked driver for the triangular matrix-vector product
 *
 *    x := conjg( A ) * x,
 *
 * with x an n-element vector and A an n by n lower triangular matrix
 * (unit or non-unit diagonal, chosen by DIAG).
 *
 * Diagonal blocks go to the unblocked trmvLCN / trmvLCU kernel and the
 * off-diagonal panels are accumulated with the gemv0 kernel.  Argument
 * details follow the reference implementation in
 * ATLAS/src/blas/reference.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef TREAL
#define    one                ATL_rone
#define    lda2               LDA
#else
   TYPE                       one[2] = { ATL_rone, ATL_rzero };
   const int                  lda2 = ( LDA SHIFT );
#endif
   TYPE                       * a_aux, * x_aux;
#ifdef ATL_AXPYMV
   int                        stepA, stepX, mb, head_len, left, nb;
#else
   int                        stepA, stepX, done, mb, tail, nb, tail_len;
#endif
   void                       (*diag_mult)( const int, const TYPE *,
                                 const int, TYPE * );
#define    gemv0              Mjoin( PATL, gemvNc_a1_x1_b1_y1 )
/* ..
 * .. Executable Statements ..
 *
 */
   ATL_GetPartMVN( A, LDA, &mb, &nb );

   if( DIAG == AtlasNonUnit )
   {
      diag_mult = Mjoin( PATL, trmvLCN );
   }
   else
   {
      diag_mult = Mjoin( PATL, trmvLCU );
   }

#ifdef ATL_AXPYMV
/*
 * Row blocks of height mb are processed bottom-up.  A walks up the
 * first column, so A + left*lda2 is the diagonal block of the current
 * row block.  The leading (possibly short) block is multiplied last.
 */
   head_len = N - ( ( N - 1 ) / mb ) * mb;
   stepX    = (mb SHIFT);
   stepA    = stepX;

   a_aux = (TYPE *)(A);
   x_aux = X;

   left = N - mb;
   A   += ((N-mb) SHIFT);
   X   += ((N-mb) SHIFT);

   while( left > 0 )
   {
      diag_mult( mb, A+left*lda2, LDA, X );
      gemv0( mb, left, one, A, LDA, x_aux, 1, one, X, 1 );

      left -= mb;
      A    -= stepA;
      X    -= stepX;
   }

   diag_mult( head_len, a_aux, LDA, x_aux );
#else
/*
 * The trailing (possibly short) diagonal block is multiplied first.
 * Moving up the diagonal in steps of nb, a_aux tracks the panel below
 * the current diagonal block and x_aux the rows it contributes to.
 */
   tail     = ( ( N - 1 ) / nb ) * nb;
   tail_len = N - tail;
   stepX    = (nb SHIFT);
   stepA    = nb * lda2 + stepX;

   X += (tail SHIFT);
   A += (tail SHIFT) + tail * lda2;

   diag_mult( tail_len, A, LDA, X );

   x_aux = X;
   X    -= stepX;
   A    -= stepA;
   a_aux = (TYPE *)(A) + stepX;

   done = tail_len;

   while( done < N )
   {
      gemv0( done, nb, one, a_aux, LDA, X, 1, one, x_aux, 1 );
      diag_mult( nb, A, LDA, X );

      done  += nb;
      a_aux -= stepA;
      A     -= stepA;
      X     -= stepX;
      x_aux -= stepX;
   }
#endif
/*
 * End of Mjoin( PATL, trmvLC )
 */
}