void ATL_crefgbmvT
(
   const int                  M,
   const int                  N,
   const int                  KL,
   const int                  KU,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * X,
   const int                  INCX,
   const float                * BETA,
   float                      * Y,
   const int                  INCY
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefgbmvT( ... ) <=> ATL_crefgbmv( AtlasTrans, ... )
 *
 * See ATL_crefgbmv for details.
 *
 * For every column index col in [0, M) the routine accumulates, over
 * the rows lo .. hi of the band, the products of the (re, im) pairs
 * read from A and X, rescales the pair Y[ypos], Y[ypos+1] by BETA and
 * finally adds ALPHA times the accumulated pair to it.  A, X and Y are
 * indexed as interleaved (real, imaginary) float pairs.
 *
 * NOTE(review): Mset, Mmla, Mcelscal and ATL_sZERO are defined outside
 * this block; they are presumed to be complex assignment, complex
 * multiply-accumulate, in-place complex scaling and the float zero
 * constant -- confirm against their definitions.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   register float             acc_i, acc_r;
   int                        col, row, lo, hi, apos, xpos;
   int                        acol = 0, xbase = 0, ypos = 0;
   const int                  xstep = 2 * INCX, ystep = 2 * INCY,
                              astep = 2 * LDA;
/* ..
 * .. Executable Statements ..
 *
 */
   for( col = 0; col < M; col++ )
   {
/*
 *    First and last row of the band that fall inside column col.
 */
      lo = ( col - KU > 0 ? col - KU : 0 );
      hi = ( N - 1 > col + KL ? col + KL : N - 1 );

      Mset( ATL_sZERO, ATL_sZERO, acc_r, acc_i );

/*
 *    Row lo of column col sits ( KU - col + lo ) pairs into the column.
 */
      apos = 2 * ( ( KU - col ) + lo ) + acol;
      xpos = xbase;

      for( row = lo; row <= hi; row++ )
      {
         Mmla( A[apos], A[apos+1], X[xpos], X[xpos+1], acc_r, acc_i );
         apos += 2;
         xpos += xstep;
      }

      Mcelscal( BETA[0], BETA[1], Y[ypos], Y[ypos+1] );
      Mmla( ALPHA[0], ALPHA[1], acc_r, acc_i, Y[ypos], Y[ypos+1] );

/*
 *    Once col has reached KU the first row of the band moves down by
 *    one for each further column, so the X start offset follows it.
 */
      if( col >= KU )
      {
         xbase += xstep;
      }

      acol += astep;
      ypos += ystep;
   }
/*
 * End of ATL_crefgbmvT
 */
}
void ATL_crefsyrkLT
(
   const int                  N,
   const int                  K,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefsyrkLT( ... )
 *
 * <=>
 *
 * ATL_crefsyrk( AtlasLower, AtlasTrans, ... )
 *
 * See ATL_crefsyrk for details.
 *
 * Only the entries of C with row >= col are touched.  For each of them
 * the first K (re, im) pairs of columns row and col of A are multiplied
 * pairwise and summed; the entry of C is then rescaled by BETA and
 * ALPHA times the sum is added to it.
 *
 * NOTE(review): Mset, Mmla, Mcelscal and ATL_sZERO come from outside
 * this block; their exact semantics should be confirmed there.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   register float             dot_i, dot_r;
   int                        col, row, l, pa_row, pa_col, pc;
   const int                  astride = 2 * LDA, cstride = 2 * LDC;
/* ..
 * .. Executable Statements ..
 *
 */
   for( col = 0; col < N; col++ )
   {
      for( row = col; row < N; row++ )
      {
         pc = 2 * row + col * cstride;

         Mset( ATL_sZERO, ATL_sZERO, dot_r, dot_i );

/*
 *       Walk down columns row and col of A in lock step.
 */
         pa_row = row * astride;
         pa_col = col * astride;
         l      = 0;
         while( l < K )
         {
            Mmla( A[pa_row], A[pa_row+1], A[pa_col], A[pa_col+1],
                  dot_r, dot_i );
            pa_row += 2;
            pa_col += 2;
            l++;
         }

         Mcelscal( BETA[0], BETA[1], C[pc], C[pc+1] );
         Mmla( ALPHA[0], ALPHA[1], dot_r, dot_i, C[pc], C[pc+1] );
      }
   }
/*
 * End of ATL_crefsyrkLT
 */
}
void ATL_crefgpmvLH
(
   const int                  M,
   const int                  N,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * X,
   const int                  INCX,
   const float                * BETA,
   float                      * Y,
   const int                  INCY
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefgpmvLH( ... )
 *
 * <=>
 *
 * ATL_crefgpmv( AtlasLower, AtlasConjTrans, ... )
 *
 * See ATL_crefgpmv for details.
 *
 * For every column j in [0, M) the routine sums, over i in [0, N), the
 * product of the conjugated (re, -im) pair of A with the matching pair
 * of X, rescales Y[jy], Y[jy+1] by BETA and adds ALPHA times the sum.
 *
 * Column j of A starts at offset jaj (in floats).  Each column is one
 * complex entry shorter than the previous one, so the start of the
 * next column is obtained by adding the length of the CURRENT column
 * (lda2) and only afterwards shrinking lda2.
 *
 * NOTE(review): Mset, Mmla, Mcelscal and ATL_sZERO are defined outside
 * this block; confirm their semantics against their definitions.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   register float             t0_i, t0_r;
   int                        i, iaij, ix, j, jaj, jy;
   int                        incx2 = 2 * INCX, incy2 = 2 * INCY,
                              lda2 = ( LDA << 1 );
/* ..
 * .. Executable Statements ..
 *
 */
   for( j = 0, jaj = 0, jy = 0; j < M; j++, jy += incy2 )
   {
      Mset( ATL_sZERO, ATL_sZERO, t0_r, t0_i );

      for( i = 0, iaij = jaj, ix = 0; i < N; i++, iaij += 2, ix += incx2 )
      {
/*
 *       Negated imaginary part: A enters the product conjugated.
 */
         Mmla( A[iaij], -A[iaij+1], X[ix], X[ix+1], t0_r, t0_i );
      }

      Mcelscal( BETA[0], BETA[1], Y[jy], Y[jy+1] );
      Mmla( ALPHA[0], ALPHA[1], t0_r, t0_i, Y[jy], Y[jy+1] );

/*
 *    Step over the current column first, then shorten the stride for
 *    the following one.  Doing this in the opposite order would make
 *    column j+1 overlap the last entry of column j.
 */
      jaj  += lda2;
      lda2 -= 2;
   }
/*
 * End of ATL_crefgpmvLH
 */
}
void ATL_crefgemmCT
(
   const int                  M,
   const int                  N,
   const int                  K,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * B,
   const int                  LDB,
   const float                * BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * For every entry (row, col) of the M-by-N array C, the routine sums
 * over l in [0, K) the product of the conjugated (re, -im) pair number
 * l of column row of A with the pair found at row col, column l of B.
 * The entry of C is then rescaled by BETA and ALPHA times the sum is
 * added to it.
 *
 * NOTE(review): the name suggests this is the AtlasConjTrans /
 * AtlasTrans case of ATL_crefgemm -- confirm against the dispatcher.
 * Mset, Mmla, Mcelscal and ATL_sZERO are defined outside this block.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   register float             sum_i, sum_r;
   int                        col, row, l, pa, pb, pc;
   const int                  astride = 2 * LDA, bstride = 2 * LDB,
                              cstride = 2 * LDC;
/* ..
 * .. Executable Statements ..
 *
 */
   for( col = 0; col < N; col++ )
   {
      for( row = 0; row < M; row++ )
      {
         pc = 2 * row + col * cstride;

         Mset( ATL_sZERO, ATL_sZERO, sum_r, sum_i );

/*
 *       A is traversed contiguously down column row, B along row col
 *       (one leading dimension per step).
 */
         pa = row * astride;
         pb = 2 * col;
         l  = 0;
         while( l < K )
         {
            Mmla( A[pa], -A[pa+1], B[pb], B[pb+1], sum_r, sum_i );
            pa += 2;
            pb += bstride;
            l++;
         }

         Mcelscal( BETA[0], BETA[1], C[pc], C[pc+1] );
         Mmla( ALPHA[0], ALPHA[1], sum_r, sum_i, C[pc], C[pc+1] );
      }
   }
/*
 * End of ATL_crefgemmCT
 */
}
void ATL_crefhemmRL
(
   const int                  M,
   const int                  N,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * B,
   const int                  LDB,
   const float                * BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefhemmRL( ... )
 *
 * <=>
 *
 * ATL_crefhemm( AtlasRight, AtlasLower, ... )
 *
 * See ATL_crefhemm for details.
 *
 * Column col of C is built in three passes:
 *   1. it is rescaled by BETA and receives column col of B times
 *      ALPHA times the real part of the diagonal entry A(col,col);
 *   2. for k < col it receives column k of B times ALPHA times the
 *      conjugate of the entry stored at row col, column k of A;
 *   3. for k > col it receives column k of B times ALPHA times the
 *      entry stored at row k, column col of A.
 * Only entries of A on or below the diagonal are read.
 *
 * NOTE(review): Mset, Mmul, Mmla and Mcelscal are defined outside this
 * block; confirm their semantics against their definitions.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   register float             w_i, w_r;
   int                        col, k, row, acol, ccol, pa, pb, pc;
   const int                  astride = 2 * LDA, bstride = 2 * LDB,
                              cstride = 2 * LDC;
/* ..
 * .. Executable Statements ..
 *
 */
   for( col = 0; col < N; col++ )
   {
      acol = col * astride;
      ccol = col * cstride;

/*
 *    Diagonal contribution: only A[pa], the real part, is used.
 */
      pa = 2 * col + acol;
      Mset( ALPHA[0] * A[pa], ALPHA[1] * A[pa], w_r, w_i );

      pb = col * bstride;
      pc = ccol;
      for( row = 0; row < M; row++ )
      {
         Mcelscal( BETA[0], BETA[1], C[pc], C[pc+1] );
         Mmla( w_r, w_i, B[pb], B[pb+1], C[pc], C[pc+1] );
         pb += 2;
         pc += 2;
      }

/*
 *    Columns left of the diagonal: read row col of A, conjugated.
 */
      for( k = 0; k < col; k++ )
      {
         pa = 2 * col + k * astride;
         Mmul( ALPHA[0], ALPHA[1], A[pa], -A[pa+1], w_r, w_i );

         pb = k * bstride;
         pc = ccol;
         for( row = 0; row < M; row++ )
         {
            Mmla( w_r, w_i, B[pb], B[pb+1], C[pc], C[pc+1] );
            pb += 2;
            pc += 2;
         }
      }

/*
 *    Columns right of the diagonal: read column col of A as stored.
 */
      for( k = col + 1; k < N; k++ )
      {
         pa = 2 * k + acol;
         Mmul( ALPHA[0], ALPHA[1], A[pa], A[pa+1], w_r, w_i );

         pb = k * bstride;
         pc = ccol;
         for( row = 0; row < M; row++ )
         {
            Mmla( w_r, w_i, B[pb], B[pb+1], C[pc], C[pc+1] );
            pb += 2;
            pc += 2;
         }
      }
   }
/*
 * End of ATL_crefhemmRL
 */
}
void ATL_crefsymmLL
(
   const int                  M,
   const int                  N,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * B,
   const int                  LDB,
   const float                * BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefsymmLL( ... )
 *
 * <=>
 *
 * ATL_crefsymm( AtlasLeft, AtlasLower, ... )
 *
 * See ATL_crefsymm for details.
 *
 * Each column of C is processed from its last row up to its first.
 * For row `row`, the entries of column `row` of A strictly below the
 * diagonal are used twice: once to add ALPHA * B(row,col) * A(k,row)
 * to the rows k > row of C, and once to accumulate
 * sum B(k,col) * A(k,row), which is added (times ALPHA) to C(row,col)
 * together with the diagonal term.  The bottom-up order matters: the
 * rows k > row of C have already been rescaled by BETA when they
 * receive their update.
 *
 * NOTE(review): Mset, Mmul, Mmla, Mcelscal and ATL_sZERO are defined
 * outside this block; confirm their semantics against their
 * definitions.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   register float             ab_i, ab_r, dot_i, dot_r;
   int                        col, row, k, acol, bcol, ccol,
                              pa, pb, pc, pb_row, pc_row, diag;
   const int                  astride = 2 * LDA, bstride = 2 * LDB,
                              cstride = 2 * LDC;
/* ..
 * .. Executable Statements ..
 *
 */
   for( col = 0; col < N; col++ )
   {
      bcol = col * bstride;
      ccol = col * cstride;

      for( row = M - 1; row >= 0; row-- )
      {
         acol   = row * astride;
         pb_row = 2 * row + bcol;
         pc_row = 2 * row + ccol;

         Mmul( ALPHA[0], ALPHA[1], B[pb_row], B[pb_row+1], ab_r, ab_i );
         Mset( ATL_sZERO, ATL_sZERO, dot_r, dot_i );

/*
 *       Sub-diagonal part of column row of A.
 */
         pa = 2 * ( row + 1 ) + acol;
         pb = 2 * ( row + 1 ) + bcol;
         pc = 2 * ( row + 1 ) + ccol;
         for( k = row + 1; k < M; k++ )
         {
            Mmla( A[pa], A[pa+1], ab_r, ab_i, C[pc], C[pc+1] );
            Mmla( B[pb], B[pb+1], A[pa], A[pa+1], dot_r, dot_i );
            pa += 2;
            pb += 2;
            pc += 2;
         }

         Mcelscal( BETA[0], BETA[1], C[pc_row], C[pc_row+1] );

/*
 *       Diagonal entry of A, then the accumulated sub-diagonal sum.
 */
         diag = 2 * row + acol;
         Mmla( ab_r, ab_i, A[diag], A[diag+1], C[pc_row], C[pc_row+1] );
         Mmla( ALPHA[0], ALPHA[1], dot_r, dot_i,
               C[pc_row], C[pc_row+1] );
      }
   }
/*
 * End of ATL_crefsymmLL
 */
}