void ATL_crefherkLC
(
   const int                  N,
   const int                  K,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   const float                BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefherkLC( ... )  <=>  ATL_crefherk( AtlasLower, AtlasConjTrans, ... )
 *
 * See ATL_crefherk for details.
 *
 * Implementation notes
 * ====================
 *
 * A and C are accessed as interleaved (real, imaginary) float pairs, so
 * one column step is 2*LDA floats in A and 2*LDC floats in C.  For every
 * column of C only the entries with row >= column are read or written.
 * Each such entry is rescaled through Mselscal( BETA, . ) and then
 * receives ALPHA times a sum over the K leading pairs of two columns of
 * A, the first of which enters conjugated.
 *
 * Mselscal, Mset and Mmla are macros defined outside this chunk; the
 * comments below describe their presumed effect -- confirm against the
 * project headers.
 *
 * ---------------------------------------------------------------------
 */
   register float             acc_im, acc_re;
   const int                  a_step = ( LDA << 1 ), c_step = ( LDC << 1 );
   int                        col, k, row;

   for( col = 0; col < N; col++ )
   {
      const int               a_col  = col * a_step;
      const int               c_col  = col * c_step;
      const int               c_diag = c_col + ( col << 1 );
/*
 *    Diagonal entry: the sum is the squared modulus of column col of A,
 *    so only a real accumulator is needed.
 */
      acc_re = ATL_sZERO;
      for( k = 0; k < K; k++ )
      {
         const int            p = a_col + ( k << 1 );

         acc_re += A[p] * A[p] + A[p+1] * A[p+1];
      }
      Mselscal( BETA, C[c_diag] );
/*
 *    Presumably stores the updated real part and ATL_sZERO as the
 *    imaginary part of the diagonal entry.
 */
      Mset( C[c_diag] + ALPHA * acc_re, ATL_sZERO, C[c_diag], C[c_diag+1] );
/*
 *    Strictly-lower entries of this column.
 */
      for( row = col + 1; row < N; row++ )
      {
         const int            a_row = row * a_step;
         const int            c_pos = c_col + ( row << 1 );

         Mset( ATL_sZERO, ATL_sZERO, acc_re, acc_im );
         for( k = 0; k < K; k++ )
         {
            const int         pi = a_row + ( k << 1 );
            const int         pj = a_col + ( k << 1 );
/*
 *          Presumably acc += conj( A[pi] ) * A[pj] in complex terms.
 */
            Mmla( A[pi], -A[pi+1], A[pj], A[pj+1], acc_re, acc_im );
         }
         Mselscal( BETA, C[c_pos  ] );
         Mselscal( BETA, C[c_pos+1] );
         Mset( C[c_pos] + ALPHA * acc_re, C[c_pos+1] + ALPHA * acc_im,
               C[c_pos], C[c_pos+1] );
      }
   }
/*
 * End of ATL_crefherkLC
 */
}
void ATL_crefherkUN
(
   const int                  N,
   const int                  K,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   const float                BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefherkUN( ... )  <=>  ATL_crefherk( AtlasUpper, AtlasNoTrans, ... )
 *
 * See ATL_crefherk for details.
 *
 * Implementation notes
 * ====================
 *
 * A and C are accessed as interleaved (real, imaginary) float pairs, so
 * one column step is 2*LDA floats in A and 2*LDC floats in C.  For every
 * column of C only the entries with row <= column are read or written.
 * The column is first rescaled by BETA, then K updates are accumulated
 * into it, one per column of A.
 *
 * Msvscal, Mselscal, Mset and Mmla are macros defined outside this
 * chunk; the comments below describe their presumed effect -- confirm
 * against the project headers.
 *
 * ---------------------------------------------------------------------
 */
   register float             w_im, w_re;
   const int                  a_step = ( LDA << 1 ), c_step = ( LDC << 1 );
   int                        col, k, row;

   for( col = 0; col < N; col++ )
   {
      const int               c_col  = col * c_step;
      const int               c_diag = c_col + ( col << 1 );
/*
 *    Rescale the 2*col floats above the diagonal, then the real part of
 *    the diagonal; its imaginary part is set to ATL_sZERO.
 */
      Msvscal( (col << 1), BETA, C+c_col, 1 );
      Mselscal( BETA, C[c_diag] );
      C[c_diag+1] = ATL_sZERO;

      for( k = 0; k < K; k++ )
      {
         const int            a_k    = k * a_step;
         const int            a_diag = a_k + ( col << 1 );
/*
 *       Presumably w = ALPHA * conj( A[a_diag] ).
 */
         Mset( ALPHA * A[a_diag], -ALPHA * A[a_diag+1], w_re, w_im );

         for( row = 0; row < col; row++ )
         {
            const int         pa = a_k   + ( row << 1 );
            const int         pc = c_col + ( row << 1 );

            Mmla( w_re, w_im, A[pa], A[pa+1], C[pc], C[pc+1] );
         }
/*
 *       Diagonal update: only the real part of the product is added and
 *       ATL_sZERO is passed as the imaginary value.
 */
         Mset( C[c_diag] + w_re * A[a_diag] - w_im * A[a_diag+1],
               ATL_sZERO, C[c_diag], C[c_diag+1] );
      }
   }
/*
 * End of ATL_crefherkUN
 */
}
void ATL_srefsyr2kUT
(
   const int                  N,
   const int                  K,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * B,
   const int                  LDB,
   const float                BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_srefsyr2kUT( ... )  <=>  ATL_srefsyr2k( AtlasUpper, AtlasTrans, ... )
 *
 * See ATL_srefsyr2k for details.
 *
 * Implementation notes
 * ====================
 *
 * For every column of C only the entries with row <= column are read or
 * written.  Each such entry is rescaled through Mselscal( BETA, . ) and
 * then receives ALPHA * s0 + ALPHA * s1, where s0 and s1 are the sums
 * over the K leading elements of
 *    column row of A  times  column col of B,   and
 *    column row of B  times  column col of A.
 *
 * Mselscal is a macro defined outside this chunk; it presumably applies
 * the BETA scaling in place -- confirm against the project headers.
 *
 * ---------------------------------------------------------------------
 */
   register float             s0, s1;
   int                        col, k, row;

   for( col = 0; col < N; col++ )
   {
      const int               a_col = col * LDA;
      const int               b_col = col * LDB;
      const int               c_col = col * LDC;

      for( row = 0; row <= col; row++ )
      {
         const int            a_row = row * LDA;
         const int            b_row = row * LDB;
         const int            c_pos = c_col + row;

         s0 = ATL_sZERO;
         s1 = ATL_sZERO;
         for( k = 0; k < K; k++ )
         {
            s0 += A[a_row + k] * B[b_col + k];
            s1 += B[b_row + k] * A[a_col + k];
         }
         Mselscal( BETA, C[c_pos] );
         C[c_pos] += ALPHA * s0 + ALPHA * s1;
      }
   }
/*
 * End of ATL_srefsyr2kUT
 */
}
void ATL_srefsyrkLT
(
   const int                  N,
   const int                  K,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   const float                BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_srefsyrkLT( ... )  <=>  ATL_srefsyrk( AtlasLower, AtlasTrans, ... )
 *
 * See ATL_srefsyrk for details.
 *
 * Implementation notes
 * ====================
 *
 * For every column of C only the entries with row >= column are read or
 * written.  Each such entry is rescaled through Mselscal( BETA, . ) and
 * then receives ALPHA times the sum, over the K leading elements, of
 * column row of A times column col of A.
 *
 * Mselscal is a macro defined outside this chunk; it presumably applies
 * the BETA scaling in place -- confirm against the project headers.
 *
 * ---------------------------------------------------------------------
 */
   register float             dot;
   int                        col, k, row;

   for( col = 0; col < N; col++ )
   {
      const int               a_col = col * LDA;
      const int               c_col = col * LDC;

      for( row = col; row < N; row++ )
      {
         const int            a_row = row * LDA;
         const int            c_pos = c_col + row;

         dot = ATL_sZERO;
         for( k = 0; k < K; k++ )
         {
            dot += A[a_row + k] * A[a_col + k];
         }
         Mselscal( BETA, C[c_pos] );
         C[c_pos] += ALPHA * dot;
      }
   }
/*
 * End of ATL_srefsyrkLT
 */
}
void ATL_srefgpmvLT
(
   const int                  M,
   const int                  N,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * X,
   const int                  INCX,
   const float                BETA,
   float                      * Y,
   const int                  INCY
)
{
/*
 * Purpose
 * =======
 *
 * ATL_srefgpmvLT( ... )  <=>  ATL_srefgpmv( AtlasLower, AtlasTrans, ... )
 *
 * See ATL_srefgpmv for details.
 *
 * Implementation notes
 * ====================
 *
 * M entries of Y are updated, INCY apart.  Entry j is rescaled through
 * Mselscal( BETA, . ) and then receives ALPHA times the sum of N
 * consecutive elements of A, starting at a running offset, each times
 * the matching element of X (INCX apart).
 *
 * The running offset starts at 0 and the running column step starts at
 * LDA.  After each column the step is decremented FIRST and the offset
 * is then advanced by the decremented value, so the offsets visited are
 * 0, LDA-1, 2*LDA-3, ...  This order is deliberate; it presumably
 * matches the lower packed layout described in ATL_srefgpmv -- confirm
 * there before changing it.
 *
 * Mselscal is a macro defined outside this chunk; it presumably applies
 * the BETA scaling in place -- confirm against the project headers.
 *
 * ---------------------------------------------------------------------
 */
   register float             dot;
   int                        col_off = 0, i, j, step = LDA;

   for( j = 0; j < M; j++ )
   {
      const int               y_pos = j * INCY;

      dot = ATL_sZERO;
      for( i = 0; i < N; i++ )
      {
         dot += A[col_off + i] * X[i * INCX];
      }
      Mselscal( BETA, Y[y_pos] );
      Y[y_pos] += ALPHA * dot;
/*
 *    Shrink the step before advancing (see the notes above).
 */
      step    -= 1;
      col_off += step;
   }
/*
 * End of ATL_srefgpmvLT
 */
}
void ATL_srefgemmTN
(
   const int                  M,
   const int                  N,
   const int                  K,
   const float                ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * B,
   const int                  LDB,
   const float                BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Implementation notes
 * ====================
 *
 * Every entry C[ j*LDC + i ], 0 <= i < M, 0 <= j < N, is rescaled
 * through Mselscal( BETA, . ) and then receives ALPHA times the sum,
 * over l = 0 .. K-1, of A[ i*LDA + l ] * B[ j*LDB + l ]; i.e. column i
 * of A is paired with column j of B.
 *
 * Mselscal is a macro defined outside this chunk; it presumably applies
 * the BETA scaling in place -- confirm against the project headers.
 *
 * ---------------------------------------------------------------------
 */
   register float             dot;
   int                        col, k, row;

   for( col = 0; col < N; col++ )
   {
      const int               b_col = col * LDB;
      const int               c_col = col * LDC;

      for( row = 0; row < M; row++ )
      {
         const int            a_col = row * LDA;
         const int            c_pos = c_col + row;

         dot = ATL_sZERO;
         for( k = 0; k < K; k++ )
         {
            dot += A[a_col + k] * B[b_col + k];
         }
         Mselscal( BETA, C[c_pos] );
         C[c_pos] += ALPHA * dot;
      }
   }
/*
 * End of ATL_srefgemmTN
 */
}
void ATL_crefher2kUN
(
   const int                  N,
   const int                  K,
   const float                * ALPHA,
   const float                * A,
   const int                  LDA,
   const float                * B,
   const int                  LDB,
   const float                BETA,
   float                      * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_crefher2kUN( ... )  <=>  ATL_crefher2k( AtlasUpper, AtlasNoTrans, ... )
 *
 * See ATL_crefher2k for details.
 *
 * Implementation notes
 * ====================
 *
 * A, B and C are accessed as interleaved (real, imaginary) float pairs,
 * so one column step is 2*LDA, 2*LDB and 2*LDC floats respectively.
 * ALPHA is read as the pair ( ALPHA[0], ALPHA[1] ); BETA is a single
 * float.  For every column of C only the entries with row <= column
 * are read or written.  The column is first rescaled by BETA, then K
 * updates are accumulated into it, one per column of A and B.
 *
 * Msvscal, Mselscal, Mset, Mmul and Mmla are macros defined outside
 * this chunk; the comments below describe their presumed effect --
 * confirm against the project headers.
 *
 * ---------------------------------------------------------------------
 */
   float                      u_im, u_re, v_im, v_re;
   const int                  a_step = ( LDA << 1 ), b_step = ( LDB << 1 ),
                              c_step = ( LDC << 1 );
   int                        col, k, row;

   for( col = 0; col < N; col++ )
   {
      const int               c_col  = col * c_step;
      const int               c_diag = c_col + ( col << 1 );
/*
 *    Rescale the 2*col floats above the diagonal, then the real part of
 *    the diagonal; its imaginary part is set to ATL_sZERO.
 */
      Msvscal( (col << 1), BETA, C+c_col, 1 );
      Mselscal( BETA, C[c_diag] );
      C[c_diag+1] = ATL_sZERO;

      for( k = 0; k < K; k++ )
      {
         const int            a_k    = k * a_step;
         const int            b_k    = k * b_step;
         const int            a_diag = a_k + ( col << 1 );
         const int            b_diag = b_k + ( col << 1 );
/*
 *       Presumably u = ALPHA * conj( B[b_diag] ) and
 *       v = conj( ALPHA ) * conj( A[a_diag] ).
 */
         Mmul( ALPHA[0],  ALPHA[1], B[b_diag], -B[b_diag+1], u_re, u_im );
         Mmul( ALPHA[0], -ALPHA[1], A[a_diag], -A[a_diag+1], v_re, v_im );

         for( row = 0; row < col; row++ )
         {
            const int         pa = a_k   + ( row << 1 );
            const int         pb = b_k   + ( row << 1 );
            const int         pc = c_col + ( row << 1 );

            Mmla( A[pa], A[pa+1], u_re, u_im, C[pc], C[pc+1] );
            Mmla( B[pb], B[pb+1], v_re, v_im, C[pc], C[pc+1] );
         }
/*
 *       Diagonal update: only the real parts of the two products are
 *       added and ATL_sZERO is passed as the imaginary value.
 */
         Mset( C[c_diag] + A[a_diag] * u_re - A[a_diag+1] * u_im +
               B[b_diag] * v_re - B[b_diag+1] * v_im,
               ATL_sZERO, C[c_diag], C[c_diag+1] );
      }
   }
/*
 * End of ATL_crefher2kUN
 */
}