/*
 * Run the herk operation under test through the requested implementation.
 *
 * impl   selects the implementation; only BLIS_TEST_SEQ_FRONT_END is handled.
 * alpha, a, beta, c   operands forwarded unchanged to bli_herk().
 *
 * Any other value of impl is reported through libblis_test_printf_error()
 * and no computation is performed.
 */
void libblis_test_herk_impl( mt_impl_t impl, obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c )
{
    /* Guard clause: reject everything except the one supported front-end. */
    if ( impl != BLIS_TEST_SEQ_FRONT_END )
    {
        libblis_test_printf_error( "Invalid implementation type.\n" );
        return;
    }

    bli_herk( alpha, a, beta, c );
}
/*
 * Run the herk operation under test through the requested interface.
 *
 * iface  selects the interface; only BLIS_TEST_SEQ_FRONT_END is handled.
 * alpha, a, beta, c   operands forwarded unchanged to bli_herk().
 *
 * Any other value of iface is reported through libblis_test_printf_error()
 * and no computation is performed.
 */
void libblis_test_herk_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c )
{
    if ( iface == BLIS_TEST_SEQ_FRONT_END )
    {
        /* The bli_herk4m() / bli_herk3m() alternatives are intentionally
           left disabled; only the plain front-end is exercised. */
        bli_herk( alpha, a, beta, c );
    }
    else
    {
        libblis_test_printf_error( "Invalid interface type.\n" );
    }
}
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, c ); bli_obj_set_uplo( uploc, c ); bli_obj_set_conjtrans( transa, a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = 
bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); 
bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t x, y; obj_t alpha, beta; dim_t m; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int ii; #ifdef NBLIS bli_init(); #endif m = 4000; dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; { #ifdef NBLIS bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, 1, 0, 0, &y ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_b, m, m, 0, 0, &b ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); #endif #ifdef NBLAS x.buffer = malloc( m * 1 * sizeof( double ) ); y.buffer = malloc( m * 1 * sizeof( double ) ); alpha.buffer = malloc( 1 * sizeof( double ) ); beta.buffer = malloc( 1 * sizeof( double ) ); a.buffer = malloc( m * m * sizeof( double ) ); a.m = m; a.n = m; a.cs = m; b.buffer = malloc( m * m * sizeof( double ) ); b.m = m; b.n = m; b.cs = m; c.buffer = malloc( m * m * sizeof( double ) ); c.m = m; c.n = m; c.cs = m; *((double*)alpha.buffer) = 2.0; *((double*)beta.buffer) = -1.0; #endif #ifdef NBLIS #if NBLIS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { bli_gemm( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 2 { bli_hemm( BLIS_LEFT, &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 3 { bli_herk( &BLIS_ONE, &a, &BLIS_ONE, &c ); } #endif #if NBLIS >= 4 { bli_her2k( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 5 { bli_trmm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #if NBLIS >= 6 { bli_trsm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #endif #ifdef NBLAS #if NBLAS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int 
ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 2 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 3 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 4 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyr2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 5 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda 
= bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 6 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 7 { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 8 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 9 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = 
bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 10 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zher2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 11 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* cp = bli_obj_buffer( c ); ztrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 12 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* cp = bli_obj_buffer( c ); ztrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #endif #ifdef NBLIS bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); #endif #ifdef NBLAS free( x.buffer ); free( y.buffer ); free( alpha.buffer ); free( beta.buffer ); free( a.buffer ); free( b.buffer ); free( 
c.buffer ); #endif } #ifdef NBLIS bli_finalize(); #endif return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt_a, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); k_input = strtol( argv[3], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); dt_a = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; uplo = BLIS_LOWER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_obj_create( dt_c, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uplo, &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_herk( &alpha, &a, &beta, &c ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = 
bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &uploa, &transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }