//
// Residual check for the normfm operation.
//
// Pre-conditions:
// - x is set to beta.
// Note:
// - beta should have a non-zero imaginary component in the complex
//   cases in order to more fully exercise the implementation.
//
// Under these conditions, the implementation of
//
//   norm := normf( x )
//
// is assumed to be functioning correctly if
//
//   norm = sqrt( absqsc( beta ) * m * n )
//
// where m and n are the dimensions of x.
//
// params is not read by this routine. norm is updated in place by the
// final bli_subsc() call, and the real component of the updated norm is
// written to *resid.
//
void libblis_test_normfm_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         x,
       obj_t*         norm,
       double*        resid
     )
{
    num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
    dim_t  n_rows  = bli_obj_length( *x );
    dim_t  n_cols  = bli_obj_width( *x );

    obj_t  rows_r, cols_r, expected_r;
    double imag_unused;

    // All intermediate scalars live in the real projection of x's datatype.
    bli_obj_scalar_init_detached( dt_real, &expected_r );
    bli_obj_scalar_init_detached( dt_real, &rows_r );
    bli_obj_scalar_init_detached( dt_real, &cols_r );

    // expected_r := absqsc( beta )
    bli_absqsc( beta, &expected_r );

    // expected_r := sqrt( expected_r * m * n ), multiplying by m first.
    bli_setsc( ( double )n_rows, 0.0, &rows_r );
    bli_setsc( ( double )n_cols, 0.0, &cols_r );
    bli_mulsc( &rows_r, &expected_r );
    bli_mulsc( &cols_r, &expected_r );
    bli_sqrtsc( &expected_r, &expected_r );

    // Combine the expected value with the computed norm and report the
    // real part; the imaginary part is discarded.
    bli_subsc( &expected_r, norm );
    bli_getsc( norm, resid, &imag_unused );
}
//
// Residual check for the addv operation.
//
// Pre-conditions:
// - x is set to alpha.
// - y_orig is set to beta.
// Note:
// - alpha and beta should have non-zero imaginary components in the
//   complex cases in order to more fully exercise the implementation.
//
// Under these conditions, the implementation of
//
//   y := y_orig + conjx(x)
//
// is assumed to be functioning correctly if
//
//   fnormv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m )
//
// is negligible, where m is the vector dimension of x. The real
// component of that difference is written to *resid.
//
void libblis_test_addv_check( obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid )
{
    num_t  dt      = bli_obj_datatype( *x );
    num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
    dim_t  len     = bli_obj_vector_dim( *x );
    conj_t conjx   = bli_obj_conj_status( *x );

    obj_t  sum;          // beta + conjx(alpha), in x's datatype
    obj_t  alpha_cj;     // copy of alpha carrying x's conjugation status
    obj_t  ynorm_r;      // fnormv(y), later the difference
    obj_t  len_r;        // m as a real scalar
    obj_t  expected_r;   // sqrt( absqsc( sum ) * m )
    double imag_unused;

    bli_obj_scalar_init_detached( dt,      &sum );
    bli_obj_scalar_init_detached( dt_real, &expected_r );
    bli_obj_scalar_init_detached( dt_real, &ynorm_r );
    bli_obj_scalar_init_detached( dt_real, &len_r );

    bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_cj );

    // Norm of the vector actually produced by the implementation.
    bli_fnormv( y, &ynorm_r );

    // sum := beta + conjx(alpha)
    bli_copysc( beta, &sum );
    bli_addsc( &alpha_cj, &sum );

    // expected_r := sqrt( absqsc( sum ) * m )
    bli_setsc( ( double )len, 0.0, &len_r );
    bli_absqsc( &sum, &expected_r );
    bli_mulsc( &len_r, &expected_r );
    bli_sqrtsc( &expected_r, &expected_r );

    // Residual: fnormv(y) - expected; only the real part is reported.
    bli_subsc( &expected_r, &ynorm_r );
    bli_getsc( &ynorm_r, resid, &imag_unused );
}
//
// Residual check for the fnormv operation.
//
// Pre-conditions:
// - x is set to beta.
// Note:
// - beta should have a non-zero imaginary component in the complex
//   cases in order to more fully exercise the implementation.
//
// Under these conditions, the implementation of
//
//   norm := fnorm( x )
//
// is assumed to be functioning correctly if
//
//   norm = sqrt( absqsc( beta ) * m )
//
// where m is the length of x.
//
// norm is updated in place by the final bli_subsc() call, and the real
// component of the updated norm is written to *resid.
//
void libblis_test_fnormv_check( obj_t* beta, obj_t* x, obj_t* norm, double* resid )
{
    num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
    dim_t  len     = bli_obj_vector_dim( *x );

    obj_t  len_r, expected_r;
    double imag_unused;

    // Both intermediates live in the real projection of x's datatype.
    bli_obj_init_scalar( dt_real, &expected_r );
    bli_obj_init_scalar( dt_real, &len_r );

    // expected_r := sqrt( absqsc( beta ) * m )
    bli_absqsc( beta, &expected_r );
    bli_setsc( ( double )len, 0.0, &len_r );
    bli_mulsc( &len_r, &expected_r );
    bli_sqrtsc( &expected_r, &expected_r );

    // Combine the expected value with the computed norm and report the
    // real part; the imaginary part is discarded.
    bli_subsc( &expected_r, norm );
    bli_getsc( norm, resid, &imag_unused );
}
//
// Residual check for the trmm3 operation.
//
// Pre-conditions:
// - a is randomized and triangular.
// - b is randomized.
// - c_orig is randomized.
// Note:
// - alpha and beta should have non-zero imaginary components in the
//   complex cases in order to more fully exercise the implementation.
//
// Under these conditions, the implementation of
//
//   C := beta * C_orig + alpha * transa(A) * transb(B)    (side = left)
//   C := beta * C_orig + alpha * transb(B) * transa(A)    (side = right)
//
// is assumed to be functioning correctly if
//
//   fnorm( v - z )
//
// is negligible, where t is a random vector scaled by 1/n and
//
//   v = C * t
//
//   (side = left)
//   z = ( beta * C_orig + alpha * transa(A) * transb(B) ) * t
//     = beta * C_orig * t + alpha * transa(A) * w,    w = transb(B) * t
//
//   (side = right)
//   z = ( beta * C_orig + alpha * transb(B) * transa(A) ) * t
//     = beta * C_orig * t + alpha * transb(B) * w,    w = transa(A) * t
//
// The real component of fnorm( v - z ) is written to *resid.
//
void libblis_test_trmm3_check( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid )
{
    num_t  dt      = bli_obj_datatype( *c );
    num_t  dt_real = bli_obj_datatype_proj_to_real( *c );
    dim_t  m       = bli_obj_length( *c );
    dim_t  n       = bli_obj_width( *c );

    // The intermediate w has length m when A is applied from the left
    // (w = B * t) and length n when A is applied from the right (w = A * t).
    dim_t  w_len   = ( bli_is_left( side ) ? m : n );

    obj_t  scale, diff_norm;
    obj_t  t, v, w, z;
    double imag_unused;

    bli_obj_scalar_init_detached( dt,      &scale );
    bli_obj_scalar_init_detached( dt_real, &diff_norm );

    bli_obj_create( dt, n,     1, 0, 0, &t );
    bli_obj_create( dt, m,     1, 0, 0, &v );
    bli_obj_create( dt, w_len, 1, 0, 0, &w );
    bli_obj_create( dt, m,     1, 0, 0, &z );

    // t := randv() / n
    bli_randv( &t );
    bli_setsc( 1.0/( double )n, 0.0, &scale );
    bli_scalv( &scale, &t );

    // v := C * t
    bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

    if ( bli_is_left( side ) )
    {
        // w := B * t;  w := alpha * A * w;  z := w
        bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w );
        bli_trmv( alpha, a, &w );
        bli_copyv( &w, &z );
    }
    else
    {
        // w := A * t;  z := alpha * B * w
        bli_copyv( &t, &w );
        bli_trmv( &BLIS_ONE, a, &w );
        bli_gemv( alpha, b, &w, &BLIS_ZERO, &z );
    }

    // z := z + beta * C_orig * t
    bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z );

    // resid := real( fnorm( v - z ) )
    bli_subv( &z, &v );
    bli_fnormv( &v, &diff_norm );
    bli_getsc( &diff_norm, resid, &imag_unused );

    bli_obj_free( &t );
    bli_obj_free( &v );
    bli_obj_free( &w );
    bli_obj_free( &z );
}
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 8; n_input = 4; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, a ); bli_obj_set_uplo( uplo, a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%11.8f", "" ); bli_printm( "c", &c, "%14.11f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trmm( side, &alpha, &a, &c ); #else f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = 
bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrmm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%14.11f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trmm_blis" ); #else printf( "data_trmm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
//
// Runs one symv experiment for the given datatype, parameter combination
// (pc_str), storage combination (sc_str), and problem size (p_cur).
//
// - params: test-suite parameters; n_repeats is read here and the
//           structure is forwarded to the object-creation, randomization,
//           and check helpers.
// - op:     operation descriptor; dim_spec[0] is mapped to the dimension m.
// - iface:  interface selector forwarded to libblis_test_symv_impl().
// - pc_str: [0] uplo of A, [1] conj of A, [2] conj of x.
// - sc_str: [0] storage of A, [1] storage of x, [2] storage of y.
// - perf:   (output) performance estimate of the fastest repeat.
// - resid:  (output) residual computed by libblis_test_symv_check().
//
void libblis_test_symv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    unsigned int n_repeats = params->n_repeats;
    unsigned int i;

    double       time_min  = DBL_MAX;
    double       time;

    dim_t        m;

    uplo_t       uploa;
    conj_t       conja;
    conj_t       conjx;

    obj_t        alpha, a, x, beta, y;
    obj_t        y_save;

    // Map the dimension specifier to an actual dimension.
    m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // Map parameter characters to BLIS constants.
    bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
    bli_param_map_char_to_blis_conj( pc_str[1], &conja );
    bli_param_map_char_to_blis_conj( pc_str[2], &conjx );

    // Create test scalars.
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Create test operands (vectors and/or matrices).
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, m, &a );
    libblis_test_vobj_create( params, datatype,
                              sc_str[1], m,    &x );
    libblis_test_vobj_create( params, datatype,
                              sc_str[2], m,    &y );
    libblis_test_vobj_create( params, datatype,
                              sc_str[2], m,    &y_save );

    // Set alpha and beta. The complex case uses non-zero imaginary parts.
    if ( bli_obj_is_real( &y ) )
    {
        bli_setsc(  1.0,  0.0, &alpha );
        bli_setsc( -1.0,  0.0, &beta );
    }
    else
    {
        bli_setsc(  0.5,  0.5, &alpha );
        bli_setsc( -0.5,  0.5, &beta );
    }

    // Set the structure and uplo properties of A.
    bli_obj_set_struc( BLIS_SYMMETRIC, &a );
    bli_obj_set_uplo( uploa, &a );

    // Randomize A, make it densely symmetric, and zero the unstored triangle
    // to ensure the implementation reads only from the stored region.
    libblis_test_mobj_randomize( params, TRUE, &a );
    bli_mksymm( &a );
    bli_mktrim( &a );

    // Randomize x and y, and save y.
    libblis_test_vobj_randomize( params, TRUE, &x );
    libblis_test_vobj_randomize( params, TRUE, &y );
    bli_copyv( &y, &y_save );

    // Apply the remaining parameters.
    bli_obj_set_conj( conja, &a );
    bli_obj_set_conj( conjx, &x );

    // Repeat the experiment n_repeats times and record results.
    for ( i = 0; i < n_repeats; ++i )
    {
        // Restore y with the vector-level copy, mirroring the
        // bli_copyv() call that saved it above (y and y_save are
        // vector objects).
        bli_copyv( &y_save, &y );

        time = bli_clock();

        libblis_test_symv_impl( iface, &alpha, &a, &x, &beta, &y );

        time_min = bli_clock_min_diff( time_min, time );
    }

    // Estimate the performance of the best experiment repeat.
    *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( &y ) ) *perf *= 4.0;

    // Perform checks.
    libblis_test_symv_check( params, &alpha, &a, &x, &beta, &y, &y_save, resid );

    // Zero out performance and residual if output vector is empty.
    libblis_test_check_empty_problem( &y, perf, resid );

    // Free the test objects.
    bli_obj_free( &a );
    bli_obj_free( &x );
    bli_obj_free( &y );
    bli_obj_free( &y_save );
}
int main( int argc, char** argv ) { obj_t alpha, beta, gamma; obj_t x, y, z, w, a; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates working with vector objects and the level-1v // operations. // // // Example 1: Create vector objects and then broadcast (copy) scalar // values to all elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few vectors to work with. We make them all of the same length // so that we can perform operations between them. // NOTE: We've chosen to use row vectors here (1x4) instead of column // vectors (4x1) to allow for easier reading of standard output (less // scrolling). dt = BLIS_DOUBLE; m = 1; n = 4; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &x ); bli_obj_create( dt, m, n, rs, cs, &y ); bli_obj_create( dt, m, n, rs, cs, &z ); bli_obj_create( dt, m, n, rs, cs, &w ); bli_obj_create( dt, m, n, rs, cs, &a ); // Let's also create and initialize some scalar objects. bli_obj_create_1x1( dt, &alpha ); bli_obj_create_1x1( dt, &beta ); bli_obj_create_1x1( dt, &gamma ); bli_setsc( 2.0, 0.0, &alpha ); bli_setsc( 0.2, 0.0, &beta ); bli_setsc( 3.0, 0.0, &gamma ); bli_printm( "alpha:", &alpha, "%4.1f", "" ); bli_printm( "beta:", &beta, "%4.1f", "" ); bli_printm( "gamma:", &gamma, "%4.1f", "" ); // Vectors can set by "broadcasting" a constant to every element. bli_setv( &BLIS_ONE, &x ); bli_setv( &alpha, &y ); bli_setv( &BLIS_ZERO, &z ); // Note that we can use printv or printm to print vectors since vectors // are also matrices. We choose to use printm because it honors the // orientation of the vector (row or column) when printing, whereas // printv always prints vectors as column vectors regardless of their // they are 1 x n or n x 1. bli_printm( "x := 1.0", &x, "%4.1f", "" ); bli_printm( "y := alpha", &y, "%4.1f", "" ); bli_printm( "z := 0.0", &z, "%4.1f", "" ); // // Example 2: Randomize a vector object. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Set a vector to random values. 
bli_randv( &w ); bli_printm( "w := randv()", &w, "%4.1f", "" ); // // Example 3: Perform various element-wise operations on vector objects. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Copy a vector. bli_copyv( &w, &a ); bli_printm( "a := w", &a, "%4.1f", "" ); // Add and subtract vectors. bli_addv( &y, &a ); bli_printm( "a := a + y", &a, "%4.1f", "" ); bli_subv( &w, &a ); bli_printm( "a := a - w", &a, "%4.1f", "" ); // Scale a vector (destructive). bli_scalv( &beta, &a ); bli_printm( "a := beta * a", &a, "%4.1f", "" ); // Scale a vector (non-destructive). bli_scal2v( &gamma, &a, &z ); bli_printm( "z := gamma * a", &z, "%4.1f", "" ); // Scale and accumulate between vectors. bli_axpyv( &alpha, &w, &x ); bli_printm( "x := x + alpha * w", &x, "%4.1f", "" ); bli_xpbyv( &w, &BLIS_MINUS_ONE, &x ); bli_printm( "x := -1.0 * x + w", &x, "%4.1f", "" ); // Invert a vector element-wise. bli_invertv( &y ); bli_printm( "y := 1 / y", &y, "%4.1f", "" ); // Swap two vectors. bli_swapv( &x, &y ); bli_printm( "x (after swapping with y)", &x, "%4.1f", "" ); bli_printm( "y (after swapping with x)", &y, "%4.1f", "" ); // // Example 4: Perform contraction-like operations on vector objects. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Perform a dot product. bli_dotv( &a, &z, &gamma ); bli_printm( "gamma := a * z (dot product)", &gamma, "%5.2f", "" ); // Perform an extended dot product. bli_dotxv( &alpha, &a, &z, &BLIS_ONE, &gamma ); bli_printm( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product)", &gamma, "%5.2f", "" ); // Free the objects. bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &gamma ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &z ); bli_obj_free( &w ); bli_obj_free( &a ); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); //bli_obj_set_diag( BLIS_UNIT_DIAG, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* { obj_t a2; bli_obj_alias_to( &a, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_inc_diag_offset( 1, &a2 ); bli_setm( &BLIS_ZERO, &a2 ); 
bli_obj_inc_diag_offset( -2, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 ); bli_scalm( &BLIS_TWO, &a2 ); //bli_scalm( &BLIS_TWO, &a ); } */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trsm( side, //bli_trsm4m( side, //bli_trsm3m( side, &alpha, &a, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float * alphap = bli_obj_buffer( &alpha ); float * ap = bli_obj_buffer( &a ); float * cp = bli_obj_buffer( &c ); strsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else // if ( bli_is_complex( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &side, //ztrsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif 
dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
//
// Runs one dotxaxpyf experiment for the given datatype, parameter
// combination (pc_str), storage combination (sc_str), and problem size
// (p_cur).
//
// - params: test-suite parameters; n_repeats is read here and the
//           structure is forwarded to the creation, randomization, and
//           check helpers.
// - op:     operation descriptor; dim_spec[0] is mapped to m, and
//           dim_aux[0] is overwritten with the fusing factor b_n.
// - iface:  interface selector forwarded to libblis_test_dotxaxpyf_impl().
// - pc_str: [0] conjat, [1] conja, [2] conjw, [3] conjx.
// - sc_str: [0] storage of A, [1] w, [2] x, [3] y, [4] z.
// - perf:   (output) performance estimate of the fastest repeat.
// - resid:  (output) residual from libblis_test_dotxaxpyf_check().
//
void libblis_test_dotxaxpyf_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    unsigned int n_repeats = params->n_repeats;
    unsigned int rep;

    double       t_best    = 1e9;
    double       t_start;

    dim_t        m, b_n;

    conj_t       conjat, conja, conjw, conjx;

    obj_t        alpha, at, a, w, x, beta, y, z;
    obj_t        y_save, z_save;

    cntx_t       cntx;

    // Initialize a context for this operation.
    bli_dotxaxpyf_cntx_init( &cntx );

    // Translate the dimension specifier into a concrete dimension.
    m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // Look up the fusing factor for the current datatype, and stash it
    // in the operation descriptor so the driver can retrieve it later
    // when printing results.
    b_n = bli_cntx_get_blksz_def_dt( datatype, BLIS_XF, &cntx );
    op->dim_aux[0] = b_n;

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_conj( pc_str[0], &conjat );
    bli_param_map_char_to_blis_conj( pc_str[1], &conja );
    bli_param_map_char_to_blis_conj( pc_str[2], &conjw );
    bli_param_map_char_to_blis_conj( pc_str[3], &conjx );

    // Test scalars.
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Test operands: A is m x b_n; w and z have length m; x and y have
    // length b_n. Each output has a same-shaped backup.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, b_n, &a );
    libblis_test_vobj_create( params, datatype,
                              sc_str[1], m,      &w );
    libblis_test_vobj_create( params, datatype,
                              sc_str[2], b_n,    &x );
    libblis_test_vobj_create( params, datatype,
                              sc_str[3], b_n,    &y );
    libblis_test_vobj_create( params, datatype,
                              sc_str[3], b_n,    &y_save );
    libblis_test_vobj_create( params, datatype,
                              sc_str[4], m,      &z );
    libblis_test_vobj_create( params, datatype,
                              sc_str[4], m,      &z_save );

    // Set alpha and beta. The complex case uses non-zero imaginary parts.
    if ( bli_obj_is_real( y ) )
    {
        bli_setsc(  1.2,  0.0, &alpha );
        bli_setsc( -1.0,  0.0, &beta );
    }
    else
    {
        bli_setsc(  1.2,  0.1, &alpha );
        bli_setsc( -1.0, -0.1, &beta );
    }

    // Randomize every operand, then back up the two outputs.
    libblis_test_mobj_randomize( params, FALSE, &a );
    libblis_test_vobj_randomize( params, FALSE, &w );
    libblis_test_vobj_randomize( params, FALSE, &x );
    libblis_test_vobj_randomize( params, FALSE, &y );
    libblis_test_vobj_randomize( params, FALSE, &z );
    bli_copyv( &y, &y_save );
    bli_copyv( &z, &z_save );

    // at is an alias of a. (It must NOT be marked for transposition,
    // since the transposition is part of the dotxf subproblem.)
    bli_obj_alias_to( a, at );

    // Apply the conjugation parameters.
    bli_obj_set_conj( conjat, at );
    bli_obj_set_conj( conja, a );
    bli_obj_set_conj( conjw, w );
    bli_obj_set_conj( conjx, x );

    // Time n_repeats runs, each starting from the saved outputs.
    for ( rep = 0; rep < n_repeats; ++rep )
    {
        bli_copyv( &y_save, &y );
        bli_copyv( &z_save, &z );

        t_start = bli_clock();

        libblis_test_dotxaxpyf_impl( iface,
                                     &alpha, &at, &a, &w, &x, &beta, &y, &z,
                                     &cntx );

        t_best = bli_clock_min_diff( t_best, t_start );
    }

    // Performance estimate for the best repeat.
    *perf = ( 2.0 * m * b_n + 2.0 * m * b_n ) / t_best / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

    // Compute the residual.
    libblis_test_dotxaxpyf_check( params, &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid );

    // Zero out performance and residual if either output vector is empty.
    libblis_test_check_empty_problem( &y, perf, resid );
    libblis_test_check_empty_problem( &z, perf, resid );

    // Release the test objects. (at is an alias and is not freed.)
    bli_obj_free( &a );
    bli_obj_free( &w );
    bli_obj_free( &x );
    bli_obj_free( &y );
    bli_obj_free( &z );
    bli_obj_free( &y_save );
    bli_obj_free( &z_save );

    // Finalize the context.
    bli_dotxaxpyf_cntx_finalize( &cntx );
}
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = 
bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, 
gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; }
//
// Runs one mixed-datatype gemm experiment for the given datatype
// combination (dc_str), parameter combination (pc_str), storage
// combination (sc_str), and problem size (p_cur).
//
// - params: test-suite parameters; n_repeats and mixed_precision are read
//           here and the structure is forwarded to the creation,
//           randomization, and check helpers.
// - op:     operation descriptor; dim_spec[0..2] are mapped to m, n, k.
// - iface:  interface selector forwarded to libblis_test_gemm_impl().
// - dc_str: [0] datatype of C, [1] of A, [2] of B; [3] (read only when
//           params->mixed_precision is set) the computation datatype.
// - pc_str: [0] transa, [1] transb.
// - sc_str: [0] storage of C, [1] of A, [2] of B.
// - perf:   (output) performance estimate of the fastest repeat.
// - resid:  (output) residual from libblis_test_gemm_md_check().
//
void libblis_test_gemm_md
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    unsigned int n_repeats = params->n_repeats;
    unsigned int rep;

    double       t_best    = DBL_MAX;
    double       t_start;

    num_t        dt_a, dt_b, dt_c;
    num_t        dt_complex;

    dim_t        m, n, k;

    trans_t      transa;
    trans_t      transb;

    obj_t        alpha, a, b, beta, c;
    obj_t        c_save;

    // Decode the datatype combination string (C first, then A, then B).
    bli_param_map_char_to_blis_dt( dc_str[0], &dt_c );
    bli_param_map_char_to_blis_dt( dc_str[1], &dt_a );
    bli_param_map_char_to_blis_dt( dc_str[2], &dt_b );

    // The scalars are created in the complex domain; any one of the
    // datatypes may be projected for this purpose (it doesn't matter
    // which), so C's is used.
    dt_complex = bli_dt_proj_to_complex( dt_c );

    // Translate the dimension specifiers into concrete dimensions.
    m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
    k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_trans( pc_str[0], &transa );
    bli_param_map_char_to_blis_trans( pc_str[1], &transb );

    // Test scalars.
    bli_obj_scalar_init_detached( dt_complex, &alpha );
    bli_obj_scalar_init_detached( dt_complex, &beta );

    // Test operands, each in its own datatype.
    libblis_test_mobj_create( params, dt_a, transa,
                              sc_str[1], m, k, &a );
    libblis_test_mobj_create( params, dt_b, transb,
                              sc_str[2], k, n, &b );
    libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &c );
    libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &c_save );

    // For mixed-precision, set the computation precision of C.
    if ( params->mixed_precision )
    {
        num_t  dt_comp;
        prec_t comp_prec;

        // The computation precision is encoded in the computation
        // datatype, which appears as an additional char in dc_str.
        bli_param_map_char_to_blis_dt( dc_str[3], &dt_comp );

        // Pull the precision out of that datatype and attach it to C.
        comp_prec = bli_dt_prec( dt_comp );
        bli_obj_set_comp_prec( comp_prec, &c );
    }

    // Set alpha and beta.
    {
        bli_setsc(  2.0,  0.0, &alpha );
        bli_setsc(  1.2,  0.5, &beta );
        //bli_setsc(  1.0,  0.0, &alpha );
        //bli_setsc(  1.0,  0.0, &beta );
    }

    // Randomize A, B, and C, then back up C.
    libblis_test_mobj_randomize( params, TRUE, &a );
    libblis_test_mobj_randomize( params, TRUE, &b );
    libblis_test_mobj_randomize( params, TRUE, &c );
    bli_copym( &c, &c_save );

    // Apply the transposition/conjugation parameters.
    bli_obj_set_conjtrans( transa, &a );
    bli_obj_set_conjtrans( transb, &b );

    // Time n_repeats runs, each starting from the saved C.
    for ( rep = 0; rep < n_repeats; ++rep )
    {
        bli_copym( &c_save, &c );

        t_start = bli_clock();

#if 0
        bli_printm( "a", &a, "%5.2f", "" );
        bli_printm( "b", &b, "%5.2f", "" );
        bli_printm( "c", &c, "%5.2f", "" );
        bli_printm( "alpha", &alpha, "%5.2f", "" );
        bli_printm( "beta", &beta, "%5.2f", "" );
#endif

        libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );

#if 0
        bli_printm( "c after", &c, "%5.2f", "" );
#endif

        t_best = bli_clock_min_diff( t_best, t_start );
    }

    // Performance estimate for the best repeat. The flop count comes
    // from libblis_test_gemm_flops() rather than a fixed 2*m*n*k formula.
    //*perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF;
    //if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;
    *perf = libblis_test_gemm_flops( &a, &b, &c ) / t_best / FLOPS_PER_UNIT_PERF;

    // Compute the residual.
    libblis_test_gemm_md_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid );

    // Zero out performance and residual if output matrix is empty.
    libblis_test_check_empty_problem( &c, perf, resid );

    // Release the test objects.
    bli_obj_free( &a );
    bli_obj_free( &b );
    bli_obj_free( &c );
    bli_obj_free( &c_save );
}
int main( int argc, char** argv ) { obj_t a, x, y; obj_t a_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha; int r, n_repeats; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_a, m, n, 0, 0, &a ); bli_obj_create( dt_a, m, n, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &y ); bli_randm( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS bli_ger( &alpha, &x, &y, &a ); #else f77_int mm = bli_obj_length( a ); f77_int nn = bli_obj_width( a ); f77_int incx = bli_obj_vector_inc( x ); f77_int incy = bli_obj_vector_inc( y ); f77_int lda = bli_obj_col_stride( a ); double* alphap = bli_obj_buffer( alpha ); double* xp = bli_obj_buffer( x ); double* yp = bli_obj_buffer( y ); double* ap = bli_obj_buffer( a ); dger_( &mm, &nn, alphap, xp, &incx, yp, &incy, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_ger_blis" ); #else printf( "data_ger_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e 
%6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } bli_finalize(); return 0; }
/*
 * Run one her2k experiment and report performance and residual.
 *
 * params   - test parameters; n_repeats is read here.
 * op       - operation descriptor; dim_spec[0] and dim_spec[1] give m and k.
 * iface    - interface selector forwarded to libblis_test_her2k_impl().
 * datatype - datatype used for every scalar and matrix.
 * pc_str   - parameter chars: [0] uplo of C, [1] transa, [2] transb.
 * sc_str   - storage chars: [0] for A, [1] for B, [2] for C and its copy.
 * p_cur    - current problem size fed to the dimension specifiers.
 * perf     - output: 2*m*m*k (times 4 when C is complex) / best time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_her2k_check().
 */
void libblis_test_her2k_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int rep;

	double       best_time = DBL_MAX;
	double       t_start;

	dim_t        m, k;

	uplo_t       uploc;
	trans_t      transa, transb;

	obj_t        alpha, a, b, beta, c;
	obj_t        c_save;

	// Resolve the problem dimensions for the current problem size.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Translate the parameter characters into BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transb );

	// Scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Matrix operands; c_save mirrors c so c can be restored per repeat.
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, k, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], m, k, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c_save );

	// beta is -1 in both domains: it must stay real-valued so that C keeps
	// its Hermitian structure. Only alpha gains an imaginary part in the
	// complex case.
	if ( bli_obj_is_real( c ) )
	{
		bli_setsc(  0.8, 0.0, &alpha );
	}
	else
	{
		bli_setsc(  0.8, 0.5, &alpha );
	}
	bli_setsc( -1.0, 0.0, &beta );

	// Randomize A and B.
	libblis_test_mobj_randomize( params, TRUE, &a );
	libblis_test_mobj_randomize( params, TRUE, &b );

	// Mark C as Hermitian with the requested stored triangle.
	bli_obj_set_struc( BLIS_HERMITIAN, c );
	bli_obj_set_uplo( uploc, c );

	// Randomize C, make it densely Hermitian, then zero the unstored
	// triangle so that the implementation can only read valid data from
	// the stored region.
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_mkherm( &c );
	bli_mktrim( &c );

	// Give the saved copy the same structure/uplo, then snapshot C.
	bli_obj_set_struc( BLIS_HERMITIAN, c_save );
	bli_obj_set_uplo( uploc, c_save );
	bli_copym( &c, &c_save );

	// Attach the requested conjugation/transposition to A and B.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conjtrans( transb, b );

	// Time n_repeats runs, restoring C before each, and keep the minimum.
	for ( rep = 0; rep < n_repeats; ++rep )
	{
		bli_copym( &c_save, &c );

		t_start = bli_clock();

		libblis_test_her2k_impl( iface, &alpha, &a, &b, &beta, &c );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the fastest repeat.
	*perf = ( 2.0 * m * m * k ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Residual check against the saved C.
	libblis_test_her2k_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// An empty output matrix zeroes both results.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Release the operands.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
/*
 * Compute the residual for a her experiment.
 *
 * alpha  - the (real-valued) scalar used in the update.
 * x      - the vector used in the update, carrying its conjx setting.
 * a      - the updated matrix; modified here (made dense and general).
 * a_orig - the matrix before the update; modified here in the same way.
 * resid  - output: real part of normf( v - w ) as described below.
 *
 * Pre-conditions:
 * - x is randomized.
 * - a is randomized and Hermitian.
 * - alpha is real-valued.
 *
 * The implementation of
 *
 *   A := A_orig + alpha * conjx(x) * conjx(x)^H
 *
 * is taken to be correct if normf( v - w ) is negligible, where
 *
 *   v = A * t
 *   w = ( A_orig + alpha * conjx(x) * conjx(x)^H ) * t
 *     = A_orig * t + alpha * conjx(x) * ( conjx(x)^H * t )
 *     = A_orig * t + ( alpha * rho ) * conjx(x),   rho = conjx(x)^H * t
 */
void libblis_test_her_check
     (
       obj_t*  alpha,
       obj_t*  x,
       obj_t*  a,
       obj_t*  a_orig,
       double* resid
     )
{
	num_t  dt      = bli_obj_datatype( *a );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *a );

	dim_t  m_a     = bli_obj_length( *a );

	obj_t  x_h;
	obj_t  t, v, w;
	obj_t  tau, rho, norm;

	double junk;

	// Densify both matrices and relabel them as general/dense so the
	// reference products below can use gemv.
	bli_mkherm( a );
	bli_mkherm( a_orig );

	bli_obj_set_struc( BLIS_GENERAL, *a );
	bli_obj_set_uplo( BLIS_DENSE, *a );

	bli_obj_set_struc( BLIS_GENERAL, *a_orig );
	bli_obj_set_uplo( BLIS_DENSE, *a_orig );

	// Scalars and work vectors.
	bli_obj_scalar_init_detached( dt,      &tau );
	bli_obj_scalar_init_detached( dt,      &rho );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m_a, 1, 0, 0, &t );
	bli_obj_create( dt, m_a, 1, 0, 0, &v );
	bli_obj_create( dt, m_a, 1, 0, 0, &w );

	// Alias x with BLIS_CONJUGATE applied; used for the dot product.
	bli_obj_alias_with_conj( BLIS_CONJUGATE, *x, x_h );

	// Every element of t is ( 1 - i ) / m_a.
	bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau );
	bli_setv( &tau, &t );

	// v = A * t.
	bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );

	// w = ( alpha * rho ) * x + A_orig * t.
	bli_dotv( &x_h, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, x, &w );
	bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w );

	// resid = normf( v - w ); the second value returned by getsc is unused.
	bli_subv( &w, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
}
/*
 * Run one her experiment and report performance and residual.
 *
 * params   - test parameters; n_repeats is read here.
 * op       - operation descriptor; dim_spec[0] gives m.
 * iface    - interface selector forwarded to libblis_test_her_impl().
 * datatype - datatype used for the scalar, the vector and the matrices.
 * pc_str   - parameter chars: [0] uplo of A, [1] conjx.
 * sc_str   - storage chars: [0] for x, [1] for A and its saved copy.
 * p_cur    - current problem size fed to the dimension specifier.
 * perf     - output: m*m (times 4 when A is complex) / best time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_her_check().
 */
void libblis_test_her_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int rep;

	double       best_time = 1e9;
	double       t_start;

	dim_t        m;

	uplo_t       uploa;
	conj_t       conjx;

	obj_t        alpha, x, a;
	obj_t        a_save;

	// Resolve the problem dimension for the current problem size.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Translate the parameter characters into BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Scalar.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Operands; a_save mirrors a so a can be restored per repeat.
	libblis_test_vobj_create( params, datatype,
	                          sc_str[0], m, &x );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a_save );

	// alpha = -1 (real-valued).
	bli_setsc( -1.0, 0.0, &alpha );

	// Randomize x.
	bli_randv( &x );

	// Mark A as Hermitian with the requested stored triangle.
	bli_obj_set_struc( BLIS_HERMITIAN, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely Hermitian, then zero the unstored
	// triangle so that the implementation can only read valid data from
	// the stored region.
	bli_randm( &a );
	bli_mkherm( &a );
	bli_mktrim( &a );

	// Give the saved copy the same structure/uplo, then snapshot A.
	bli_obj_set_struc( BLIS_HERMITIAN, a_save );
	bli_obj_set_uplo( uploa, a_save );
	bli_copym( &a, &a_save );

	// Attach the requested conjugation to x.
	bli_obj_set_conj( conjx, x );

	// Time n_repeats runs, restoring A before each, and keep the minimum.
	for ( rep = 0; rep < n_repeats; ++rep )
	{
		bli_copym( &a_save, &a );

		t_start = bli_clock();

		libblis_test_her_impl( iface, &alpha, &x, &a );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the fastest repeat.
	*perf = ( 1.0 * m * m ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( a ) ) *perf *= 4.0;

	// Residual check against the saved A.
	libblis_test_her_check( &alpha, &x, &a, &a_save, resid );

	// An empty output matrix zeroes both results.
	libblis_test_check_empty_problem( &a, perf, resid );

	// Release the operands.
	bli_obj_free( &x );
	bli_obj_free( &a );
	bli_obj_free( &a_save );
}
/*
 * Run one subm experiment and report performance and residual.
 *
 * params   - test parameters, forwarded to object creation and the check.
 * op       - operation descriptor; dim_spec[0] and dim_spec[1] give m and n.
 * iface    - interface selector forwarded to libblis_test_subm_impl().
 * datatype - datatype used for the scalars and matrices.
 * pc_str   - parameter chars: [0] is transx.
 * sc_str   - storage chars: [0] for x, [1] for y.
 * p_cur    - current problem size fed to the dimension specifiers.
 * perf     - output: m*n (times 2 when x is complex) / measured time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_subm_check().
 *
 * The operation is timed exactly once; see the note at the timed region.
 */
void libblis_test_subm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	double       best_time = DBL_MAX;
	double       t_start;

	dim_t        m, n;

	trans_t      transx;

	obj_t        alpha, beta;
	obj_t        x, y;

	// Resolve the problem dimensions for the current problem size.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Translate the parameter character into a trans_t value.
	bli_param_map_char_to_blis_trans( pc_str[0], &transx );

	// Scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Matrix operands.
	libblis_test_mobj_create( params, datatype, transx,
	                          sc_str[0], m, n, &x );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, n, &y );

	// alpha = 1 + i, beta = 3 + 3i.
	bli_setsc( 1.0, 1.0, &alpha );
	bli_setsc( 3.0, 3.0, &beta );

	// Fill x with alpha and y with beta.
	bli_setm( &alpha, &x );
	bli_setm( &beta, &y );

	// Attach the requested conjugation/transposition to x.
	bli_obj_set_conjtrans( transx, x );

	// Single timed run: repeats are disabled since bli_copym() is not yet
	// tested, so y is not restored between runs.
	t_start = bli_clock();

	libblis_test_subm_impl( iface, &x, &y );

	best_time = bli_clock_min_diff( best_time, t_start );

	// Performance of the timed run.
	*perf = ( 1.0 * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 2.0;

	// Residual check.
	libblis_test_subm_check( params, &alpha, &beta, &x, &y, resid );

	// An empty output matrix zeroes both results.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Release the operands.
	bli_obj_free( &x );
	bli_obj_free( &y );
}
/*
 * Run one gemmtrsm micro-kernel experiment and report performance and
 * residual.
 *
 * params   - test parameters; n_repeats is read here.
 * op       - operation descriptor; dim_spec[0] gives k. dim_aux[0] and
 *            dim_aux[1] are written with the m and n actually used.
 * impl     - implementation selector forwarded to the _impl() wrapper.
 * datatype - datatype used for all scalars and matrices.
 * pc_str   - parameter chars: [0] is the uplo of A.
 * sc_str   - storage chars: [0] for C11 and its saved copy. A_big is always
 *            created with 'c' and B with 'r'.
 * p_cur    - current problem size fed to the dimension specifier.
 * perf     - output: ( 2*m*n*k + m*m*n ) (times 4 when B is complex) /
 *            best time / FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_gemmtrsm_ukr_check().
 */
void libblis_test_gemmtrsm_ukr_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       mt_impl_t      impl,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int rep;

	double       best_time = 1e9;
	double       t_start;

	dim_t        m, n, k;
	dim_t        b11_offset;

	char         sc_a = 'c';
	char         sc_b = 'r';

	side_t       side = BLIS_LEFT;
	uplo_t       uploa;

	obj_t        kappa;
	obj_t        alpha;
	obj_t        a_big, a, b;
	obj_t        b11, c11;
	obj_t        ap, bp;
	obj_t        a1xp, a11p, bx1p, b11p;
	obj_t        c11_save;

	// Only k comes from the problem size.
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// m and n are pinned to the register blocksizes for this datatype.
	m = bli_blksz_for_type( datatype, gemm_mr );
	n = bli_blksz_for_type( datatype, gemm_nr );

	// Publish m and n so the driver can retrieve them when printing.
	op->dim_aux[0] = m;
	op->dim_aux[1] = n;

	// Translate the parameter character into a uplo_t value.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );

	// Scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Matrix operands.
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_a, k+m, k+m, &a_big );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_b, k+m, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &c11 );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &c11_save );

	// alpha = 2 in both the real and the complex domain.
	bli_setsc( 2.0, 0.0, &alpha );

	// Mark A_big as triangular with the requested stored triangle.
	bli_obj_set_struc( BLIS_TRIANGULAR, a_big );
	bli_obj_set_uplo( uploa, a_big );

	// Randomize A_big.
	bli_randm( &a_big );

	// Randomize B and scale it by 1/m.
	bli_randm( &b );
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalm( &kappa, &b );

	// A is the last m rows of A_big.
	bli_acquire_mpart_t2b( BLIS_SUBPART1, k, m, &a_big, &a );

	// B11 is the m-row block of B starting at row k when A is lower, and
	// at row 0 otherwise. Copy it into C11 and snapshot C11.
	b11_offset = bli_obj_is_lower( a ) ? k : 0;
	bli_acquire_mpart_t2b( BLIS_SUBPART1, b11_offset, m, &b, &b11 );

	bli_copym( &b11, &c11 );
	bli_copym( &c11, &c11_save );

	// Prepare the pack objects.
	bli_obj_init_pack( &ap );
	bli_obj_init_pack( &bp );

	// A is packed as row panels with its diagonal inverted; B is packed as
	// column panels with its diagonal left as is.
	libblis_test_pobj_create( gemm_mr,
	                          gemm_mr,
	                          BLIS_INVERT_DIAG,
	                          BLIS_PACKED_ROW_PANELS,
	                          BLIS_BUFFER_FOR_A_BLOCK,
	                          &a, &ap );
	libblis_test_pobj_create( gemm_mr,
	                          gemm_nr,
	                          BLIS_NO_INVERT_DIAG,
	                          BLIS_PACKED_COL_PANELS,
	                          BLIS_BUFFER_FOR_B_PANEL,
	                          &b, &bp );

	// Pack a into ap and b into bp.
	bli_packm_blk_var3( &a, &ap );
	bli_packm_blk_var2( &b, &bp );

	// Carve the micro-kernel operands out of the packed panels.
	bli_gemmtrsm_ukr_make_subparts( k, &ap, &bp,
	                                &a1xp, &a11p,
	                                &bx1p, &b11p );

	// Time n_repeats runs and keep the minimum. C11 is restored and B is
	// re-packed (outside the timed region) before every run.
	for ( rep = 0; rep < n_repeats; ++rep )
	{
		bli_copym( &c11_save, &c11 );

		bli_packm_blk_var2( &b, &bp );

		t_start = bli_clock();

		libblis_test_gemmtrsm_ukr_impl( impl, side, &alpha,
		                                &a1xp, &a11p, &bx1p, &b11p, &c11 );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the fastest repeat.
	*perf = ( 2.0 * m * n * k + 1.0 * m * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( b ) ) *perf *= 4.0;

	// Residual check. (The empty-problem check is intentionally not applied
	// to this experiment.)
	libblis_test_gemmtrsm_ukr_check( side, &alpha,
	                                 &a1xp, &a11p, &bx1p, &b11p, &c11, &c11_save, resid );

	// Release the packing buffers held by the pack objects.
	bli_obj_release_pack( &ap );
	bli_obj_release_pack( &bp );

	// Release the operands.
	bli_obj_free( &a_big );
	bli_obj_free( &b );
	bli_obj_free( &c11 );
	bli_obj_free( &c11_save );
}
/*
 * Run one axpyf experiment and report performance and residual.
 *
 * params   - test parameters; n_repeats is read here.
 * op       - operation descriptor; dim_spec[0] gives m. dim_aux[0] is
 *            written with the fusing factor b_n.
 * iface    - interface selector forwarded to libblis_test_axpyf_impl().
 * datatype - datatype used for the scalar, matrix and vectors.
 * pc_str   - parameter chars: [0] conja, [1] conjx.
 * sc_str   - storage chars: [0] for A, [1] for x, [2] for y and its copy.
 * p_cur    - current problem size fed to the dimension specifier.
 * perf     - output: 2*m*b_n (times 4 when y is complex) / best time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_axpyf_check().
 */
void libblis_test_axpyf_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int rep;

	double       best_time = 1e9;
	double       t_start;

	dim_t        m, b_n;

	conj_t       conja, conjx;

	obj_t        alpha, a, x, y;
	obj_t        y_save;

	// Resolve the problem dimension for the current problem size.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// The second dimension is the fusing factor for this datatype; publish
	// it so the driver can retrieve it when printing results.
	b_n = bli_axpyf_fusefac( datatype );
	op->dim_aux[0] = b_n;

	// Translate the parameter characters into conj_t values.
	bli_param_map_char_to_blis_conj( pc_str[0], &conja );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Scalar.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Operands; y_save mirrors y so y can be restored per repeat.
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, b_n, &a );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], b_n, &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m, &y );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m, &y_save );

	// alpha = -1 for real y, -i for complex y.
	if ( bli_obj_is_real( y ) )
		bli_setsc( -1.0,  0.0, &alpha );
	else
		bli_setsc(  0.0, -1.0, &alpha );

	// Randomize A, x and y, then snapshot y.
	bli_randm( &a );
	bli_randv( &x );
	bli_randv( &y );
	bli_copyv( &y, &y_save );

	// Attach the requested conjugations to A and x.
	bli_obj_set_conj( conja, a );
	bli_obj_set_conj( conjx, x );

	// Time n_repeats runs, restoring y before each, and keep the minimum.
	for ( rep = 0; rep < n_repeats; ++rep )
	{
		bli_copyv( &y_save, &y );

		t_start = bli_clock();

		libblis_test_axpyf_impl( iface, &alpha, &a, &x, &y );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the fastest repeat.
	*perf = ( 2.0 * m * b_n ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

	// Residual check against the saved y.
	libblis_test_axpyf_check( &alpha, &a, &x, &y, &y_save, resid );

	// An empty output vector zeroes both results.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Release the operands.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
/*
 * Compute the residual for a gemmtrsm micro-kernel experiment.
 *
 * side     - must be BLIS_LEFT; any other value raises
 *            BLIS_NOT_YET_IMPLEMENTED via bli_check_error_code().
 * alpha    - the scalar applied to B11.
 * a1x, a11 - the packed A operands; a11's diagonal is inverted in place here.
 * bx1, b11 - the packed B operands; b11 holds the computed result.
 * c11      - the output block (not read by this check).
 * c11_orig - the contents of B11/C11 before the operation.
 * resid    - output: real part of fnorm( v - z ) as described below.
 *
 * Pre-conditions:
 * - a1x, a11, bx1, c11_orig are randomized; a11 is triangular.
 * - contents of b11 == contents of c11.
 * - side == BLIS_LEFT.
 *
 * The implementation of
 *
 *   B := inv(A11) * ( alpha * B11 - A1x * Bx1 )      (side = left)
 *
 * is taken to be correct if fnorm( v - z ) is negligible, where
 *
 *   v = B11 * t
 *   z = ( inv(A11) * ( alpha * B11_orig - A1x * Bx1 ) ) * t
 *     = inv(A11) * ( alpha * B11_orig * t - A1x * w ),   w = Bx1 * t
 */
void libblis_test_gemmtrsm_ukr_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a1x,
       obj_t*  a11,
       obj_t*  bx1,
       obj_t*  b11,
       obj_t*  c11,
       obj_t*  c11_orig,
       double* resid
     )
{
	num_t  dt      = bli_obj_datatype( *b11 );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *b11 );

	dim_t  m       = bli_obj_length( *b11 );
	dim_t  n       = bli_obj_width( *b11 );
	dim_t  k       = bli_obj_width( *a1x );

	obj_t  kappa, norm;
	obj_t  t, v, w, z;

	double junk;

	bli_obj_scalar_init_detached( dt,      &kappa );
	bli_obj_scalar_init_detached( dt_real, &norm );

	if ( !bli_is_left( side ) )
	{
		// BLIS does not currently support right-side micro-kernels.
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}
	else
	{
		// Work vectors: t has n elements, w has k, v and z have m.
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, k, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}

	// t is random, scaled by 1/n.
	bli_randv( &t );
	bli_setsc( 1.0/( double )n, 0.0, &kappa );
	bli_scalv( &kappa, &t );

	// v = B11 * t.
	bli_gemv( &BLIS_ONE, b11, &t, &BLIS_ZERO, &v );

	// Restore the diagonal of a11 to its original, un-inverted state
	// (needed for trsv).
	bli_invertd( a11 );

	if ( !bli_is_left( side ) )
	{
		// BLIS does not currently support right-side micro-kernels.
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}
	else
	{
		// w = Bx1 * t
		// z = alpha * B11_orig * t - A1x * w
		// z = inv(A11) * z
		bli_gemv( &BLIS_ONE, bx1, &t, &BLIS_ZERO, &w );
		bli_gemv( alpha, c11_orig, &t, &BLIS_ZERO, &z );
		bli_gemv( &BLIS_MINUS_ONE, a1x, &w, &BLIS_ONE, &z );
		bli_trsv( &BLIS_ONE, a11, &z );
	}

	// resid = fnorm( v - z ); the second value returned by getsc is unused.
	bli_subv( &z, &v );
	bli_fnormv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
/*
 * Run one gemv experiment and report performance and residual.
 *
 * params   - test parameters; n_repeats is read here.
 * op       - operation descriptor; dim_spec[0] and dim_spec[1] give m and n.
 * iface    - interface selector forwarded to libblis_test_gemv_impl().
 * datatype - datatype used for the scalars, matrix and vectors.
 * pc_str   - parameter chars: [0] transa, [1] conjx.
 * sc_str   - storage chars: [0] for A, [1] for x, [2] for y and its copy.
 * p_cur    - current problem size fed to the dimension specifiers.
 * perf     - output: 2*m*n (times 4 when y is complex) / best time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_gemv_check().
 */
void libblis_test_gemv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n;

	trans_t      transa;
	conj_t       conjx;

	obj_t        kappa;
	obj_t        alpha, a, x, beta, y;
	obj_t        y_save;

	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_trans( pc_str[0], &transa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands; y_save mirrors y so y can be restored before
	// every repeat.
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, n, &a );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], n, &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m, &y );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m, &y_save );

	// Set alpha and beta: ( 2, -1 ) for real y, ( 2i, -i ) for complex y.
	if ( bli_obj_is_real( y ) )
	{
		bli_setsc(  2.0,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  0.0,  2.0, &alpha );
		bli_setsc(  0.0, -1.0, &beta );
	}

	// A is zero except for its diagonal, which is set to kappa = 2 - i.
	bli_setsc( 2.0, -1.0, &kappa );
	bli_setm( &BLIS_ZERO, &a );
	bli_setd( &kappa, &a );

	// Randomize x and y, and save y.
	bli_randv( &x );
	bli_randv( &y );
	bli_copyv( &y, &y_save );

	// Apply the parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// y is a vector and was saved with bli_copyv(), so restore it with
		// the same vector operation rather than the matrix copy.
		bli_copyv( &y_save, &y );

		time = bli_clock();

		libblis_test_gemv_impl( iface, &alpha, &a, &x, &beta, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_gemv_check( &kappa, &alpha, &a, &x, &beta, &y, &y_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
/*
 * Run one fnormv experiment and report performance and residual.
 *
 * params   - test parameters; n_repeats is read here.
 * op       - operation descriptor; dim_spec[0] gives m.
 * impl     - implementation selector forwarded to libblis_test_fnormv_impl().
 * datatype - datatype of x and beta; norm uses its real projection.
 * pc_str   - parameter chars (not read by this experiment).
 * sc_str   - storage chars: [0] for x.
 * p_cur    - current problem size fed to the dimension specifier.
 * perf     - output: 2*m (times 2 when x is complex) / best time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_fnormv_check().
 */
void libblis_test_fnormv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       mt_impl_t      impl,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int rep;

	num_t        dt_real   = bli_datatype_proj_to_real( datatype );

	double       best_time = 1e9;
	double       t_start;

	dim_t        m;

	obj_t        beta, norm;
	obj_t        x;

	// Resolve the problem dimension for the current problem size.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Scalars: beta in the test datatype, norm in its real projection.
	bli_obj_scalar_init_detached( datatype, &beta );
	bli_obj_scalar_init_detached( dt_real,  &norm );

	// The single vector operand.
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );

	// beta = 2 - 2i, and every element of x is set to beta.
	bli_setsc( 2.0, -2.0, &beta );
	bli_setv( &beta, &x );

	// Time n_repeats runs and keep the minimum. x is never modified by the
	// timed region, so no restore step is needed.
	for ( rep = 0; rep < n_repeats; ++rep )
	{
		t_start = bli_clock();

		libblis_test_fnormv_impl( impl, &x, &norm );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the fastest repeat.
	*perf = ( 2.0 * m ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 2.0;

	// Residual check.
	libblis_test_fnormv_check( &beta, &x, &norm, resid );

	// An empty input vector zeroes both results.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Release the operand.
	bli_obj_free( &x );
}
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; double dtime; double dtime_save; double gflops; int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 16; p_end = 2048; p_inc = 16; m_input = 10240; n_input = 10240; k_input = -1; #else p_begin = 24; p_end = 24; p_inc = 1; m_input = -1; k_input = -1; n_input = -1; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; for ( p = p_begin + world_rank * p_inc ; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_b, k, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); { bli_gemm( &alpha, &a, &b, &beta, &c ); } #else char transa = 'N'; char transb = 'N'; int mm = bli_obj_length( c ); int kk = bli_obj_width_after_trans( a ); int nn = 
bli_obj_width( c ); int lda = bli_obj_col_stride( a ); int ldb = bli_obj_col_stride( b ); int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); //if(world_rank == 0){ #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2ld, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f %d ];\n", (p - p_begin + 1)/p_inc + 1, m, k, n, dtime_save, gflops, world_rank ); //} bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); MPI_Finalize(); return 0; }
/*
 * Compute the residual for a syr2 experiment.
 *
 * alpha  - the scalar used in the update.
 * x, y   - the vectors used in the update, carrying their conj settings.
 * a      - the updated matrix; modified here (made dense and general).
 * a_orig - the matrix before the update; modified here in the same way.
 * resid  - output: real part of normf( v - w ) as described below.
 *
 * Pre-conditions:
 * - x is randomized.
 * - y is randomized.
 * - a is randomized and symmetric.
 * Note:
 * - alpha should have a non-zero imaginary component in the complex cases
 *   in order to more fully exercise the implementation.
 *
 * The implementation of
 *
 *   A := A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T
 *
 * is taken to be correct if normf( v - w ) is negligible, where
 *
 *   v  = A * t
 *   w  = A_orig * t + w1 + w2
 *   w1 = alpha * ( conjx(x)^T * t ) * conjy(y)
 *   w2 = alpha * ( conjy(y)^T * t ) * conjx(x)
 */
void libblis_test_syr2_check
     (
       obj_t*  alpha,
       obj_t*  x,
       obj_t*  y,
       obj_t*  a,
       obj_t*  a_orig,
       double* resid
     )
{
	num_t  dt      = bli_obj_datatype( *a );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *a );

	dim_t  m_a     = bli_obj_length( *a );

	obj_t  xt, yt;
	obj_t  t, v, w1, w2;
	obj_t  tau, rho, norm;

	double junk;

	// Densify both matrices and relabel them as general/dense so the
	// reference products below can use gemv.
	bli_mksymm( a );
	bli_mksymm( a_orig );

	bli_obj_set_struc( BLIS_GENERAL, *a );
	bli_obj_set_uplo( BLIS_DENSE, *a );

	bli_obj_set_struc( BLIS_GENERAL, *a_orig );
	bli_obj_set_uplo( BLIS_DENSE, *a_orig );

	// Scalars and work vectors.
	bli_obj_scalar_init_detached( dt,      &tau );
	bli_obj_scalar_init_detached( dt,      &rho );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m_a, 1, 0, 0, &t );
	bli_obj_create( dt, m_a, 1, 0, 0, &v );
	bli_obj_create( dt, m_a, 1, 0, 0, &w1 );
	bli_obj_create( dt, m_a, 1, 0, 0, &w2 );

	// Plain aliases of x and y, used for the dot products.
	bli_obj_alias_to( *x, xt );
	bli_obj_alias_to( *y, yt );

	// Every element of t is ( 1 - i ) / m_a.
	bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau );
	bli_setv( &tau, &t );

	// v = A * t.
	bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );

	// w1 = alpha * ( x . t ) * y.
	bli_dotv( &xt, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, y, &w1 );

	// w2 = alpha * ( y . t ) * x.
	bli_dotv( &yt, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, x, &w2 );

	// w1 = w1 + w2 + A_orig * t.
	bli_addv( &w2, &w1 );
	bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 );

	// resid = normf( v - w1 ); the second value returned by getsc is unused.
	bli_subv( &w1, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w1 );
	bli_obj_free( &w2 );
}
/*
 * Run one addv experiment and report performance and residual.
 *
 * params   - test parameters, forwarded to object creation.
 * op       - operation descriptor; dim_spec[0] gives m.
 * impl     - implementation selector forwarded to libblis_test_addv_impl().
 * datatype - datatype used for the scalars and vectors.
 * pc_str   - parameter chars: [0] is conjx.
 * sc_str   - storage chars: [0] for x, [1] for y.
 * p_cur    - current problem size fed to the dimension specifier.
 * perf     - output: m (times 2 when x is complex) / measured time /
 *            FLOPS_PER_UNIT_PERF.
 * resid    - output: residual produced by libblis_test_addv_check().
 *
 * The operation is timed exactly once; see the note at the timed region.
 */
void libblis_test_addv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       mt_impl_t      impl,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	conj_t       conjx;

	obj_t        alpha, beta;
	obj_t        x, y;

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );

	// Initialize alpha = -1 - i and beta = 3 + 3i.
	bli_setsc( -1.0, -1.0, &alpha );
	bli_setsc(  3.0,  3.0, &beta );

	// Set x and y to alpha and beta, respectively.
	bli_setv( &alpha, &x );
	bli_setv( &beta,  &y );

	// Apply the parameters.
	bli_obj_set_conj( conjx, x );

	// Disable repeats since bli_copyv() is not yet tested, so y cannot be
	// restored between runs.
	//for ( i = 0; i < n_repeats; ++i )
	{
		time = bli_clock();

		libblis_test_addv_impl( impl, &x, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat. y := y + x
	// performs one addition per real element, i.e. m flops in the real
	// domain and 2*m in the complex domain.
	*perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 2.0;

	// Perform checks.
	libblis_test_addv_check( &alpha, &beta, &x, &y, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &y );
}
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, c ); bli_obj_set_uplo( uploc, c ); bli_obj_set_conjtrans( transa, a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = 
bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); 
bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
//
// Run the axpyv experiment for one problem size: build the operands, time
// the implementation params->n_repeats times, and report the performance of
// the best run together with the residual.
//
// Parameters:
// - params:   test-suite parameters (n_repeats is read here; also forwarded
//             to libblis_test_vobj_create()).
// - op:       operation descriptor; op->dim_spec[0] is the dimension specifier.
// - impl:     implementation selector forwarded to libblis_test_axpyv_impl().
// - datatype: datatype used for alpha, x and y.
// - pc_str:   parameter characters; pc_str[0] selects the conjugation of x.
// - sc_str:   per-operand characters forwarded to libblis_test_vobj_create()
//             (sc_str[0] for x, sc_str[1] for y and its saved copy).
// - p_cur:    current problem size.
// - perf:     receives the performance estimate.
// - resid:    receives the residual computed by libblis_test_axpyv_check().
//
void libblis_test_axpyv_experiment( test_params_t* params, test_op_t* op, mt_impl_t impl, num_t datatype, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid )
{
	unsigned int repeat_count = params->n_repeats;
	unsigned int rep;

	double       best_time = 1e9;
	double       t_start;

	// alpha is -2 for real data and -2i for complex data.
	double       alpha_re = -2.0;
	double       alpha_im =  0.0;

	dim_t        len;
	conj_t       conj_of_x;

	obj_t        alpha, x, y;
	obj_t        y_orig;

	// Translate the dimension specifier and the parameter character.
	len = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	bli_param_map_char_to_blis_conj( pc_str[0], &conj_of_x );

	// Scalar operand.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Vector operands, plus a backup of y.
	libblis_test_vobj_create( params, datatype, sc_str[0], len, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], len, &y );
	libblis_test_vobj_create( params, datatype, sc_str[1], len, &y_orig );

	if ( !bli_obj_is_real( y ) )
	{
		alpha_re =  0.0;
		alpha_im = -2.0;
	}
	bli_setsc( alpha_re, alpha_im, &alpha );

	// Random x and y; remember the initial y.
	bli_randv( &x );
	bli_randv( &y );
	bli_copyv( &y, &y_orig );

	// Apply the conjugation parameter to x.
	bli_obj_set_conj( conj_of_x, x );

	// Time each repeat from the same initial y and keep the fastest.
	for ( rep = 0; rep < repeat_count; ++rep )
	{
		bli_copyv( &y_orig, &y );

		t_start = bli_clock();
		libblis_test_axpyv_impl( impl, &alpha, &x, &y );
		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the best repeat; complex data is scaled by four.
	*perf = ( 2.0 * len ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) )
	{
		*perf = *perf * 4.0;
	}

	// Compute the residual.
	libblis_test_axpyv_check( &alpha, &x, &y, &y_orig, resid );

	// Zero out performance and residual if the output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Release the operands.
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_orig );
}
//
// Run the trsm experiment for one problem size: build the operands, time
// the implementation params->n_repeats times, and report the performance of
// the best run together with the residual.
//
// Parameters:
// - params:   test-suite parameters (n_repeats is read here; also forwarded
//             to the object-creation, randomization and check helpers).
// - op:       operation descriptor; dim_spec[0] and dim_spec[1] give m and n.
// - iface:    interface selector forwarded to libblis_test_trsm_impl().
// - datatype: datatype used for alpha, A and B.
// - pc_str:   parameter characters: [0] side, [1] uplo of A, [2] trans of A,
//             [3] diag of A.
// - sc_str:   per-operand characters forwarded to libblis_test_mobj_create()
//             (sc_str[0] for A, sc_str[1] for B and its saved copy).
// - p_cur:    current problem size.
// - perf:     receives the performance estimate.
// - resid:    receives the residual computed by libblis_test_trsm_check().
//
void libblis_test_trsm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, num_t datatype, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid )
{
	unsigned int repeat_count = params->n_repeats;
	unsigned int rep;

	double       best_time = 1e9;
	double       t_start;

	dim_t        m, n;
	dim_t        dim_a;

	side_t       side;
	uplo_t       uplo_of_a;
	trans_t      trans_of_a;
	diag_t       diag_of_a;

	obj_t        alpha, a, b;
	obj_t        b_orig;

	// Translate the dimension specifiers.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Translate the parameter characters.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uplo_of_a );
	bli_param_map_char_to_blis_trans( pc_str[2], &trans_of_a );
	bli_param_map_char_to_blis_diag( pc_str[3], &diag_of_a );

	// Scalar operand.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// A is square; its order is chosen from m and n according to side.
	bli_set_dim_with_side( side, m, n, dim_a );
	libblis_test_mobj_create( params, datatype, trans_of_a, sc_str[0], dim_a, dim_a, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[1], m, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[1], m, n, &b_orig );

	// alpha is ( 2, 0 ) for both real and complex datatypes.
	bli_setsc( 2.0, 0.0, &alpha );

	// Mark A as triangular, stored in the requested triangle.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uplo_of_a, a );

	// Randomize A, load its diagonal, and make it densely triangular.
	libblis_test_mobj_randomize( params, TRUE, &a );
	libblis_test_mobj_load_diag( params, &a );
	bli_mktrim( &a );

	// Randomize B and remember its initial contents.
	libblis_test_mobj_randomize( params, TRUE, &b );
	bli_copym( &b, &b_orig );

	// Apply the remaining parameters to A.
	bli_obj_set_conjtrans( trans_of_a, a );
	bli_obj_set_diag( diag_of_a, a );

	// Time each repeat from the same initial B and keep the fastest.
	for ( rep = 0; rep < repeat_count; ++rep )
	{
		bli_copym( &b_orig, &b );

		t_start = bli_clock();
		libblis_test_trsm_impl( iface, side, &alpha, &a, &b );
		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the best repeat; complex data is scaled by four.
	*perf = ( 1.0 * dim_a * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( b ) )
	{
		*perf = *perf * 4.0;
	}

	// Compute the residual.
	libblis_test_trsm_check( params, side, &alpha, &a, &b, &b_orig, resid );

	// Zero out performance and residual if the output matrix is empty.
	libblis_test_check_empty_problem( &b, perf, resid );

	// Release the operands.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &b_orig );
}
//
// Run the scalv experiment for one problem size: build the operands, time
// the implementation params->n_repeats times, and report the performance of
// the best run together with the residual.
//
// Parameters:
// - params: test-suite parameters (n_repeats is read here; also forwarded
//           to the object-creation, randomization and check helpers).
// - op:     operation descriptor; op->dim_spec[0] is the dimension specifier.
// - iface:  interface selector forwarded to libblis_test_scalv_impl().
// - dc_str: datatype combination string; only dc_str[0] is used.
// - pc_str: parameter characters; pc_str[0] selects the conjugation of beta.
// - sc_str: per-operand characters; sc_str[0] is forwarded to
//           libblis_test_vobj_create() for y and its saved copy.
// - p_cur:  current problem size.
// - perf:   receives the performance estimate.
// - resid:  receives the residual computed by libblis_test_scalv_check().
//
void libblis_test_scalv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid )
{
	unsigned int repeat_count = params->n_repeats;
	unsigned int rep;

	double       best_time = DBL_MAX;
	double       t_start;

	// beta is -2 for real data and -2i for complex data.
	double       beta_re = -2.0;
	double       beta_im =  0.0;

	num_t        dt;
	dim_t        len;
	conj_t       conj_of_beta;

	obj_t        beta, y;
	obj_t        y_orig;

	// The first character of the datatype combination string picks the type.
	bli_param_map_char_to_blis_dt( dc_str[0], &dt );

	// Translate the dimension specifier and the parameter character.
	len = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	bli_param_map_char_to_blis_conj( pc_str[0], &conj_of_beta );

	// Scalar operand.
	bli_obj_scalar_init_detached( dt, &beta );

	// Vector operand, plus a backup of it.
	libblis_test_vobj_create( params, dt, sc_str[0], len, &y );
	libblis_test_vobj_create( params, dt, sc_str[0], len, &y_orig );

	if ( !bli_obj_is_real( &y ) )
	{
		beta_re =  0.0;
		beta_im = -2.0;
	}
	bli_setsc( beta_re, beta_im, &beta );

	// Randomize y and remember its initial contents.
	libblis_test_vobj_randomize( params, FALSE, &y );
	bli_copyv( &y, &y_orig );

	// Apply the conjugation parameter to beta.
	bli_obj_set_conj( conj_of_beta, &beta );

	// Time each repeat from the same initial y and keep the fastest.
	for ( rep = 0; rep < repeat_count; ++rep )
	{
		bli_copyv( &y_orig, &y );

		t_start = bli_clock();
		libblis_test_scalv_impl( iface, &beta, &y );
		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the best repeat; complex data is scaled by six.
	*perf = ( 1.0 * len ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &y ) )
	{
		*perf = *perf * 6.0;
	}

	// Compute the residual.
	libblis_test_scalv_check( params, &beta, &y, &y_orig, resid );

	// Zero out performance and residual if the output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Release the operands.
	bli_obj_free( &y );
	bli_obj_free( &y_orig );
}
//
// Run the trmv experiment for one problem size: build the operands, time
// the implementation params->n_repeats times, and report the performance of
// the best run together with the residual.
//
// Parameters:
// - params:   test-suite parameters (n_repeats is read here; also forwarded
//             to the object-creation helpers).
// - op:       operation descriptor; op->dim_spec[0] is the dimension specifier.
// - impl:     implementation selector forwarded to libblis_test_trmv_impl().
// - datatype: datatype used for the scalars, A and x.
// - pc_str:   parameter characters: [0] uplo of A, [1] trans of A,
//             [2] diag of A.
// - sc_str:   per-operand characters forwarded to the object-creation
//             helpers (sc_str[0] for A, sc_str[1] for x and its saved copy).
// - p_cur:    current problem size.
// - perf:     receives the performance estimate.
// - resid:    receives the residual computed by libblis_test_trmv_check().
//
void libblis_test_trmv_experiment( test_params_t* params, test_op_t* op, mt_impl_t impl, num_t datatype, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	uplo_t       uploa;
	trans_t      transa;
	diag_t       diaga;

	obj_t        kappa;
	obj_t        alpha, a, x;
	obj_t        x_save;

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_diag( pc_str[2], &diaga );

	// Create test scalars.
	bli_obj_init_scalar( datatype, &alpha );
	bli_obj_init_scalar( datatype, &kappa );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &a );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &x_save );

	// Set alpha: -1 for real data, -i for complex data.
	if ( bli_obj_is_real( x ) )
		bli_setsc( -1.0,  0.0, &alpha );
	else
		bli_setsc(  0.0, -1.0, &alpha );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely triangular.
	bli_randm( &a );
	bli_mktrim( &a );

	// Randomize x and save.
	bli_randv( &x );
	bli_copyv( &x, &x_save );

	// Normalize vectors by m. Both x and its saved copy are scaled so that
	// restoring x inside the loop yields the normalized vector.
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalv( &kappa, &x );
	bli_scalv( &kappa, &x_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_diag( diaga, a );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// x is a vector and was saved with bli_copyv(), so restore it with
		// the same vector-level copy instead of the matrix-level bli_copym().
		bli_copyv( &x_save, &x );

		time = bli_clock();

		libblis_test_trmv_impl( impl, &alpha, &a, &x );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trmv_check( &alpha, &a, &x, &x_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &x_save );
}
//
// Run the gemm experiment for one problem size: build the operands, time
// the implementation params->n_repeats times, and report the performance of
// the best run together with the residual.
//
// Parameters:
// - params:   test-suite parameters (n_repeats is read here; also forwarded
//             to libblis_test_mobj_create()).
// - op:       operation descriptor; dim_spec[0..2] give m, n and k.
// - iface:    interface selector forwarded to libblis_test_gemm_impl().
// - datatype: datatype used for the scalars and all matrices.
// - pc_str:   parameter characters: [0] trans of A, [1] trans of B.
// - sc_str:   per-operand characters forwarded to libblis_test_mobj_create()
//             (sc_str[0] for A, sc_str[1] for B, sc_str[2] for C and its
//             saved copy).
// - p_cur:    current problem size.
// - perf:     receives the performance estimate.
// - resid:    receives the residual computed by libblis_test_gemm_check().
//
void libblis_test_gemm_experiment( test_params_t* params, test_op_t* op, iface_t iface, num_t datatype, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid )
{
	unsigned int repeat_count = params->n_repeats;
	unsigned int rep;

	double       best_time = 1e9;
	double       t_start;

	// Imaginary parts of alpha and beta; they stay zero for real data.
	double       alpha_im = 0.0;
	double       beta_im  = 0.0;

	dim_t        m, n, k;

	trans_t      trans_of_a;
	trans_t      trans_of_b;

	obj_t        inv_k;
	obj_t        alpha, a, b, beta, c;
	obj_t        c_orig;

	// Translate the dimension specifiers.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

	// Translate the parameter characters.
	bli_param_map_char_to_blis_trans( pc_str[0], &trans_of_a );
	bli_param_map_char_to_blis_trans( pc_str[1], &trans_of_b );

	// Scalar operands.
	bli_obj_scalar_init_detached( datatype, &inv_k );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Matrix operands, plus a backup of C.
	libblis_test_mobj_create( params, datatype, trans_of_a, sc_str[0], m, k, &a );
	libblis_test_mobj_create( params, datatype, trans_of_b, sc_str[1], k, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, n, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, n, &c_orig );

	// alpha = ( 1.2, 0 ) and beta = ( -1, 0 ) for real data;
	// alpha = ( 1.2, 0.8 ) and beta = ( -1, 1 ) for complex data.
	if ( !bli_obj_is_real( c ) )
	{
		alpha_im = 0.8;
		beta_im  = 1.0;
	}
	bli_setsc(  1.2, alpha_im, &alpha );
	bli_setsc( -1.0, beta_im,  &beta );

	// Randomize A, B and C (in that order) and remember the initial C.
	bli_randm( &a );
	bli_randm( &b );
	bli_randm( &c );
	bli_copym( &c, &c_orig );

	// Normalize A and B by k.
	bli_setsc( 1.0/( double )k, 0.0, &inv_k );
	bli_scalm( &inv_k, &a );
	bli_scalm( &inv_k, &b );

	// Apply the transposition parameters.
	bli_obj_set_conjtrans( trans_of_a, a );
	bli_obj_set_conjtrans( trans_of_b, b );

	// Time each repeat from the same initial C and keep the fastest.
	for ( rep = 0; rep < repeat_count; ++rep )
	{
		bli_copym( &c_orig, &c );

		t_start = bli_clock();
		libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );
		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance of the best repeat; complex data is scaled by four.
	*perf = ( 2.0 * m * n * k ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) )
	{
		*perf = *perf * 4.0;
	}

	// Compute the residual.
	libblis_test_gemm_check( &alpha, &a, &b, &beta, &c, &c_orig, resid );

	// Zero out performance and residual if the output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Release the operands.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_orig );
}