void libblis_test_xpbym_check ( test_params_t* params, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + conjx(x) // // is functioning correctly if // // normf( y - ( beta * y_orig + conjx(x) ) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); bli_obj_create( dt, m, n, 0, 0, &y_temp ); bli_copym( x, &x_temp ); bli_copym( y_orig, &y_temp ); bli_scalm( beta, &y_temp ); bli_addm( &x_temp, &y_temp ); bli_subm( &y_temp, y ); bli_normfm( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); }
void libblis_test_axpbyv_check( obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_datatype( *y ); num_t dt_real = bli_obj_datatype_proj_to_real( *y ); dim_t m = bli_obj_vector_dim( *y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conjx(x) // // is functioning correctly if // // normf( y - ( beta * y_orig + alpha * conjx(x) ) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); bli_scalv( alpha, &x_temp ); bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); }
void bli_obj_create_const( double value, obj_t* obj ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_check( value, obj ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, obj ); temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, *obj ); temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, *obj ); temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *obj ); temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *obj ); temp_i = bli_obj_buffer_for_const( BLIS_INT, *obj ); // Use the bli_??sets() macros to set the temp variables in order to // properly support BLIS_ENABLE_C99_COMPLEX. bli_dssets( value, 0.0, *temp_s ); bli_ddsets( value, 0.0, *temp_d ); bli_dcsets( value, 0.0, *temp_c ); bli_dzsets( value, 0.0, *temp_z ); *temp_i = ( gint_t ) value; }
void libblis_test_hemv_check( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_datatype( *y ); num_t dt_real = bli_obj_datatype_proj_to_real( *y ); dim_t m = bli_obj_vector_dim( *y ); obj_t v; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and Hermitian. // - x is randomized. // - y_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normf( y - v ) // // is negligible, where // // v = beta * y_orig + alpha * conja(A_dense) * x // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_copyv( y_orig, &v ); bli_mkherm( a ); bli_obj_set_struc( BLIS_GENERAL, *a ); bli_obj_set_uplo( BLIS_DENSE, *a ); bli_gemv( alpha, a, x, beta, &v ); bli_subv( &v, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); }
void libblis_test_scalv_check ( test_params_t* params, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t norm_y_r; obj_t nbeta; obj_t y2; double junk; // // Pre-conditions: // - y_orig is randomized. // Note: // - beta should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := conjbeta(beta) * y_orig // // is functioning correctly if // // normf( y + -conjbeta(beta) * y_orig ) // // is negligible. // bli_obj_create( dt, m, 1, 0, 0, &y2 ); bli_copyv( y_orig, &y2 ); bli_obj_scalar_init_detached( dt, &nbeta ); bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); bli_scalv( &nbeta, &y2 ); bli_addv( &y2, y ); bli_normfv( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); bli_obj_free( &y2 ); }
void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; void* buf_a; dcomplex value; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_copy_of_check( a, b ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, b ); temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, *b ); temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, *b ); temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b ); temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b ); temp_i = bli_obj_buffer_for_const( BLIS_INT, *b ); buf_a = bli_obj_buffer_at_off( *a ); bli_zzsets( 0.0, 0.0, value ); if ( bli_obj_is_float( *a ) ) { bli_szcopys( *(( float* )buf_a), value ); } else if ( bli_obj_is_double( *a ) ) { bli_dzcopys( *(( double* )buf_a), value ); } else if ( bli_obj_is_scomplex( *a ) ) { bli_czcopys( *(( scomplex* )buf_a), value ); } else if ( bli_obj_is_dcomplex( *a ) ) { bli_zzcopys( *(( dcomplex* )buf_a), value ); } else { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } bli_zscopys( value, *temp_s ); bli_zdcopys( value, *temp_d ); bli_zccopys( value, *temp_c ); bli_zzcopys( value, *temp_z ); *temp_i = ( gint_t ) bli_zreal( value ); }
int main( int argc, char** argv ) { obj_t alpha, beta, gamma; obj_t x, y, z, w, a; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates working with vector objects and the level-1v // operations. // // // Example 1: Create vector objects and then broadcast (copy) scalar // values to all elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few vectors to work with. We make them all of the same length // so that we can perform operations between them. // NOTE: We've chosen to use row vectors here (1x4) instead of column // vectors (4x1) to allow for easier reading of standard output (less // scrolling). dt = BLIS_DOUBLE; m = 1; n = 4; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &x ); bli_obj_create( dt, m, n, rs, cs, &y ); bli_obj_create( dt, m, n, rs, cs, &z ); bli_obj_create( dt, m, n, rs, cs, &w ); bli_obj_create( dt, m, n, rs, cs, &a ); // Let's also create and initialize some scalar objects. bli_obj_create_1x1( dt, &alpha ); bli_obj_create_1x1( dt, &beta ); bli_obj_create_1x1( dt, &gamma ); bli_setsc( 2.0, 0.0, &alpha ); bli_setsc( 0.2, 0.0, &beta ); bli_setsc( 3.0, 0.0, &gamma ); bli_printm( "alpha:", &alpha, "%4.1f", "" ); bli_printm( "beta:", &beta, "%4.1f", "" ); bli_printm( "gamma:", &gamma, "%4.1f", "" ); // Vectors can set by "broadcasting" a constant to every element. bli_setv( &BLIS_ONE, &x ); bli_setv( &alpha, &y ); bli_setv( &BLIS_ZERO, &z ); // Note that we can use printv or printm to print vectors since vectors // are also matrices. We choose to use printm because it honors the // orientation of the vector (row or column) when printing, whereas // printv always prints vectors as column vectors regardless of their // they are 1 x n or n x 1. bli_printm( "x := 1.0", &x, "%4.1f", "" ); bli_printm( "y := alpha", &y, "%4.1f", "" ); bli_printm( "z := 0.0", &z, "%4.1f", "" ); // // Example 2: Randomize a vector object. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Set a vector to random values. bli_randv( &w ); bli_printm( "w := randv()", &w, "%4.1f", "" ); // // Example 3: Perform various element-wise operations on vector objects. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Copy a vector. bli_copyv( &w, &a ); bli_printm( "a := w", &a, "%4.1f", "" ); // Add and subtract vectors. bli_addv( &y, &a ); bli_printm( "a := a + y", &a, "%4.1f", "" ); bli_subv( &w, &a ); bli_printm( "a := a - w", &a, "%4.1f", "" ); // Scale a vector (destructive). bli_scalv( &beta, &a ); bli_printm( "a := beta * a", &a, "%4.1f", "" ); // Scale a vector (non-destructive). bli_scal2v( &gamma, &a, &z ); bli_printm( "z := gamma * a", &z, "%4.1f", "" ); // Scale and accumulate between vectors. bli_axpyv( &alpha, &w, &x ); bli_printm( "x := x + alpha * w", &x, "%4.1f", "" ); bli_xpbyv( &w, &BLIS_MINUS_ONE, &x ); bli_printm( "x := -1.0 * x + w", &x, "%4.1f", "" ); // Invert a vector element-wise. bli_invertv( &y ); bli_printm( "y := 1 / y", &y, "%4.1f", "" ); // Swap two vectors. bli_swapv( &x, &y ); bli_printm( "x (after swapping with y)", &x, "%4.1f", "" ); bli_printm( "y (after swapping with x)", &y, "%4.1f", "" ); // // Example 4: Perform contraction-like operations on vector objects. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Perform a dot product. bli_dotv( &a, &z, &gamma ); bli_printm( "gamma := a * z (dot product)", &gamma, "%5.2f", "" ); // Perform an extended dot product. bli_dotxv( &alpha, &a, &z, &BLIS_ONE, &gamma ); bli_printm( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product)", &gamma, "%5.2f", "" ); // Free the objects. bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &gamma ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &z ); bli_obj_free( &w ); bli_obj_free( &a ); return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, c ); bli_obj_set_uplo( uploc, c ); bli_obj_set_conjtrans( transa, a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
void libblis_test_syr2_check( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ) { num_t dt = bli_obj_datatype( *a ); num_t dt_real = bli_obj_datatype_proj_to_real( *a ); dim_t m_a = bli_obj_length( *a ); obj_t xt, yt; obj_t t, v, w1, w2; obj_t tau, rho, norm; double junk; // // Pre-conditions: // - x is randomized. // - y is randomized. // - a is randomized and symmetric. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // A := A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T // // is functioning correctly if // // normf( v - w ) // // is negligible, where // // v = A * t // w = ( A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T ) * t // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * conjx(x)^T * t // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * rho // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + w1 // = A_orig * t + alpha * conjx(x) * rho + w1 // = A_orig * t + w2 + w1 // bli_mksymm( a ); bli_mksymm( a_orig ); bli_obj_set_struc( BLIS_GENERAL, *a ); bli_obj_set_struc( BLIS_GENERAL, *a_orig ); bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); bli_obj_scalar_init_detached( dt, &tau ); bli_obj_scalar_init_detached( dt, &rho ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); bli_obj_create( dt, m_a, 1, 0, 0, &w1 ); bli_obj_create( dt, m_a, 1, 0, 0, &w2 ); bli_obj_alias_to( *x, xt ); bli_obj_alias_to( *y, yt ); bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau ); bli_setv( &tau, &t ); bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v ); bli_dotv( &xt, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, y, &w1 ); bli_dotv( &yt, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, x, &w2 ); bli_addv( &w2, &w1 ); bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 ); bli_subv( &w1, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); }
void libblis_test_gemmtrsm_ukr_check( side_t side, obj_t* alpha, obj_t* a1x, obj_t* a11, obj_t* bx1, obj_t* b11, obj_t* c11, obj_t* c11_orig, double* resid ) { num_t dt = bli_obj_datatype( *b11 ); num_t dt_real = bli_obj_datatype_proj_to_real( *b11 ); dim_t m = bli_obj_length( *b11 ); dim_t n = bli_obj_width( *b11 ); dim_t k = bli_obj_width( *a1x ); obj_t kappa, norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a1x, a11, bx1, c11_orig are randomized; a11 is triangular. // - contents of b11 == contents of c11. // - side == BLIS_LEFT. // // Under these conditions, we assume that the implementation for // // B := inv(A11) * ( alpha * B11 - A1x * Bx1 ) (side = left) // // is functioning correctly if // // fnorm( v - z ) // // is negligible, where // // v = B11 * t // // z = ( inv(A11) * ( alpha * B11_orig - A1x * Bx1 ) ) * t // = inv(A11) * ( alpha * B11_orig * t - A1x * Bx1 * t ) // = inv(A11) * ( alpha * B11_orig * t - A1x * w ) // bli_obj_scalar_init_detached( dt, &kappa ); bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } else // else if ( bli_is_left( side ) ) { // BLIS does not currently support right-side micro-kernels. bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } bli_randv( &t ); bli_setsc( 1.0/( double )n, 0.0, &kappa ); bli_scalv( &kappa, &t ); bli_gemv( &BLIS_ONE, b11, &t, &BLIS_ZERO, &v ); // Restore the diagonal of a11 to its original, un-inverted state // (needed for trsv). bli_invertd( a11 ); if ( bli_is_left( side ) ) { bli_gemv( &BLIS_ONE, bx1, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, c11_orig, &t, &BLIS_ZERO, &z ); bli_gemv( &BLIS_MINUS_ONE, a1x, &w, &BLIS_ONE, &z ); bli_trsv( &BLIS_ONE, a11, &z ); } else // else if ( bli_is_left( side ) ) { // BLIS does not currently support right-side micro-kernels. bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } bli_subv( &z, &v ); bli_fnormv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); }
void libblis_test_gemv_check( obj_t* kappa, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_datatype( *y ); num_t dt_real = bli_obj_datatype_proj_to_real( *y ); conj_t conja = bli_obj_conj_status( *a ); dim_t n_x = bli_obj_vector_dim( *x ); dim_t m_y = bli_obj_vector_dim( *y ); dim_t min_m_n = bli_min( m_y, n_x ); obj_t x_temp, y_temp; obj_t kappac, norm; obj_t xT_temp, yT_temp, yT; double junk; // // Pre-conditions: // - a is initialized to kappa along the diagonal. // - x is randomized. // - y_orig is randomized. // Note: // - alpha, beta, and kappa should have non-zero imaginary components in // the complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * transa(A) * conjx(x) // // is functioning correctly if // // normf( y - z ) // // is negligible, where // // z = beta * y_orig + alpha * conja(kappa) * x // bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappac ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_x, 1, 0, 0, &x_temp ); bli_obj_create( dt, m_y, 1, 0, 0, &y_temp ); bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, &x_temp, &xT_temp ); bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, &y_temp, &yT_temp ); bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, y, &yT ); bli_scalv( &kappac, &xT_temp ); bli_scalv( beta, &yT_temp ); bli_axpyv( alpha, &xT_temp, &yT_temp ); bli_subv( &yT_temp, &yT ); bli_normfv( &yT, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); }
void libblis_test_syr2k_check( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_datatype( *c ); num_t dt_real = bli_obj_datatype_proj_to_real( *c ); dim_t m = bli_obj_length( *c ); dim_t k = bli_obj_width_after_trans( *a ); obj_t at, bt; obj_t kappa, norm; obj_t t, v, w1, w2, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized and symmetric. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B)^T + alpha * transb(B) * transa(A)^T // // is functioning correctly if // // normf( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * transa(A) * transb(B)^T + alpha * transb(B) * transa(A)^T ) * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^T * t + alpha * transb(B) * transa(A)^T * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^T * t + alpha * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + alpha * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + z // = beta * C_orig * t + z // bli_obj_alias_with_trans( BLIS_TRANSPOSE, *a, at ); bli_obj_alias_with_trans( BLIS_TRANSPOSE, *b, bt ); bli_obj_scalar_init_detached( dt, &kappa ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w1 ); bli_obj_create( dt, k, 1, 0, 0, &w2 ); bli_obj_create( dt, m, 1, 0, 0, &z ); bli_randv( &t ); bli_setsc( 1.0/( double )m, 0.0, &kappa ); bli_scalv( &kappa, &t ); bli_symv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, &at, &t, &BLIS_ZERO, &w2 ); bli_gemv( &BLIS_ONE, &bt, &t, &BLIS_ZERO, &w1 ); bli_gemv( alpha, a, &w1, &BLIS_ZERO, &z ); bli_gemv( alpha, b, &w2, &BLIS_ONE, &z ); bli_symv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); bli_obj_free( &z ); }
int main( int argc, char** argv ) { obj_t a, x; obj_t a_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 6; #endif #if 1 dt_alpha = dt_x = dt_a = BLIS_DOUBLE; #else dt_alpha = dt_x = dt_a = BLIS_DCOMPLEX; #endif uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_a, m, m, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &a ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); //bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uplo, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS //bli_obj_toggle_conj( &x ); //bli_syr( &alpha, bli_her( &alpha, &x, &a ); #else f77_char uplo = 'L'; f77_int mm = bli_obj_length( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int lda = bli_obj_col_stride( &a ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* ap = bli_obj_buffer( &a ); /* dcomplex* xp = bli_obj_buffer( x ); dcomplex* ap = bli_obj_buffer( &a ); */ dsyr_( &uplo, //zher_( &uplo, &mm, alphap, xp, &incx, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &a ); bli_obj_free( &a_save ); } //bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, a ); bli_obj_set_uplo( uploa, a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( a ); bli_obj_inc_diag_off( 1, a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_off( -1, a ); bli_obj_toggle_uplo( a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt, dt_real; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; extern blksz_t* gemm_kc; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; dt_real = bli_datatype_proj_to_real( DT ); p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; // Extract the kc blocksize for the requested datatype and its // real analogue. dim_t kc = bli_blksz_get_def( dt, gemm_kc ); dim_t kc_real = bli_blksz_get_def( dt_real, gemm_kc ); // Assign the k dimension depending on which implementation is // being tested. Note that the BLIS_NAT case handles the real // domain cases as well as native complex. if ( IND == BLIS_NAT ) k_input = kc; else if ( IND == BLIS_3M1 ) k_input = kc_real / 3; else if ( IND == BLIS_4M1A ) k_input = kc_real / 2; else k_input = kc_real; // Adjust the relative dimensions, if requested. #if (defined ADJ_MK) m_input = -2; k_input = -2; n_input = -1; #elif (defined ADJ_KN) k_input = -2; n_input = -2; m_input = -1; #elif (defined ADJ_MN) m_input = -2; n_input = -2; k_input = -1; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, a ); bli_obj_set_conjtrans( transb, b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( IND, dt ); #endif dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &f77_transa, //zgemm3m_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; }
void libblis_test_axpyf_check( obj_t* alpha, obj_t* a, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_datatype( *y ); num_t dt_real = bli_obj_datatype_proj_to_real( *y ); dim_t m = bli_obj_vector_dim( *y ); dim_t b_n = bli_obj_width( *a ); dim_t i; obj_t a1, chi1, v; obj_t alpha_chi1; obj_t norm; double junk; // // Pre-conditions: // - a is randomized. // - x is randomized. // - y is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := y_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normf( y - v ) // // is negligible, where v contains y as computed by repeated calls to // axpyv. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_copyv( y_orig, &v ); for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 ); bli_copysc( &chi1, &alpha_chi1 ); bli_mulsc( alpha, &alpha_chi1 ); bli_axpyv( &alpha_chi1, &a1, &v ); } bli_subv( y, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); }
void libblis_test_gemm_md_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( c ); num_t dt_comp = bli_obj_dt_proj_to_complex( c ); num_t dt; dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t norm; obj_t t, v, w, z; double junk; // Compute our reference checksum in the real domain if all operands // are real, and in the complex domain otherwise. Also implicit in this // is that we use the storage precision of C to determine the precision // in which we perform the reference checksum. if ( bli_obj_is_real( a ) && bli_obj_is_real( b ) && bli_obj_is_real( c ) ) dt = dt_real; else dt = dt_comp; // This function works in a manner similar to that of the function // libblis_test_gemm_check(), except that we project a, b, and c into // the complex domain (regardless of their storage datatype), and then // proceed with the checking accordingly. obj_t a2, b2, c2, c0; bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); // We need to zero out the imaginary part of t in order for our // checks to work in all cases. Otherwise, the imaginary parts // could affect intermediate products, depending on the order that // they are executed. bli_setiv( &BLIS_ZERO, &t ); // Create complex equivalents of a, b, c_orig, and c. bli_obj_create( dt, m, k, 0, 0, &a2 ); bli_obj_create( dt, k, n, 0, 0, &b2 ); bli_obj_create( dt, m, n, 0, 0, &c2 ); bli_obj_create( dt, m, n, 0, 0, &c0 ); // Cast a, b, c_orig, and c into the datatype of our temporary objects. bli_castm( a, &a2 ); bli_castm( b, &b2 ); bli_castm( c_orig, &c2 ); bli_castm( c, &c0 ); bli_gemv( &BLIS_ONE, &c0, &t, &BLIS_ZERO, &v ); #if 0 if ( bli_obj_is_scomplex( c ) && bli_obj_is_float( a ) && bli_obj_is_float( b ) ) { bli_printm( "test_gemm.c: a", a, "%7.3f", "" ); bli_printm( "test_gemm.c: b", b, "%7.3f", "" ); bli_printm( "test_gemm.c: c orig", c_orig, "%7.3f", "" ); bli_printm( "test_gemm.c: c computed", c, "%7.3f", "" ); } #endif #if 0 bli_gemm( alpha, &a2, &b2, beta, &c2 ); bli_gemv( &BLIS_ONE, &c2, &t, &BLIS_ZERO, &z ); if ( bli_obj_is_real( c ) ) bli_setiv( &BLIS_ZERO, &z ); #else bli_gemv( &BLIS_ONE, &b2, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, &a2, &w, &BLIS_ZERO, &z ); bli_gemv( beta, &c2, &t, &BLIS_ONE, &z ); if ( bli_obj_is_real( c ) ) bli_setiv( &BLIS_ZERO, &z ); #endif bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); bli_obj_free( &a2 ); bli_obj_free( &b2 ); bli_obj_free( &c2 ); bli_obj_free( &c0 ); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; double dtime; double dtime_save; double gflops; int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 16; p_end = 2048; p_inc = 16; m_input = 10240; n_input = 10240; k_input = -1; #else p_begin = 24; p_end = 24; p_inc = 1; m_input = -1; k_input = -1; n_input = -1; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; for ( p = p_begin + world_rank * p_inc ; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_b, k, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); { bli_gemm( &alpha, &a, &b, &beta, &c ); } #else char transa = 'N'; char transb = 'N'; int mm = bli_obj_length( c ); int kk = bli_obj_width_after_trans( a ); int nn = bli_obj_width( c ); int lda = bli_obj_col_stride( a ); int ldb = bli_obj_col_stride( b ); int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); //if(world_rank == 0){ #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2ld, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f %d ];\n", (p - p_begin + 1)/p_inc + 1, m, k, n, dtime_save, gflops, world_rank ); //} bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); MPI_Finalize(); return 0; }
void libblis_test_gemm_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_dt( c ); num_t dt_real = bli_obj_dt_proj_to_real( c ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B) // // is functioning correctly if // // normf( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * transa(A) * transb(B) ) * t // = beta * C_orig * t + alpha * transa(A) * transb(B) * t // = beta * C_orig * t + alpha * transa(A) * w // = beta * C_orig * t + z // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, a, &w, &BLIS_ZERO, &z ); bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 8; n_input = 4; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, a ); bli_obj_set_uplo( uplo, a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%11.8f", "" ); bli_printm( "c", &c, "%14.11f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trmm( side, &alpha, &a, &c ); #else f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrmm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%14.11f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trmm_blis" ); #else printf( "data_trmm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
void libblis_test_trmm3_check( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_datatype( *c ); num_t dt_real = bli_obj_datatype_proj_to_real( *c ); dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); obj_t kappa, norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized and triangular. // - b is randomized. // - c_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B) (side = left) // C := beta * C_orig + alpha * transb(B) * transa(A) (side = right) // // is functioning correctly if // // fnorm( v - z ) // // is negligible, where // // v = C * t // // z = ( beta * C_orig + alpha * transa(A) * transb(B) ) * t (side = left) // = beta * C_orig * t + alpha * transa(A) * transb(B) * t // = beta * C_orig * t + alpha * transa(A) * w // = beta * C_orig * t + z // // z = ( beta * C_orig + alpha * transb(B) * transa(A) ) * t (side = right) // = beta * C_orig * t + alpha * transb(B) * transa(A) * t // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z bli_obj_scalar_init_detached( dt, &kappa ); bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } else // else if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, n, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } bli_randv( &t ); bli_setsc( 1.0/( double )n, 0.0, &kappa ); bli_scalv( &kappa, &t ); bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); if ( bli_is_left( side ) ) { bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w ); bli_trmv( alpha, a, &w ); bli_copyv( &w, &z ); } else { bli_copyv( &t, &w ); bli_trmv( &BLIS_ONE, a, &w ); bli_gemv( alpha, b, &w, &BLIS_ZERO, &z ); } bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_fnormv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); //bli_obj_set_diag( BLIS_UNIT_DIAG, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* { obj_t a2; bli_obj_alias_to( &a, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_inc_diag_offset( 1, &a2 ); bli_setm( &BLIS_ZERO, &a2 ); bli_obj_inc_diag_offset( -2, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 ); bli_scalm( &BLIS_TWO, &a2 ); //bli_scalm( &BLIS_TWO, &a ); } */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trsm( side, //bli_trsm4m( side, //bli_trsm3m( side, &alpha, &a, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float * alphap = bli_obj_buffer( &alpha ); float * ap = bli_obj_buffer( &a ); float * cp = bli_obj_buffer( &c ); strsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else // if ( bli_is_complex( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &side, //ztrsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
void libblis_test_her_check( obj_t* alpha, obj_t* x, obj_t* a, obj_t* a_orig, double* resid ) { num_t dt = bli_obj_datatype( *a ); num_t dt_real = bli_obj_datatype_proj_to_real( *a ); dim_t m_a = bli_obj_length( *a ); obj_t xh, t, v, w; obj_t tau, rho, norm; double junk; // // Pre-conditions: // - x is randomized. // - a is randomized and Hermitian. // Note: // - alpha must be real-valued. // // Under these conditions, we assume that the implementation for // // A := A_orig + alpha * conjx(x) * conjx(x)^H // // is functioning correctly if // // normf( v - w ) // // is negligible, where // // v = A * t // w = ( A_orig + alpha * conjx(x) * conjx(x)^H ) * t // = A_orig * t + alpha * conjx(x) * conjx(x)^H * t // = A_orig * t + alpha * conjx(x) * rho // = A_orig * t + w // bli_mkherm( a ); bli_mkherm( a_orig ); bli_obj_set_struc( BLIS_GENERAL, *a ); bli_obj_set_struc( BLIS_GENERAL, *a_orig ); bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); bli_obj_scalar_init_detached( dt, &tau ); bli_obj_scalar_init_detached( dt, &rho ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); bli_obj_create( dt, m_a, 1, 0, 0, &w ); bli_obj_alias_with_conj( BLIS_CONJUGATE, *x, xh ); bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau ); bli_setv( &tau, &t ); bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v ); bli_dotv( &xh, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, x, &w ); bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w ); bli_subv( &w, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); }
void libblis_test_dotxaxpyf_check ( test_params_t* params, obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z, obj_t* y_orig, obj_t* z_orig, double* resid ) { num_t dt = bli_obj_datatype( *y ); num_t dt_real = bli_obj_datatype_proj_to_real( *y ); dim_t m = bli_obj_vector_dim( *z ); dim_t b_n = bli_obj_vector_dim( *y ); dim_t i; obj_t a1, chi1, psi1, v, q; obj_t alpha_chi1; obj_t norm; double resid1, resid2; double junk; // // Pre-conditions: // - a is randomized. // - w is randomized. // - x is randomized. // - y is randomized. // - z is randomized. // - at is an alias to a. // Note: // - alpha and beta should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conjat(A^T) * conjw(w) // z := z_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normf( y - v ) // // and // // normf( z - q ) // // are negligible, where v and q contain y and z as computed by repeated // calls to dotxv and axpyv, respectively. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &q ); bli_copyv( y_orig, &v ); bli_copyv( z_orig, &q ); // v := beta * v + alpha * conjat(at) * conjw(w) for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, at, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, &v, &psi1 ); bli_dotxv( alpha, &a1, w, beta, &psi1 ); } // q := q + alpha * conja(a) * conjx(x) for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 ); bli_copysc( &chi1, &alpha_chi1 ); bli_mulsc( alpha, &alpha_chi1 ); bli_axpyv( &alpha_chi1, &a1, &q ); } bli_subv( y, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, &resid1, &junk ); bli_subv( z, &q ); bli_normfv( &q, &norm ); bli_getsc( &norm, &resid2, &junk ); *resid = bli_fmaxabs( resid1, resid2 ); bli_obj_free( &v ); bli_obj_free( &q ); }
int main( int argc, char** argv ) { obj_t a, x, y; obj_t a_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha; int r, n_repeats; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_a, m, n, 0, 0, &a ); bli_obj_create( dt_a, m, n, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &y ); bli_randm( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS bli_ger( &alpha, &x, &y, &a ); #else f77_int mm = bli_obj_length( a ); f77_int nn = bli_obj_width( a ); f77_int incx = bli_obj_vector_inc( x ); f77_int incy = bli_obj_vector_inc( y ); f77_int lda = bli_obj_col_stride( a ); double* alphap = bli_obj_buffer( alpha ); double* xp = bli_obj_buffer( x ); double* yp = bli_obj_buffer( y ); double* ap = bli_obj_buffer( a ); dger_( &mm, &nn, alphap, xp, &incx, yp, &incy, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_ger_blis" ); #else printf( "data_ger_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } bli_finalize(); return 0; }
void libblis_test_trsm_check ( test_params_t* params, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* b_orig, double* resid ) { num_t dt = bli_obj_datatype( *b ); num_t dt_real = bli_obj_datatype_proj_to_real( *b ); dim_t m = bli_obj_length( *b ); dim_t n = bli_obj_width( *b ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized and triangular. // - b_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // B := alpha * inv(transa(A)) * B_orig (side = left) // B := alpha * B_orig * inv(transa(A)) (side = right) // // is functioning correctly if // // normf( v - z ) // // is negligible, where // // v = B * t // // z = ( alpha * inv(transa(A)) * B ) * t (side = left) // = alpha * inv(transa(A)) * B * t // = alpha * inv(transa(A)) * w // // z = ( alpha * B * inv(transa(A)) ) * t (side = right) // = alpha * B * tinv(ransa(A)) * t // = alpha * B * w bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } else // else if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, n, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v ); if ( bli_is_left( side ) ) { bli_gemv( alpha, b_orig, &t, &BLIS_ZERO, &w ); bli_trsv( &BLIS_ONE, a, &w ); bli_copyv( &w, &z ); } else { bli_copyv( &t, &w ); bli_trsv( &BLIS_ONE, a, &w ); bli_gemv( alpha, b_orig, &w, &BLIS_ZERO, &z ); } bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); }
void libblis_test_her2k_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_datatype( *c ); num_t dt_real = bli_obj_datatype_proj_to_real( *c ); dim_t m = bli_obj_length( *c ); dim_t k = bli_obj_width_after_trans( *a ); obj_t alphac, ah, bh; obj_t norm; obj_t t, v, w1, w2, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized and Hermitian. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // - beta must be real-valued. // // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H // // is functioning correctly if // // normf( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H ) * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * transa(A)^H * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + conj(alpha) * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + z // = beta * C_orig * t + z // bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah ); bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *b, bh ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_scalar_init_detached_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w1 ); bli_obj_create( dt, k, 1, 0, 0, &w2 ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_hemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, &ah, &t, &BLIS_ZERO, &w2 ); bli_gemv( &BLIS_ONE, &bh, &t, &BLIS_ZERO, &w1 ); bli_gemv( alpha, a, &w1, &BLIS_ZERO, &z ); bli_gemv( &alphac, b, &w2, &BLIS_ONE, &z ); bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); bli_obj_free( &z ); }
void libblis_test_trmv_check( obj_t* alpha, obj_t* a, obj_t* x, obj_t* x_orig, double* resid ) { num_t dt = bli_obj_datatype( *x ); num_t dt_real = bli_obj_datatype_proj_to_real( *x ); dim_t m = bli_obj_vector_dim( *x ); uplo_t uploa = bli_obj_uplo( *a ); trans_t transa = bli_obj_conjtrans_status( *a ); obj_t a_local, y; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and triangular. // - x is randomized. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // x := alpha * transa(A) * x_orig // // is functioning correctly if // // fnorm( y - x ) // // is negligible, where // // y = alpha * conja(A_dense) * x_orig // bli_obj_init_scalar( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &y ); bli_obj_create( dt, m, m, 0, 0, &a_local ); bli_obj_set_struc( BLIS_TRIANGULAR, a_local ); bli_obj_set_uplo( uploa, a_local ); bli_obj_toggle_uplo_if_trans( transa, a_local ); bli_copym( a, &a_local ); bli_mktrim( &a_local ); bli_obj_set_struc( BLIS_GENERAL, a_local ); bli_obj_set_uplo( BLIS_DENSE, a_local ); bli_gemv( alpha, &a_local, x_orig, &BLIS_ZERO, &y ); bli_subv( x, &y ); bli_fnormv( &y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &y ); bli_obj_free( &a_local ); }
int main( int argc, char** argv ) { bli_init(); #if 0 obj_t a, b, c; obj_t aa, bb, cc; dim_t m, n, k; num_t dt; uplo_t uploa, uplob, uploc; { dt = BLIS_DOUBLE; m = 6; k = 6; n = 6; bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); uploa = BLIS_UPPER; uploa = BLIS_LOWER; bli_obj_set_struc( BLIS_TRIANGULAR, a ); bli_obj_set_uplo( uploa, a ); bli_obj_set_diag_offset( -2, a ); uplob = BLIS_UPPER; uplob = BLIS_LOWER; bli_obj_set_struc( BLIS_TRIANGULAR, b ); bli_obj_set_uplo( uplob, b ); bli_obj_set_diag_offset( -2, b ); uploc = BLIS_UPPER; //uploc = BLIS_LOWER; //uploc = BLIS_ZEROS; //uploc = BLIS_DENSE; bli_obj_set_struc( BLIS_HERMITIAN, c ); //bli_obj_set_struc( BLIS_TRIANGULAR, c ); bli_obj_set_uplo( uploc, c ); bli_obj_set_diag_offset( 1, c ); bli_obj_alias_to( a, aa ); (void)aa; bli_obj_alias_to( b, bb ); (void)bb; bli_obj_alias_to( c, cc ); (void)cc; bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); //bli_mkherm( &a ); //bli_mktrim( &a ); bli_prune_unref_mparts( &cc, BLIS_M, &aa, BLIS_N ); bli_printm( "c orig", &c, "%4.1f", "" ); bli_printm( "c alias", &cc, "%4.1f", "" ); bli_printm( "a orig", &a, "%4.1f", "" ); bli_printm( "a alias", &aa, "%4.1f", "" ); //bli_obj_print( "a struct", &a ); } #endif dim_t p_begin, p_max, p_inc; gint_t m_input, n_input; char uploa_ch; doff_t diagoffa; dim_t bf; dim_t n_way; char part_dim_ch; bool_t go_fwd; char out_ch; obj_t a; thrinfo_t thrinfo; dim_t m, n; uplo_t uploa; bool_t part_m_dim, part_n_dim; bool_t go_bwd; dim_t p; num_t dt; dim_t start, end; dim_t width; siz_t area; gint_t t_begin, t_stop, t_inc; dim_t t; if ( argc == 13 ) { sscanf( argv[1], "%lu", &p_begin ); sscanf( argv[2], "%lu", &p_max ); sscanf( argv[3], "%lu", &p_inc ); sscanf( argv[4], "%ld", &m_input ); sscanf( argv[5], "%ld", &n_input ); sscanf( argv[6], "%c", &uploa_ch ); sscanf( argv[7], "%ld", &diagoffa ); sscanf( argv[8], "%lu", &bf ); sscanf( argv[9], "%lu", &n_way ); sscanf( argv[10], "%c", &part_dim_ch ); sscanf( argv[11], "%lu", &go_fwd ); sscanf( argv[12], "%c", &out_ch ); } else { printf( "\n" ); printf( " %s\n", argv[0] ); printf( "\n" ); printf( " Simulate the dimension ranges assigned to threads when\n" ); printf( " partitioning a matrix for parallelism in BLIS.\n" ); printf( "\n" ); printf( " Usage:\n" ); printf( "\n" ); printf( " %s p_beg p_max p_inc m n uplo doff bf n_way part_dim go_fwd out\n", argv[0] ); printf( "\n" ); printf( " p_beg: the first problem size p to test.\n" ); printf( " p_max: the maximum problem size p to test.\n" ); printf( " p_inc: the increase in problem size p between tests.\n" ); printf( " m: the m dimension:\n" ); printf( " n: the n dimension:\n" ); printf( " if m,n = -1: bind m,n to problem size p.\n" ); printf( " if m,n = 0: bind m,n to p_max.\n" ); printf( " if m,n > 0: hold m,n = c constant for all p.\n" ); printf( " uplo: the uplo field of the matrix being partitioned:\n" ); printf( " 'l': lower-stored (BLIS_LOWER)\n" ); printf( " 'u': upper-stored (BLIS_UPPER)\n" ); printf( " 'd': densely-stored (BLIS_DENSE)\n" ); printf( " doff: the diagonal offset of the matrix being partitioned.\n" ); printf( " bf: the simulated blocking factor. all thread ranges must\n" ); printf( " be a multiple of bf, except for the range that contains\n" ); printf( " the edge case (if one exists). the blocking factor\n" ); printf( " would typically correspond to a register blocksize.\n" ); printf( " n_way: the number of ways of parallelism for which we are\n" ); printf( " partitioning (i.e.: the number of threads, or thread\n" ); printf( " groups).\n" ); printf( " part_dim: the dimension to partition:\n" ); printf( " 'm': partition the m dimension.\n" ); printf( " 'n': partition the n dimension.\n" ); printf( " go_fwd: the direction to partition:\n" ); printf( " '1': forward, e.g. left-to-right (part_dim = 'm') or\n" ); printf( " top-to-bottom (part_dim = 'n')\n" ); printf( " '0': backward, e.g. right-to-left (part_dim = 'm') or\n" ); printf( " bottom-to-top (part_dim = 'n')\n" ); printf( " NOTE: reversing the direction does not change the\n" ); printf( " subpartitions' widths, but it does change which end of\n" ); printf( " the index range receives the edge case, if it exists.\n" ); printf( " out: the type of output per thread-column:\n" ); printf( " 'w': the width (and area) of the thread's subpartition\n" ); printf( " 'r': the actual ranges of the thread's subpartition\n" ); printf( " where the start and end points of each range are\n" ); printf( " inclusive and exclusive, respectively.\n" ); printf( "\n" ); exit(1); } if ( m_input == 0 ) m_input = p_max; if ( n_input == 0 ) n_input = p_max; if ( part_dim_ch == 'm' ) { part_m_dim = TRUE; part_n_dim = FALSE; } else { part_m_dim = FALSE; part_n_dim = TRUE; } go_bwd = !go_fwd; if ( uploa_ch == 'l' ) uploa = BLIS_LOWER; else if ( uploa_ch == 'u' ) uploa = BLIS_UPPER; else uploa = BLIS_DENSE; if ( part_n_dim ) { if ( bli_is_upper( uploa ) ) { t_begin = n_way-1; t_stop = -1; t_inc = -1; } else /* if lower or dense */ { t_begin = 0; t_stop = n_way; t_inc = 1; } } else // if ( part_m_dim ) { if ( bli_is_lower( uploa ) ) { t_begin = n_way-1; t_stop = -1; t_inc = -1; } else /* if upper or dense */ { t_begin = 0; t_stop = n_way; t_inc = 1; } } printf( "\n" ); printf( " part: %3s doff: %3ld bf: %3ld output: %s\n", ( part_n_dim ? ( go_fwd ? "l2r" : "r2l" ) : ( go_fwd ? "t2b" : "b2t" ) ), diagoffa, bf, ( out_ch == 'w' ? "width(area)" : "ranges" ) ); printf( " uplo: %3c nt: %3ld\n", uploa_ch, n_way ); printf( "\n" ); printf( " " ); for ( t = t_begin; t != t_stop; t += t_inc ) { if ( part_n_dim ) { if ( t == t_begin ) printf( "left... " ); else if ( t == t_stop-t_inc ) printf( " ...right" ); else printf( " " ); } else // if ( part_m_dim ) { if ( t == t_begin ) printf( "top... " ); else if ( t == t_stop-t_inc ) printf( " ...bottom" ); else printf( " " ); } } printf( "\n" ); printf( "%4c x %4c ", 'm', 'n' ); for ( t = t_begin; t != t_stop; t += t_inc ) { printf( "%9s %lu ", "thread", t ); } printf( "\n" ); printf( "-------------" ); for ( t = t_begin; t != t_stop; t += t_inc ) { printf( "-------------" ); } printf( "\n" ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = ( dim_t )p; else m = ( dim_t )m_input; if ( n_input < 0 ) n = ( dim_t )p; else n = ( dim_t )n_input; dt = BLIS_DOUBLE; bli_obj_create( dt, m, n, 0, 0, &a ); bli_obj_set_struc( BLIS_TRIANGULAR, a ); bli_obj_set_uplo( uploa, a ); bli_obj_set_diag_offset( diagoffa, a ); bli_randm( &a ); printf( "%4lu x %4lu ", m, n ); for ( t = t_begin; t != t_stop; t += t_inc ) { thrinfo.n_way = n_way; thrinfo.work_id = t; if ( part_n_dim && go_fwd ) area = bli_get_range_weighted_l2r( &thrinfo, &a, bf, &start, &end ); else if ( part_n_dim && go_bwd ) area = bli_get_range_weighted_r2l( &thrinfo, &a, bf, &start, &end ); else if ( part_m_dim && go_fwd ) area = bli_get_range_weighted_t2b( &thrinfo, &a, bf, &start, &end ); else // ( part_m_dim && go_bwd ) area = bli_get_range_weighted_b2t( &thrinfo, &a, bf, &start, &end ); width = end - start; if ( out_ch == 'w' ) printf( "%4lu(%6lu) ", width, area ); else printf( "[%4lu,%4lu) ", start, end ); } printf( "\n" ); bli_obj_free( &a ); } bli_finalize(); return 0; }