ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ) { ind_t im; // If the datatype is real, return native execution. if ( !bli_is_complex( dt ) ) return BLIS_NAT; // If the operation is not level-3, return native execution. if ( !bli_opid_is_level3( oper ) ) return BLIS_NAT; // Iterate over all induced methods and search for the first one // that is available (ie: both implemented and enabled) for the // current operation and datatype. for ( im = 0; im < BLIS_NUM_IND_METHODS; ++im ) { void* func = bli_ind_oper_get_func( oper, im ); bool_t stat = bli_ind_oper_get_enable( oper, im, dt ); if ( func != NULL && stat == TRUE ) return im; } // This return statement should never execute since the native index // should be found even if all induced methods are unavailable. We // include it simply to avoid a compiler warning. return BLIS_NAT; }
void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ) { if ( !bli_is_complex( dt ) ) return; if ( !bli_opid_is_level3( oper ) ) return; bli_ind_oper_set_enable_all( oper, dt, FALSE ); bli_ind_oper_set_enable( oper, method, dt, TRUE ); }
num_t bli_ind_map_cdt_to_index( num_t dt ) { // A non-complex datatype should never be passed in. if ( !bli_is_complex( dt ) ) bli_abort(); // Map the complex datatype to a zero-based index. if ( bli_is_scomplex( dt ) ) return 0; else /* if ( bli_is_dcomplex( dt ) ) */ return 1; }
void bli_ind_set_enable_dt( ind_t method, num_t dt, bool_t status ) { opid_t iop; if ( !bli_is_complex( dt ) ) return; // Iterate over all level-3 operation ids. for ( iop = 0; iop < BLIS_NUM_LEVEL3_OPS; ++iop ) { bli_ind_oper_set_enable( iop, method, dt, status ); } }
err_t bli_check_datatype_real_proj_of( num_t dt_c, num_t dt_r ) { err_t e_val = BLIS_SUCCESS; if ( ( dt_c == BLIS_CONSTANT && bli_is_complex( dt_r ) ) || ( dt_c == BLIS_FLOAT && dt_r != BLIS_FLOAT ) || ( dt_c == BLIS_DOUBLE && dt_r != BLIS_DOUBLE ) || ( dt_c == BLIS_SCOMPLEX && dt_r != BLIS_FLOAT ) || ( dt_c == BLIS_DCOMPLEX && dt_r != BLIS_DOUBLE ) ) e_val = BLIS_EXPECTED_REAL_PROJ_OF; return e_val; }
bool_t bli_ind_oper_is_avail( opid_t oper, ind_t method, num_t dt ) { void* func; bool_t stat; // If the datatype is real, it is never available. if ( !bli_is_complex( dt ) ) return FALSE; func = bli_ind_oper_get_func( oper, method ); stat = bli_ind_oper_get_enable( oper, method, dt ); return ( func != NULL && stat == TRUE ); }
void bli_ind_oper_set_enable( opid_t oper, ind_t method, num_t dt, bool_t status ) { num_t idt; if ( !bli_is_complex( dt ) ) return; if ( !bli_opid_is_level3( oper ) ) return; // Disallow changing status of native execution. if ( method == BLIS_NAT ) return; idt = bli_ind_map_cdt_to_index( dt ); bli_ind_oper_st[ method ][ oper ][ idt ] = status; }
void bli_ind_oper_set_enable_all( opid_t oper, num_t dt, bool_t status ) { ind_t im; if ( !bli_is_complex( dt ) ) return; if ( !bli_opid_is_level3( oper ) ) return; for ( im = 0; im < BLIS_NUM_IND_METHODS; ++im ) { // Native execution should always stay enabled. if ( im != BLIS_NAT ) bli_ind_oper_set_enable( oper, im, dt, status ); } }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, c ); bli_obj_set_uplo( uploc, c ); bli_obj_set_conjtrans( transa, a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); //bli_obj_set_diag( BLIS_UNIT_DIAG, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* { obj_t a2; bli_obj_alias_to( &a, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_inc_diag_offset( 1, &a2 ); bli_setm( &BLIS_ZERO, &a2 ); bli_obj_inc_diag_offset( -2, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 ); bli_scalm( &BLIS_TWO, &a2 ); //bli_scalm( &BLIS_TWO, &a ); } */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trsm( side, //bli_trsm4m( side, //bli_trsm3m( side, &alpha, &a, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float * alphap = bli_obj_buffer( &alpha ); float * ap = bli_obj_buffer( &a ); float * cp = bli_obj_buffer( &c ); strsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else // if ( bli_is_complex( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &side, //ztrsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, a ); bli_obj_set_uplo( uploa, a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( a ); bli_obj_inc_diag_off( 1, a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_off( -1, a ); bli_obj_toggle_uplo( a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt, dt_real; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; extern blksz_t* gemm_kc; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; dt_real = bli_datatype_proj_to_real( DT ); p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; // Extract the kc blocksize for the requested datatype and its // real analogue. dim_t kc = bli_blksz_get_def( dt, gemm_kc ); dim_t kc_real = bli_blksz_get_def( dt_real, gemm_kc ); // Assign the k dimension depending on which implementation is // being tested. Note that the BLIS_NAT case handles the real // domain cases as well as native complex. if ( IND == BLIS_NAT ) k_input = kc; else if ( IND == BLIS_3M1 ) k_input = kc_real / 3; else if ( IND == BLIS_4M1A ) k_input = kc_real / 2; else k_input = kc_real; // Adjust the relative dimensions, if requested. #if (defined ADJ_MK) m_input = -2; k_input = -2; n_input = -1; #elif (defined ADJ_KN) k_input = -2; n_input = -2; m_input = -1; #elif (defined ADJ_MN) m_input = -2; n_input = -2; k_input = -1; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, a ); bli_obj_set_conjtrans( transb, b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( IND, dt ); #endif dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &f77_transa, //zgemm3m_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
void bli_ind_disable_dt( ind_t method, num_t dt ) { if ( !bli_is_complex( dt ) ) return; bli_ind_set_enable_dt( method, dt, FALSE ); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 5; k_input = 6; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, a ); bli_obj_set_conjtrans( transb, b ); bli_setsc( (0.9/1.0), 0.2, &alpha ); bli_setsc( -(1.1/1.0), 0.3, &beta ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }