void bli_obj_free( obj_t* obj ) { if ( bli_error_checking_is_enabled() ) bli_obj_free_check( obj ); // Don't dereference obj if it is NULL. if ( obj != NULL ) { // Idiot safety: Don't try to free the buffer field if the object // is a detached scalar (ie: if the buffer pointer refers to the // address of the internal scalar buffer). if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) ) bli_free_user( bli_obj_buffer( *obj ) ); } }
err_t bli_check_object_buffer( obj_t* a ) { err_t e_val = BLIS_SUCCESS; // We are only concerned with NULL buffers in objects where BOTH // dimensions are non-zero. if ( bli_obj_buffer( *a ) == NULL ) if ( bli_obj_length( *a ) > 0 && bli_obj_width( *a ) > 0 ) e_val = BLIS_EXPECTED_NONNULL_OBJECT_BUFFER; return e_val; }
void bli_obj_print( char* label, obj_t* obj ) { FILE* file = stdout; if ( bli_error_checking_is_enabled() ) bli_obj_print_check( label, obj ); fprintf( file, "\n" ); fprintf( file, "%s\n", label ); fprintf( file, "\n" ); fprintf( file, " m x n %lu x %lu\n", ( unsigned long int )bli_obj_length( *obj ), ( unsigned long int )bli_obj_width( *obj ) ); fprintf( file, "\n" ); fprintf( file, " offm, offn %lu, %lu\n", ( unsigned long int )bli_obj_row_off( *obj ), ( unsigned long int )bli_obj_col_off( *obj ) ); fprintf( file, " diagoff %ld\n", ( signed long int )bli_obj_diag_offset( *obj ) ); fprintf( file, "\n" ); fprintf( file, " buf %p\n", ( void* )bli_obj_buffer( *obj ) ); fprintf( file, " elem size %lu\n", ( unsigned long int )bli_obj_elem_size( *obj ) ); fprintf( file, " rs, cs %ld, %ld\n", ( signed long int )bli_obj_row_stride( *obj ), ( signed long int )bli_obj_col_stride( *obj ) ); fprintf( file, " is %ld\n", ( signed long int )bli_obj_imag_stride( *obj ) ); fprintf( file, " m_padded %lu\n", ( unsigned long int )bli_obj_padded_length( *obj ) ); fprintf( file, " n_padded %lu\n", ( unsigned long int )bli_obj_padded_width( *obj ) ); fprintf( file, " ps %lu\n", ( unsigned long int )bli_obj_panel_stride( *obj ) ); fprintf( file, "\n" ); fprintf( file, " info %lX\n", ( unsigned long int )(*obj).info ); fprintf( file, " - is complex %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) ); fprintf( file, " - is d. prec %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) ); fprintf( file, " - datatype %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) ); fprintf( file, " - target dt %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) ); fprintf( file, " - exec dt %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) ); fprintf( file, " - has trans %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) ); fprintf( file, " - has conj %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) ); fprintf( file, " - unit diag? %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) ); fprintf( file, " - struc type %lu\n", ( unsigned long int )bli_obj_struc( *obj ) >> BLIS_STRUC_SHIFT ); fprintf( file, " - uplo type %lu\n", ( unsigned long int )bli_obj_uplo( *obj ) >> BLIS_UPLO_SHIFT ); fprintf( file, " - is upper %lu\n", ( unsigned long int )bli_obj_is_upper( *obj ) ); fprintf( file, " - is lower %lu\n", ( unsigned long int )bli_obj_is_lower( *obj ) ); fprintf( file, " - is dense %lu\n", ( unsigned long int )bli_obj_is_dense( *obj ) ); fprintf( file, " - pack schema %lu\n", ( unsigned long int )bli_obj_pack_schema( *obj ) >> BLIS_PACK_SCHEMA_SHIFT ); fprintf( file, " - packinv diag? %lu\n", ( unsigned long int )bli_obj_has_inverted_diag( *obj ) ); fprintf( file, " - pack ordifup %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_upper( *obj ) ); fprintf( file, " - pack ordiflo %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_lower( *obj ) ); fprintf( file, " - packbuf type %lu\n", ( unsigned long int )bli_obj_pack_buffer_type( *obj ) >> BLIS_PACK_BUFFER_SHIFT ); fprintf( file, "\n" ); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); //bli_obj_set_diag( BLIS_UNIT_DIAG, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* { obj_t a2; bli_obj_alias_to( &a, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_inc_diag_offset( 1, &a2 ); bli_setm( &BLIS_ZERO, &a2 ); bli_obj_inc_diag_offset( -2, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 ); bli_scalm( &BLIS_TWO, &a2 ); //bli_scalm( &BLIS_TWO, &a ); } */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trsm( side, //bli_trsm4m( side, //bli_trsm3m( side, &alpha, &a, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float * alphap = bli_obj_buffer( &alpha ); float * ap = bli_obj_buffer( &a ); float * cp = bli_obj_buffer( &c ); strsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else // if ( bli_is_complex( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &side, //ztrsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 8; n_input = 4; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, a ); bli_obj_set_uplo( uplo, a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%11.8f", "" ); bli_printm( "c", &c, "%14.11f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trmm( side, &alpha, &a, &c ); #else f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrmm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%14.11f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trmm_blis" ); #else printf( "data_trmm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; double dtime; double dtime_save; double gflops; int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 16; p_end = 2048; p_inc = 16; m_input = 10240; n_input = 10240; k_input = -1; #else p_begin = 24; p_end = 24; p_inc = 1; m_input = -1; k_input = -1; n_input = -1; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; for ( p = p_begin + world_rank * p_inc ; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_b, k, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); { bli_gemm( &alpha, &a, &b, &beta, &c ); } #else char transa = 'N'; char transb = 'N'; int mm = bli_obj_length( c ); int kk = bli_obj_width_after_trans( a ); int nn = bli_obj_width( c ); int lda = bli_obj_col_stride( a ); int ldb = bli_obj_col_stride( b ); int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); //if(world_rank == 0){ #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2ld, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f %d ];\n", (p - p_begin + 1)/p_inc + 1, m, k, n, dtime_save, gflops, world_rank ); //} bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); MPI_Finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, x, y; obj_t a_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha; int r, n_repeats; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_a, m, n, 0, 0, &a ); bli_obj_create( dt_a, m, n, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &y ); bli_randm( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS bli_ger( &alpha, &x, &y, &a ); #else f77_int mm = bli_obj_length( a ); f77_int nn = bli_obj_width( a ); f77_int incx = bli_obj_vector_inc( x ); f77_int incy = bli_obj_vector_inc( y ); f77_int lda = bli_obj_col_stride( a ); double* alphap = bli_obj_buffer( alpha ); double* xp = bli_obj_buffer( x ); double* yp = bli_obj_buffer( y ); double* ap = bli_obj_buffer( a ); dger_( &mm, &nn, alphap, xp, &incx, yp, &incy, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_ger_blis" ); #else printf( "data_ger_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, x; obj_t a_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 6; #endif #if 1 dt_alpha = dt_x = dt_a = BLIS_DOUBLE; #else dt_alpha = dt_x = dt_a = BLIS_DCOMPLEX; #endif uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_a, m, m, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &a ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); //bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uplo, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS //bli_obj_toggle_conj( &x ); //bli_syr( &alpha, bli_her( &alpha, &x, &a ); #else f77_char uplo = 'L'; f77_int mm = bli_obj_length( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int lda = bli_obj_col_stride( &a ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* ap = bli_obj_buffer( &a ); /* dcomplex* xp = bli_obj_buffer( x ); dcomplex* ap = bli_obj_buffer( &a ); */ dsyr_( &uplo, //zher_( &uplo, &mm, alphap, xp, &incx, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &a ); bli_obj_free( &a_save ); } //bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 5; k_input = 6; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, a ); bli_obj_set_conjtrans( transb, b ); bli_setsc( (0.9/1.0), 0.2, &alpha ); bli_setsc( -(1.1/1.0), 0.3, &beta ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, a ); bli_obj_set_uplo( uploa, a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( a ); bli_obj_inc_diag_off( 1, a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_off( -1, a ); bli_obj_toggle_uplo( a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt, dt_real; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; extern blksz_t* gemm_kc; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; dt_real = bli_datatype_proj_to_real( DT ); p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; // Extract the kc blocksize for the requested datatype and its // real analogue. dim_t kc = bli_blksz_get_def( dt, gemm_kc ); dim_t kc_real = bli_blksz_get_def( dt_real, gemm_kc ); // Assign the k dimension depending on which implementation is // being tested. Note that the BLIS_NAT case handles the real // domain cases as well as native complex. if ( IND == BLIS_NAT ) k_input = kc; else if ( IND == BLIS_3M1 ) k_input = kc_real / 3; else if ( IND == BLIS_4M1A ) k_input = kc_real / 2; else k_input = kc_real; // Adjust the relative dimensions, if requested. #if (defined ADJ_MK) m_input = -2; k_input = -2; n_input = -1; #elif (defined ADJ_KN) k_input = -2; n_input = -2; m_input = -1; #elif (defined ADJ_MN) m_input = -2; n_input = -2; k_input = -1; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, a ); bli_obj_set_conjtrans( transb, b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( IND, dt ); #endif dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &f77_transa, //zgemm3m_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t x, y; obj_t alpha, beta; dim_t m; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int ii; #ifdef NBLIS bli_init(); #endif m = 4000; dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; { #ifdef NBLIS bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, 1, 0, 0, &y ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_b, m, m, 0, 0, &b ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); #endif #ifdef NBLAS x.buffer = malloc( m * 1 * sizeof( double ) ); y.buffer = malloc( m * 1 * sizeof( double ) ); alpha.buffer = malloc( 1 * sizeof( double ) ); beta.buffer = malloc( 1 * sizeof( double ) ); a.buffer = malloc( m * m * sizeof( double ) ); a.m = m; a.n = m; a.cs = m; b.buffer = malloc( m * m * sizeof( double ) ); b.m = m; b.n = m; b.cs = m; c.buffer = malloc( m * m * sizeof( double ) ); c.m = m; c.n = m; c.cs = m; *((double*)alpha.buffer) = 2.0; *((double*)beta.buffer) = -1.0; #endif #ifdef NBLIS #if NBLIS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { bli_gemm( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 2 { bli_hemm( BLIS_LEFT, &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 3 { bli_herk( &BLIS_ONE, &a, &BLIS_ONE, &c ); } #endif #if NBLIS >= 4 { bli_her2k( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 5 { bli_trmm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #if NBLIS >= 6 { bli_trsm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #endif #ifdef NBLAS #if NBLAS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 2 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 3 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 4 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyr2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 5 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 6 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 7 { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 8 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 9 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 10 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zher2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 11 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* cp = bli_obj_buffer( c ); ztrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 12 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* cp = bli_obj_buffer( c ); ztrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #endif #ifdef NBLIS bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); #endif #ifdef NBLAS free( x.buffer ); free( y.buffer ); free( alpha.buffer ); free( beta.buffer ); free( a.buffer ); free( b.buffer ); free( c.buffer ); #endif } #ifdef NBLIS bli_finalize(); #endif return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt_a, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); k_input = strtol( argv[3], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); dt_a = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; uplo = BLIS_LOWER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_obj_create( dt_c, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uplo, &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_herk( &alpha, &a, &beta, &c ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &uploa, &transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 8; k_input = 16; n_input = 16; #endif dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_b, k, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_gemm( &alpha, &a, &b, &beta, &c ); #else f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, c ); bli_obj_set_uplo( uploc, c ); bli_obj_set_conjtrans( transa, a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); float* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
void bli_packm_acquire_mpart_t2b( subpart_t requested_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { dim_t m, n; // For now, we only support acquiring the middle subpartition. if ( requested_part != BLIS_SUBPART1 ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Partitioning top-to-bottom through packed column panels (which are // row-stored) is not yet supported. if ( bli_obj_is_col_packed( *obj ) ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Query the dimensions of the parent object. m = bli_obj_length( *obj ); n = bli_obj_width( *obj ); // Foolproofing: do not let b exceed what's left of the m dimension at // row offset i. if ( b > m - i ) b = m - i; // Begin by copying the info, elem size, buffer, row stride, and column // stride fields of the parent object. Note that this omits copying view // information because the new partition will have its own dimensions // and offsets. bli_obj_init_subpart_from( *obj, *sub_obj ); // Modify offsets and dimensions of requested partition. bli_obj_set_dims( b, n, *sub_obj ); // Tweak the padded length of the subpartition to trick the underlying // implementation into only zero-padding for the narrow submatrix of // interest. Usually, the value we want is b (for non-edge cases), but // at the edges, we want the remainder of the mem_t region in the m // dimension. Edge cases are defined as occurring when i + b is exactly // equal to the inherited sub-object's length (which happens since the // determine_blocksize function would have returned a smaller value of // b for the edge iteration). In these cases, we arrive at the new // packed length by simply subtracting off i. { dim_t m_pack_max = bli_obj_padded_length( *sub_obj ); dim_t m_pack_cur; if ( i + b == m ) m_pack_cur = m_pack_max - i; else m_pack_cur = b; bli_obj_set_padded_length( m_pack_cur, *sub_obj ); } // Translate the desired offsets to a panel offset and adjust the // buffer pointer of the subpartition object. { char* buf_p = bli_obj_buffer( *sub_obj ); siz_t elem_size = bli_obj_elem_size( *sub_obj ); dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj ); buf_p = buf_p + elem_size * off_to_panel; bli_obj_set_buffer( ( void* )buf_p, *sub_obj ); } }
int main( int argc, char** argv ) { obj_t a, x; obj_t x_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_a = dt_x = BLIS_DOUBLE; uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_trsv_blis" ); #else printf( "data_trv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_x, m, 1, 0, 0, &x_save ); bli_randm( &a ); bli_randm( &x ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); // Randomize A and zero the unstored triangle to ensure the // implementation reads only from the stored region. bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_copym( &x, &x_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &x_save, &x ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "x", &x, "%4.1f", "" ); #endif #ifdef BLIS bli_trsv( &BLIS_ONE, &a, &x ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_char diaga = 'N'; f77_int mm = bli_obj_length( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int incx = bli_obj_vector_inc( &x ); double* ap = bli_obj_buffer( &a ); double* xp = bli_obj_buffer( &x ); dtrsv_( &uploa, &transa, &diaga, &mm, ap, &lda, xp, &incx ); #endif #ifdef PRINT bli_printm( "x after", &x, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trsv_blis" ); #else printf( "data_trsv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &x_save ); } //bli_finalize(); return 0; }