예제 #1
0
void libblis_test_syr2k_experiment( test_params_t* params,
                                    test_op_t*     op,
                                    iface_t        iface,
                                    num_t          datatype,
                                    char*          pc_str,
                                    char*          sc_str,
                                    unsigned int   p_cur,
                                    double*        perf,
                                    double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, k;

	uplo_t       uploc;
	trans_t      transa, transb;

	obj_t        kappa;
	obj_t        alpha, a, b, beta, c;
	obj_t        c_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transb );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, k, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], m, k, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( c ) )
	{
		bli_setsc(  0.8, 0.0, &alpha );
		bli_setsc( -1.0, 0.0, &beta );
	}
	else
	{
		// For syr2k, both alpha and beta may be complex since, unlike her2k,
		// C is symmetric in both the real and complex cases.
		bli_setsc(  0.8, 0.5, &alpha );
		bli_setsc( -1.0, 0.5, &beta );
	}

	// Randomize A and B.
	bli_randm( &a );
	bli_randm( &b );

	// Set the structure and uplo properties of C.
	bli_obj_set_struc( BLIS_SYMMETRIC, c );
	bli_obj_set_uplo( uploc, c );

	// Randomize A, make it densely symmetric, and zero the unstored triangle
	// to ensure the implementation is reads only from the stored region.
	bli_randm( &c );
	bli_mksymm( &c );
	bli_mktrim( &c );

	// Save C and set its structure and uplo properties.
	bli_obj_set_struc( BLIS_SYMMETRIC, c_save );
	bli_obj_set_uplo( uploc, c_save );
	bli_copym( &c, &c_save );

	// Normalize by k.
	bli_setsc( 1.0/( double )k, 0.0, &kappa );
	bli_scalm( &kappa, &a );
	bli_scalm( &kappa, &b );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conjtrans( transb, b );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_syr2k_impl( iface, &alpha, &a, &b, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_syr2k_check( &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
예제 #2
0
파일: test_hemm.c 프로젝트: ShawnLess/blis
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt;
	int   r, n_repeats;
	side_t side;
	uplo_t uploa;
	f77_char f77_side;
	f77_char f77_uploa;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

#ifndef PRINT
	p_begin = 200;
	p_end   = 2000;
	p_inc   = 200;

	m_input = -1;
	n_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 4;
	n_input = 4;
#endif

#if 1
	//dt = BLIS_FLOAT;
	dt = BLIS_DOUBLE;
#else
	//dt = BLIS_SCOMPLEX;
	dt = BLIS_DCOMPLEX;
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uploa = BLIS_LOWER;
	//uploa = BLIS_UPPER;

	bli_param_map_blis_to_netlib_side( side, &f77_side );
	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		if ( bli_is_left( side ) )
			bli_obj_create( dt, m, m, 0, 0, &a );
		else
			bli_obj_create( dt, n, n, 0, 0, &a );
		bli_obj_create( dt, m, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, a );
		bli_obj_set_uplo( uploa, a );

		// Randomize A, make it densely Hermitian, and zero the unstored
		// triangle to ensure the implementation reads only from the stored
		// region.
		bli_randm( &a );
		bli_mkherm( &a );
		bli_mktrim( &a );
/*
		bli_obj_toggle_uplo( a );
		bli_obj_inc_diag_off( 1, a );
		bli_setm( &BLIS_ZERO, &a );
		bli_obj_inc_diag_off( -1, a );
		bli_obj_toggle_uplo( a );
		bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
		bli_scalm( &BLIS_TWO, &a );
		bli_scalm( &BLIS_TWO, &a );
*/

		bli_setsc(  (2.0/1.0), 1.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_hemm( side,
			          &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );
#else

		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   bp     = bli_obj_buffer( b );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			ssymm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsymm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			scomplex*  alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			scomplex*  bp     = bli_obj_buffer( b );
			scomplex*  betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			chemm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			dcomplex*  alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			dcomplex*  bp     = bli_obj_buffer( b );
			dcomplex*  betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zhemm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%9.5f", "" );
			exit(1);
#endif

			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_hemm_blis" );
#else
		printf( "data_hemm_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #3
0
파일: test_her.c 프로젝트: figual/blis
int main( int argc, char** argv )
{
	obj_t a, x;
	obj_t a_save;
	obj_t alpha;
	dim_t m;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input;
	num_t dt_a, dt_x;
	num_t dt_alpha;
	int   r, n_repeats;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	//bli_init();

	n_repeats = 3;

#ifndef PRINT
	p_begin = 40;
	p_end   = 2000;
	p_inc   = 40;

	m_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 6;
#endif

#if 1
	dt_alpha = dt_x = dt_a = BLIS_DOUBLE;
#else
	dt_alpha = dt_x = dt_a = BLIS_DCOMPLEX;
#endif

	uplo = BLIS_LOWER;

	// Begin with initializing the last entry to zero so that
	// matlab allocates space for the entire array once up-front.
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
	printf( "data_her_blis" );
#else
	printf( "data_her_%s", BLAS );
#endif
	printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n",
	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
	        ( unsigned long )0, 0.0 );

	for ( p = p_begin; p <= p_end; p += p_inc )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );

		bli_obj_create( dt_x, m, 1, 0, 0, &x );
		bli_obj_create( dt_a, m, m, 0, 0, &a );
		bli_obj_create( dt_a, m, m, 0, 0, &a_save );

		bli_randm( &x );
		bli_randm( &a );

		bli_obj_set_struc( BLIS_HERMITIAN, &a );
		//bli_obj_set_struc( BLIS_SYMMETRIC, &a );
		bli_obj_set_uplo( uplo, &a );


		bli_setsc(  (2.0/1.0), 0.0, &alpha );


		bli_copym( &a, &a_save );
	
		dtime_save = DBL_MAX;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &a_save, &a );


			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "x", &x, "%4.1f", "" );
			bli_printm( "a", &a, "%4.1f", "" );
#endif

#ifdef BLIS
			//bli_obj_toggle_conj( &x );

			//bli_syr( &alpha,
			bli_her( &alpha,
			         &x,
			         &a );

#else

			f77_char uplo   = 'L';
			f77_int  mm     = bli_obj_length( &a );
			f77_int  incx   = bli_obj_vector_inc( &x );
			f77_int  lda    = bli_obj_col_stride( &a );
			double*  alphap = bli_obj_buffer( &alpha );
			double*  xp     = bli_obj_buffer( &x );
			double*  ap     = bli_obj_buffer( &a );
/*
			dcomplex* xp   = bli_obj_buffer( x );
			dcomplex* ap   = bli_obj_buffer( &a );
*/

			dsyr_( &uplo,
			//zher_( &uplo,
			       &mm,
			       alphap,
			       xp, &incx,
			       ap, &lda );
#endif

#ifdef PRINT
			bli_printm( "a after", &a, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_her_blis" );
#else
		printf( "data_her_%s", BLAS );
#endif
		printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m, gflops );

		bli_obj_free( &alpha );

		bli_obj_free( &x );
		bli_obj_free( &a );
		bli_obj_free( &a_save );
	}

	//bli_finalize();

	return 0;
}
예제 #4
0
파일: test_trmm.c 프로젝트: tkelman/blis
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	side_t side;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

    if( argc < 7 ) 
    {   
        printf("Usage:\n");
        printf("test_foo.x m n p_begin p_inc p_end:\n");
        exit;
    }   

    int world_size, world_rank, provided;
    MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    m_input = strtol( argv[1], NULL, 10 );
    n_input = strtol( argv[2], NULL, 10 );
    p_begin = strtol( argv[4], NULL, 10 );
    p_inc   = strtol( argv[5], NULL, 10 );
    p_end   = strtol( argv[6], NULL, 10 );

#if 1
	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;
#else
	dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uplo = BLIS_LOWER;
	//uplo = BLIS_UPPER;

    for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		if ( bli_is_left( side ) )
			bli_obj_create( dt_a, m, m, 0, 0, &a );
		else
			bli_obj_create( dt_a, n, n, 0, 0, &a );
		bli_obj_create( dt_b, m, n, 0, 0, &b );
		bli_obj_create( dt_c, m, n, 0, 0, &c );
		bli_obj_create( dt_c, m, n, 0, 0, &c_save );

		bli_obj_set_struc( BLIS_TRIANGULAR, a );
		bli_obj_set_uplo( uplo, a );

		bli_randm( &a );
		bli_randm( &c );
		bli_randm( &b );

/*
		bli_obj_toggle_uplo( a );
		bli_obj_inc_diag_off( -1, a );
		bli_setm( &BLIS_ZERO, &a );
		bli_obj_inc_diag_off( 1, a );
		bli_obj_toggle_uplo( a );
		bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
		bli_scalm( &BLIS_TWO, &a );
		//bli_scalm( &BLIS_TWO, &a );
*/



		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );

			dtime = bli_clock();


#ifdef PRINT

/*
			obj_t ar, ai;
			bli_obj_alias_to( a, ar );
			bli_obj_alias_to( a, ai );
			bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
			bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
			bli_printm( "ar", &ar, "%4.1f", "" );
			bli_printm( "ai", &ai, "%4.1f", "" );
*/
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
			bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_trmm( side,
			//bli_trmm4m( side,
			          &alpha,
			          &a,
			          &c );

#else

			f77_char side   = 'L';
			f77_char uplo   = 'L';
			f77_char transa = 'N';
			f77_char diag   = 'N';
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  cp     = bli_obj_buffer( c );

			dtrmm_( &side,
			        &uplo,
			        &transa,
			        &diag,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif



			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt_a ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_trmm_blis" );
#else
		printf( "data_trmm_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );


		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #5
0
파일: test_gemm.c 프로젝트: mindis/blis
int main( int argc, char** argv )
{
	obj_t    a, b, c;
	obj_t    c_save;
	obj_t    alpha, beta;
	dim_t    m, n, k;
	dim_t    p;
	dim_t    p_begin, p_end, p_inc;
	int      m_input, n_input, k_input;
	num_t    dt, dt_real;
	char     dt_ch;
	int      r, n_repeats;
	trans_t  transa;
	trans_t  transb;
	f77_char f77_transa;
	f77_char f77_transb;

	double   dtime;
	double   dtime_save;
	double   gflops;

	extern blksz_t* gemm_kc;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

	dt      = DT;
	dt_real = bli_datatype_proj_to_real( DT );

	p_begin = P_BEGIN;
	p_end   = P_END;
	p_inc   = P_INC;

	m_input = -1;
	n_input = -1;
	k_input = -1;

	// Extract the kc blocksize for the requested datatype and its
	// real analogue.
	dim_t kc      = bli_blksz_get_def( dt,      gemm_kc );
	dim_t kc_real = bli_blksz_get_def( dt_real, gemm_kc );

	// Assign the k dimension depending on which implementation is
	// being tested. Note that the BLIS_NAT case handles the real
	// domain cases as well as native complex.
	if      ( IND == BLIS_NAT  ) k_input = kc;
	else if ( IND == BLIS_3M1  ) k_input = kc_real / 3;
	else if ( IND == BLIS_4M1A ) k_input = kc_real / 2;
	else                         k_input = kc_real;

	// Adjust the relative dimensions, if requested.
#if   (defined ADJ_MK)
	m_input = -2; k_input = -2; n_input = -1;
#elif (defined ADJ_KN)
	k_input = -2; n_input = -2; m_input = -1;
#elif (defined ADJ_MN)
	m_input = -2; n_input = -2; k_input = -1;
#endif

	// Choose the char corresponding to the requested datatype.
	if      ( bli_is_float( dt ) )    dt_ch = 's';
	else if ( bli_is_double( dt ) )   dt_ch = 'd';
	else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
	else                              dt_ch = 'z';

	transa = BLIS_NO_TRANSPOSE;
	transb = BLIS_NO_TRANSPOSE;

	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
	bli_param_map_blis_to_netlib_trans( transb, &f77_transb );

	// Begin with initializing the last entry to zero so that
	// matlab allocates space for the entire array once up-front.
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
	printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
#else
	printf( "data_%s_%cgemm_%s",      THR_STR, dt_ch, STR );
#endif
	printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
	        ( unsigned long )0,
	        ( unsigned long )0,
	        ( unsigned long )0, 0.0, 0.0 );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;
		if ( k_input < 0 ) k = p / ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, k, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );
		//bli_obj_create( dt, m, k, 2, 2*m, &a );
		//bli_obj_create( dt, k, n, 2, 2*k, &b );
		//bli_obj_create( dt, m, n, 2, 2*m, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_conjtrans( transa, a );
		bli_obj_set_conjtrans( transb, b );

		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
#ifdef BLIS
		bli_ind_disable_all_dt( dt );
		bli_ind_enable_dt( IND, dt );
#endif

		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_gemm( &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );

#else

		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   bp     = bli_obj_buffer( b );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			sgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			scomplex*  alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			scomplex*  bp     = bli_obj_buffer( b );
			scomplex*  betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			cgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			dcomplex*  alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			dcomplex*  bp     = bli_obj_buffer( b );
			dcomplex*  betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zgemm_( &f77_transa,
			//zgemm3m_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
#else
		printf( "data_%s_%cgemm_%s",      THR_STR, dt_ch, STR );
#endif
		printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #6
0
void libblis_test_scalm_experiment( test_params_t* params,
                                    test_op_t*     op,
                                    iface_t        iface,
                                    num_t          datatype,
                                    char*          pc_str,
                                    char*          sc_str,
                                    unsigned int   p_cur,
                                    double*        perf,
                                    double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n;

	conj_t       conjbeta;

	obj_t        beta, y;
	obj_t        y_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &y );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &y_save );

	// Set beta to 0 + i.
	//bli_setsc( 0.0, 1.0, &beta );
	if ( bli_obj_is_real( y ) )
		bli_setsc( -2.0,  0.0, &beta );
	else
		bli_setsc(  0.0, -2.0, &beta );

	// Randomize and save y.
	bli_randm( &y );
	bli_copym( &y, &y_save );

	// Apply the parameters.
	bli_obj_set_conj( conjbeta, beta );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &y_save, &y );

		time = bli_clock();

		libblis_test_scalm_impl( iface, &beta, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 6.0;

	// Perform checks.
	libblis_test_scalm_check( &beta, &y, &y_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
예제 #7
0
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input, k_input;
	num_t dt;
	int   r, n_repeats;
	trans_t  transa;
	trans_t  transb;
	f77_char f77_transa;
	f77_char f77_transb;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

#ifndef PRINT
	p_begin = 200;
	p_end   = 2000;
	p_inc   = 200;

	m_input = -1;
	n_input = -1;
	k_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 5;
	k_input = 6;
	n_input = 4;
#endif

#if 1
	//dt = BLIS_FLOAT;
	dt = BLIS_DOUBLE;
#else
	//dt = BLIS_SCOMPLEX;
	dt = BLIS_DCOMPLEX;
#endif

	transa = BLIS_NO_TRANSPOSE;
	transb = BLIS_NO_TRANSPOSE;

	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
	bli_param_map_blis_to_netlib_trans( transb, &f77_transb );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, k, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_conjtrans( transa, a );
		bli_obj_set_conjtrans( transb, b );

		bli_setsc(  (0.9/1.0), 0.2, &alpha );
		bli_setsc( -(1.1/1.0), 0.3, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = DBL_MAX;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_gemm( &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );

#else

		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   bp     = bli_obj_buffer( b );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			sgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			scomplex*  alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			scomplex*  bp     = bli_obj_buffer( b );
			scomplex*  betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			cgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			dcomplex*  alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			dcomplex*  bp     = bli_obj_buffer( b );
			dcomplex*  betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_gemm_blis" );
#else
		printf( "data_gemm_%s", BLAS );
#endif
		printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #8
0
파일: test_trmm.c 프로젝트: figual/blis
int main( int argc, char** argv )
{
	obj_t    a, c;
	obj_t    c_save;
	obj_t    alpha;
	dim_t    m, n;
	dim_t    p;
	dim_t    p_begin, p_end, p_inc;
	int      m_input, n_input;
	ind_t    ind;
	num_t    dt;
	char     dt_ch;
	int      r, n_repeats;
	side_t   side;
	uplo_t   uploa;
	trans_t  transa;
	diag_t   diaga;
	f77_char f77_side;
	f77_char f77_uploa;
	f77_char f77_transa;
	f77_char f77_diaga;

	double   dtime;
	double   dtime_save;
	double   gflops;

	//bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

	dt      = DT;

	ind     = IND;

	p_begin = P_BEGIN;
	p_end   = P_END;
	p_inc   = P_INC;

	m_input = -1;
	n_input = -1;


	// Supress compiler warnings about unused variable 'ind'.
	( void )ind;

#if 0

	cntx_t* cntx;

	ind_t ind_mod = ind;

	// A hack to use 3m1 as 1mpb (with 1m as 1mbp).
	if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;

	// Initialize a context for the current induced method and datatype.
	cntx = bli_gks_query_ind_cntx( ind_mod, dt );

	// Set k to the kc blocksize for the current datatype.
	k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx );

#elif 1

	//k_input = 256;

#endif

	// Choose the char corresponding to the requested datatype.
	if      ( bli_is_float( dt ) )    dt_ch = 's';
	else if ( bli_is_double( dt ) )   dt_ch = 'd';
	else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
	else                              dt_ch = 'z';

#if 0
	side   = BLIS_LEFT;
#else
	side   = BLIS_RIGHT;
#endif
#if 0
	uploa  = BLIS_LOWER;
#else
	uploa  = BLIS_UPPER;
#endif
	transa = BLIS_NO_TRANSPOSE;
	diaga  = BLIS_NONUNIT_DIAG;

	bli_param_map_blis_to_netlib_side( side, &f77_side );
	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
	bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );

	// Begin with initializing the last entry to zero so that
	// matlab allocates space for the entire array once up-front.
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
	printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
#else
	printf( "data_%s_%ctrmm_%s",      THR_STR, dt_ch, STR );
#endif
	printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
	        ( unsigned long )0,
	        ( unsigned long )0, 0.0 );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{

		if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );

		if ( bli_does_trans( side ) )
			bli_obj_create( dt, m, m, 0, 0, &a );
        else
			bli_obj_create( dt, n, n, 0, 0, &a );
		bli_obj_create( dt, m, n, 0, 0, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
		bli_obj_set_uplo( uploa, &a );
		bli_obj_set_conjtrans( transa, &a );
		bli_obj_set_diag( diaga, &a );

		bli_randm( &a );
		bli_mktrim( &a );

		bli_setsc(  (2.0/1.0), 0.0, &alpha );

		bli_copym( &c, &c_save );
	
#if 0 //def BLIS
		bli_ind_disable_all_dt( dt );
		bli_ind_enable_dt( ind, dt );
#endif

		dtime_save = DBL_MAX;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_trmm( side,
			          &alpha,
			          &a,
			          &c );

#else

		if ( bli_is_float( dt ) )
		{
			f77_int   mm     = bli_obj_length( &c );
			f77_int   kk     = bli_obj_width( &c );
			f77_int   lda    = bli_obj_col_stride( &a );
			f77_int   ldc    = bli_obj_col_stride( &c );
			float*    alphap = bli_obj_buffer( &alpha );
			float*    ap     = bli_obj_buffer( &a );
			float*    cp     = bli_obj_buffer( &c );

			strmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int   mm     = bli_obj_length( &c );
			f77_int   kk     = bli_obj_width( &c );
			f77_int   lda    = bli_obj_col_stride( &a );
			f77_int   ldc    = bli_obj_col_stride( &c );
			double*   alphap = bli_obj_buffer( &alpha );
			double*   ap     = bli_obj_buffer( &a );
			double*   cp     = bli_obj_buffer( &c );

			dtrmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int   mm     = bli_obj_length( &c );
			f77_int   kk     = bli_obj_width( &c );
			f77_int   lda    = bli_obj_col_stride( &a );
			f77_int   ldc    = bli_obj_col_stride( &c );
			scomplex* alphap = bli_obj_buffer( &alpha );
			scomplex* ap     = bli_obj_buffer( &a );
			scomplex* cp     = bli_obj_buffer( &c );

			ctrmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int    mm     = bli_obj_length( &c );
			f77_int    kk     = bli_obj_width( &c );
			f77_int    lda    = bli_obj_col_stride( &a );
			f77_int    ldc    = bli_obj_col_stride( &c );
			dcomplex*  alphap = bli_obj_buffer( &alpha );
			dcomplex*  ap     = bli_obj_buffer( &a );
			dcomplex*  cp     = bli_obj_buffer( &c );

			ztrmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
#else
		printf( "data_%s_%ctrmm_%s",      THR_STR, dt_ch, STR );
#endif
		printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, gflops );

		bli_obj_free( &alpha );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	//bli_finalize();

	return 0;
}
예제 #9
0
파일: test_trsm.c 프로젝트: jmhautbois/blis
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	side_t side;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

#ifndef PRINT
	p_begin = 1000;
	p_end   = 1000;
	p_inc   = 40;

	m_input = -1;
	n_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 8 ;
	n_input = 4 ;
#endif

#if 0
	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;
#else
	dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; 
	//dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; 
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uplo = BLIS_LOWER;
	//uplo = BLIS_UPPER;

	for ( p = p_begin; p <= p_end; p += p_inc )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		if ( bli_is_left( side ) )
			bli_obj_create( dt_a, m, m, 0, 0, &a );
		else
			bli_obj_create( dt_a, n, n, 0, 0, &a );
		bli_obj_create( dt_b, m, n, 0, 0, &b );
		bli_obj_create( dt_c, m, n, 0, 0, &c );
		bli_obj_create( dt_c, m, n, 0, 0, &c_save );

		bli_obj_set_struc( BLIS_TRIANGULAR, a );
		bli_obj_set_uplo( uplo, a );
		//bli_obj_set_diag( BLIS_UNIT_DIAG, a );

		bli_randm( &a );
		bli_randm( &c );
		bli_randm( &b );

/*
		{ 
			obj_t a2;

			bli_obj_alias_to( a, a2 );
			bli_obj_toggle_uplo( a2 );
			bli_obj_inc_diag_off( 1, a2 );
			bli_setm( &BLIS_ZERO, &a2 );
			bli_obj_inc_diag_off( -2, a2 );
			bli_obj_toggle_uplo( a2 );
			bli_obj_set_diag( BLIS_NONUNIT_DIAG, a2 );
			bli_scalm( &BLIS_TWO, &a2 );
			//bli_scalm( &BLIS_TWO, &a );
		} 
*/

		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );

			dtime = bli_clock();


#ifdef PRINT
/*
			obj_t ar, ai;
			bli_obj_alias_to( a, ar );
			bli_obj_alias_to( a, ai );
			bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
			bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;

			bli_printm( "ar", &ar, "%4.1f", "" );
			bli_printm( "ai", &ai, "%4.1f", "" );
*/

			bli_invertd( &a );
			bli_printm( "a", &a, "%4.1f", "" );
			bli_invertd( &a );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_trsm( side,
			//bli_trsm4m( side,
			//bli_trsm3m( side,
			          &alpha,
			          &a,
			          &c );
#else

		if ( bli_is_real( dt_a ) )
		{
			f77_char side   = 'L';
			f77_char uplo   = 'L';
			f77_char transa = 'N';
			f77_char diag   = 'N';
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			float *  alphap = bli_obj_buffer( alpha );
			float *  ap     = bli_obj_buffer( a );
			float *  cp     = bli_obj_buffer( c );

			strsm_( &side,
			        &uplo,
			        &transa,
			        &diag,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else // if ( bli_is_complex( dt_a ) )
		{
			f77_char  side   = 'L';
			f77_char  uplo   = 'L';
			f77_char  transa = 'N';
			f77_char  diag   = 'N';
			f77_int   mm     = bli_obj_length( c );
			f77_int   nn     = bli_obj_width( c );
			f77_int   lda    = bli_obj_col_stride( a );
			f77_int   ldc    = bli_obj_col_stride( c );
			scomplex* alphap = bli_obj_buffer( alpha );
			scomplex* ap     = bli_obj_buffer( a );
			scomplex* cp     = bli_obj_buffer( c );

			ctrsm_( &side,
			//ztrsm_( &side,
			        &uplo,
			        &transa,
			        &diag,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt_a ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_trsm_blis" );
#else
		printf( "data_trsm_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );


		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #10
0
int main( int argc, char** argv )
{
	//bli_init();

#if 0
	obj_t a, b, c;
	obj_t aa, bb, cc;
	dim_t m, n, k;
	num_t dt;
	uplo_t uploa, uplob, uploc;

	{
		dt = BLIS_DOUBLE;

		m = 6;
		k = 6;
		n = 6;

		bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, k, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );

		uploa = BLIS_UPPER;
		uploa = BLIS_LOWER;
		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
		bli_obj_set_uplo( uploa, &a );
		bli_obj_set_diag_offset( -2, &a );

		uplob = BLIS_UPPER;
		uplob = BLIS_LOWER;
		bli_obj_set_struc( BLIS_TRIANGULAR, &b );
		bli_obj_set_uplo( uplob, &b );
		bli_obj_set_diag_offset( -2, &b );

		uploc = BLIS_UPPER;
		//uploc = BLIS_LOWER;
		//uploc = BLIS_ZEROS;
		//uploc = BLIS_DENSE;
		bli_obj_set_struc( BLIS_HERMITIAN, &c );
		//bli_obj_set_struc( BLIS_TRIANGULAR, &c );
		bli_obj_set_uplo( uploc, &c );
		bli_obj_set_diag_offset(  1, &c );

		bli_obj_alias_to( &a, &aa ); (void)aa;
		bli_obj_alias_to( &b, &bb ); (void)bb;
		bli_obj_alias_to( &c, &cc ); (void)cc;

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );
		//bli_mkherm( &a );
		//bli_mktrim( &a );

		bli_prune_unref_mparts( &cc, BLIS_M,
		                        &aa, BLIS_N );

		bli_printm( "c orig", &c, "%4.1f", "" );
		bli_printm( "c alias", &cc, "%4.1f", "" );
		bli_printm( "a orig", &a, "%4.1f", "" );
		bli_printm( "a alias", &aa, "%4.1f", "" );
		//bli_obj_print( "a struct", &a );
	}
#endif

	dim_t  p_begin, p_max, p_inc;
	gint_t m_input, n_input;
	char   uploa_ch;
	doff_t diagoffa;
	dim_t  bf;
	dim_t  n_way;
	char   part_dim_ch;
	bool_t go_fwd;
	char   out_ch;

	obj_t   a;
	blksz_t bfs;

	thrinfo_t thrinfo;
	dim_t  m, n;
	uplo_t uploa;
	bool_t part_m_dim, part_n_dim;
	bool_t go_bwd;
	dim_t  p;
	num_t  dt;
	dim_t  start, end;

	dim_t  width;
	siz_t  area;

	gint_t t_begin, t_stop, t_inc;
	dim_t  t;

	if ( argc == 13 )
	{
		sscanf( argv[1], "%u", &p_begin );
		sscanf( argv[2], "%u", &p_max );
		sscanf( argv[3], "%u", &p_inc );
		sscanf( argv[4], "%d", &m_input );
		sscanf( argv[5], "%d", &n_input );
		sscanf( argv[6], "%c",  &uploa_ch );
		sscanf( argv[7], "%d", &diagoffa );
		sscanf( argv[8], "%u", &bf );
		sscanf( argv[9], "%u", &n_way );
		sscanf( argv[10], "%c", &part_dim_ch );
		sscanf( argv[11], "%u", &go_fwd );
		sscanf( argv[12], "%c", &out_ch );
	}
	else
	{
		printf( "\n" );
		printf( " %s\n", argv[0] );
		printf( "\n" );
		printf( "  Simulate the dimension ranges assigned to threads when\n" );
		printf( "  partitioning a matrix for parallelism in BLIS.\n" );
		printf( "\n" );
		printf( " Usage:\n" );
		printf( "\n" );
		printf( "  %s p_beg p_max p_inc m n uplo doff bf n_way part_dim go_fwd out\n", argv[0] );
		printf( "\n" );
		printf( "  p_beg:    the first problem size p to test.\n" );
		printf( "  p_max:    the maximum problem size p to test.\n" );
		printf( "  p_inc:    the increase in problem size p between tests.\n" );
		printf( "  m:        the m dimension:\n" );
		printf( "  n:        the n dimension:\n" );
		printf( "            if m,n = -1: bind m,n to problem size p.\n" );
		printf( "            if m,n =  0: bind m,n to p_max.\n" );
		printf( "            if m,n >  0: hold m,n = c constant for all p.\n" );
		printf( "  uplo:     the uplo field of the matrix being partitioned:\n" );
		printf( "            'l': lower-stored (BLIS_LOWER)\n" );
		printf( "            'u': upper-stored (BLIS_UPPER)\n" );
		printf( "            'd': densely-stored (BLIS_DENSE)\n" );
		printf( "  doff:     the diagonal offset of the matrix being partitioned.\n" );
		printf( "  bf:       the simulated blocking factor. all thread ranges must\n" );
		printf( "            be a multiple of bf, except for the range that contains\n" );
		printf( "            the edge case (if one exists). the blocking factor\n" );
		printf( "            would typically correspond to a register blocksize.\n" );
		printf( "  n_way:    the number of ways of parallelism for which we are\n" );
		printf( "            partitioning (i.e.: the number of threads, or thread\n" );
		printf( "            groups).\n" );
		printf( "  part_dim: the dimension to partition:\n" );
		printf( "            'm': partition the m dimension.\n" );
		printf( "            'n': partition the n dimension.\n" );
		printf( "  go_fwd:   the direction to partition:\n" );
		printf( "            '1': forward, e.g. left-to-right (part_dim = 'm') or\n" );
		printf( "                 top-to-bottom (part_dim = 'n')\n" );
		printf( "            '0': backward, e.g. right-to-left (part_dim = 'm') or\n" );
		printf( "                 bottom-to-top (part_dim = 'n')\n" );
		printf( "            NOTE: reversing the direction does not change the\n" );
		printf( "            subpartitions' widths, but it does change which end of\n" );
		printf( "            the index range receives the edge case, if it exists.\n" );
		printf( "  out:      the type of output per thread-column:\n" );
		printf( "            'w': the width (and area) of the thread's subpartition\n" );
		printf( "            'r': the actual ranges of the thread's subpartition\n" );
		printf( "                 where the start and end points of each range are\n" );
		printf( "                 inclusive and exclusive, respectively.\n" );
		printf( "\n" );

		exit(1);
	}

	if ( m_input == 0 ) m_input = p_max;
	if ( n_input == 0 ) n_input = p_max;

	if ( part_dim_ch == 'm' ) { part_m_dim = TRUE;  part_n_dim = FALSE; }
	else                      { part_m_dim = FALSE; part_n_dim = TRUE;  }

	go_bwd = !go_fwd;

	if      ( uploa_ch == 'l' ) uploa = BLIS_LOWER;
	else if ( uploa_ch == 'u' ) uploa = BLIS_UPPER;
	else                        uploa = BLIS_DENSE;

	if ( part_n_dim )
	{
		if ( bli_is_upper( uploa ) ) { t_begin = n_way-1; t_stop = -1;    t_inc = -1; }
		else /* if lower or dense */ { t_begin = 0;       t_stop = n_way; t_inc =  1; }
	}
	else // if ( part_m_dim )
	{
		if ( bli_is_lower( uploa ) ) { t_begin = n_way-1; t_stop = -1;    t_inc = -1; }
		else /* if upper or dense */ { t_begin = 0;       t_stop = n_way; t_inc =  1; }
	}

	printf( "\n" );
	printf( "  part: %3s   doff: %3d   bf: %3d   output: %s\n",
	        ( part_n_dim ? ( go_fwd ? "l2r" : "r2l" )
	                     : ( go_fwd ? "t2b" : "b2t" ) ),
	        ( int )diagoffa, ( int )bf,
            ( out_ch == 'w' ? "width(area)" : "ranges" ) );
	printf( "              uplo: %3c   nt: %3u\n", uploa_ch, ( unsigned )n_way );
	printf( "\n" );

	printf( "             " );
	for ( t = t_begin; t != t_stop; t += t_inc )
	{
		if ( part_n_dim )
		{
			if      ( t == t_begin      ) printf( "left...      " );
			else if ( t == t_stop-t_inc ) printf( "     ...right" );
			else                          printf( "             " );
		}
		else // if ( part_m_dim )
		{
			if      ( t == t_begin      ) printf( "top...       " );
			else if ( t == t_stop-t_inc ) printf( "    ...bottom" );
			else                          printf( "             " );
		}
	}
	printf( "\n" );


	printf( "%4c x %4c  ", 'm', 'n' );
	for ( t = t_begin; t != t_stop; t += t_inc )
	{
		printf( "%9s %u  ", "thread", ( unsigned )t );
	}
	printf( "\n" );
	printf( "-------------" );
	for ( t = t_begin; t != t_stop; t += t_inc )
	{
		printf( "-------------" );
	}
	printf( "\n" );


	for ( p = p_begin; p <= p_max; p += p_inc )
	{
		if ( m_input < 0 ) m = ( dim_t )p;
		else               m = ( dim_t )m_input;
		if ( n_input < 0 ) n = ( dim_t )p;
		else               n = ( dim_t )n_input;

		dt = BLIS_DOUBLE;
		
		bli_obj_create( dt, m, n, 0, 0, &a );

		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
		bli_obj_set_uplo( uploa, &a );
		bli_obj_set_diag_offset( diagoffa, &a );

		bli_randm( &a );

		bli_blksz_init_easy( &bfs, bf, bf, bf, bf );

		printf( "%4u x %4u  ", ( unsigned )m, ( unsigned )n );

		for ( t = t_begin; t != t_stop; t += t_inc )
		{
			thrinfo.n_way   = n_way;
			thrinfo.work_id = t;

			if      ( part_n_dim && go_fwd )
				area = bli_thread_get_range_weighted_l2r( &thrinfo, &a, &bfs, &start, &end );
			else if ( part_n_dim && go_bwd )
				area = bli_thread_get_range_weighted_r2l( &thrinfo, &a, &bfs, &start, &end );
			else if ( part_m_dim && go_fwd )
				area = bli_thread_get_range_weighted_t2b( &thrinfo, &a, &bfs, &start, &end );
			else // ( part_m_dim && go_bwd )
				area = bli_thread_get_range_weighted_b2t( &thrinfo, &a, &bfs, &start, &end );

			width = end - start;

			if ( out_ch == 'w' ) printf( "%4u(%6u) ", ( unsigned )width,
			                                            ( unsigned )area );
			else                 printf( "[%4u,%4u)  ", ( unsigned )start,
			                                              ( unsigned )end );
		}

		printf( "\n" );

		bli_obj_free( &a );
	}

	//bli_finalize();

	return 0;
}
예제 #11
0
파일: test_size.c 프로젝트: fmarrabal/blis
int main( int argc, char** argv )
{
    obj_t a, b, c;
    obj_t x, y;
    obj_t alpha, beta;
    dim_t m;
    num_t dt_a, dt_b, dt_c;
    num_t dt_alpha, dt_beta;
    int   ii;

#ifdef NBLIS
    bli_init();
#endif


    m = 4000;

    dt_a = BLIS_DOUBLE;
    dt_b = BLIS_DOUBLE;
    dt_c = BLIS_DOUBLE;
    dt_alpha = BLIS_DOUBLE;
    dt_beta = BLIS_DOUBLE;

    {


#ifdef NBLIS
        bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
        bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

        bli_obj_create( dt_a, m, 1, 0, 0, &x );
        bli_obj_create( dt_a, m, 1, 0, 0, &y );

        bli_obj_create( dt_a, m, m, 0, 0, &a );
        bli_obj_create( dt_b, m, m, 0, 0, &b );
        bli_obj_create( dt_c, m, m, 0, 0, &c );

        bli_randm( &a );
        bli_randm( &b );
        bli_randm( &c );

        bli_setsc(  (2.0/1.0), 0.0, &alpha );
        bli_setsc( -(1.0/1.0), 0.0, &beta );

#endif

#ifdef NBLAS
        x.buffer     = malloc( m * 1 * sizeof( double ) );
        y.buffer     = malloc( m * 1 * sizeof( double ) );

        alpha.buffer = malloc( 1 * sizeof( double ) );
        beta.buffer  = malloc( 1 * sizeof( double ) );
        a.buffer     = malloc( m * m * sizeof( double ) );
        a.m          = m;
        a.n          = m;
        a.cs         = m;
        b.buffer     = malloc( m * m * sizeof( double ) );
        b.m          = m;
        b.n          = m;
        b.cs         = m;
        c.buffer     = malloc( m * m * sizeof( double ) );
        c.m          = m;
        c.n          = m;
        c.cs         = m;

        *((double*)alpha.buffer) =  2.0;
        *((double*)beta.buffer)  = -1.0;
#endif


#ifdef NBLIS

#if NBLIS >= 1
        for ( ii = 0; ii < 2000000000; ++ii )
        {
            bli_gemm( &BLIS_ONE,
                      &a,
                      &b,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 2
        {
            bli_hemm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &b,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 3
        {
            bli_herk( &BLIS_ONE,
                      &a,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 4
        {
            bli_her2k( &BLIS_ONE,
                       &a,
                       &b,
                       &BLIS_ONE,
                       &c );
        }
#endif

#if NBLIS >= 5
        {
            bli_trmm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &c );
        }
#endif

#if NBLIS >= 6
        {
            bli_trsm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &c );
        }
#endif

#endif



#ifdef NBLAS

#if NBLAS >= 1
        for ( ii = 0; ii < 2000000000; ++ii )
        {
            f77_char transa = 'N';
            f77_char transb = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width_after_trans( a );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dgemm_( &transa,
                    &transb,
                    &mm,
                    &nn,
                    &kk,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 2
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsymm_( &side,
                    &uplo,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 3
        {
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width( a );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsyrk_( &uplo,
                    &trans,
                    &mm,
                    &kk,
                    alphap,
                    ap, &lda,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 4
        {
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width( a );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsyr2k_( &uplo,
                     &trans,
                     &mm,
                     &kk,
                     alphap,
                     ap, &lda,
                     bp, &ldb,
                     betap,
                     cp, &ldc );
        }
#endif

#if NBLAS >= 5
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_char diag   = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  cp     = bli_obj_buffer( c );

            dtrmm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 6
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_char diag   = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  cp     = bli_obj_buffer( c );

            dtrsm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 7
        {
            f77_char  transa = 'N';
            f77_char  transb = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width_after_trans( a );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            dcomplex* betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zgemm_( &transa,
                    &transb,
                    &mm,
                    &nn,
                    &kk,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 8
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            dcomplex* betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zhemm_( &side,
                    &uplo,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 9
        {
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width( a );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            double*   alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            double*   betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zherk_( &uplo,
                    &trans,
                    &mm,
                    &kk,
                    alphap,
                    ap, &lda,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 10
        {
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width( a );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            double*   betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zher2k_( &uplo,
                     &trans,
                     &mm,
                     &kk,
                     alphap,
                     ap, &lda,
                     bp, &ldb,
                     betap,
                     cp, &ldc );
        }
#endif

#if NBLAS >= 11
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_char  diag   = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* cp     = bli_obj_buffer( c );

            ztrmm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 12
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_char  diag   = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* cp     = bli_obj_buffer( c );

            ztrsm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif


#endif


#ifdef NBLIS
        bli_obj_free( &x );
        bli_obj_free( &y );

        bli_obj_free( &alpha );
        bli_obj_free( &beta );

        bli_obj_free( &a );
        bli_obj_free( &b );
        bli_obj_free( &c );
#endif

#ifdef NBLAS
        free( x.buffer );
        free( y.buffer );

        free( alpha.buffer );
        free( beta.buffer );

        free( a.buffer );
        free( b.buffer );
        free( c.buffer );
#endif
    }

#ifdef NBLIS
    bli_finalize();
#endif

    return 0;
}
예제 #12
0
파일: test_herk.c 프로젝트: figual/blis
int main( int argc, char** argv )
{
	obj_t a, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, k_input;
	num_t dt_a, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

    if( argc < 7 ) 
    {   
        printf("Usage:\n");
        printf("test_foo.x m n k p_begin p_inc p_end:\n");
        exit;
    }   

    int world_size, world_rank, provided;
    MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    m_input = strtol( argv[1], NULL, 10 );
    k_input = strtol( argv[3], NULL, 10 );
    p_begin = strtol( argv[4], NULL, 10 );
    p_inc   = strtol( argv[5], NULL, 10 );
    p_end   = strtol( argv[6], NULL, 10 );

	dt_a = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;

	uplo = BLIS_LOWER;

    for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		bli_obj_create( dt_a, m, k, 0, 0, &a );
		bli_obj_create( dt_c, m, m, 0, 0, &c );
		bli_obj_create( dt_c, m, m, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, &c );
		bli_obj_set_uplo( uplo, &c );


		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_herk( &alpha,
			          &a,
			          &beta,
			          &c );

#else

			f77_char uploa  = 'L';
			f77_char transa = 'N';
			f77_int  mm     = bli_obj_length( &c );
			f77_int  kk     = bli_obj_width_after_trans( &a );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldc    = bli_obj_col_stride( &c );
			double*  alphap = bli_obj_buffer( &alpha );
			double*  ap     = bli_obj_buffer( &a );
			double*  betap  = bli_obj_buffer( &beta );
			double*  cp     = bli_obj_buffer( &c );

			dsyrk_( &uploa,
			        &transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_herk_blis" );
#else
		printf( "data_herk_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k, dtime_save, gflops );


		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #13
0
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input, k_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

#ifndef PRINT
	p_begin = 40;
	p_end   = 2000;
	p_inc   = 40;

	m_input = -1;
	n_input = -1;
	k_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 8;
	k_input = 16;
	n_input = 16;
#endif

	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;

	for ( p = p_begin; p <= p_end; p += p_inc )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		bli_obj_create( dt_a, m, k, 0, 0, &a );
		bli_obj_create( dt_b, k, n, 0, 0, &b );
		bli_obj_create( dt_c, m, n, 0, 0, &c );
		bli_obj_create( dt_c, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );


		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_gemm( &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );

#else

			f77_char transa = 'N';
			f77_char transb = 'N';
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dgemm_( &transa,
			        &transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_gemm_blis" );
#else
		printf( "data_gemm_%s", BLAS );
#endif
		printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
예제 #14
0
void runBenchmark(enum BenchmarkType benchmarkType, int32_t size, struct BenchmarkResult* out) {
	const int32_t experiments = 10;
	struct BenchmarkResult result = {
		.time = __builtin_nan(""),
		.flops = __builtin_nan(""),
		.throughput = __builtin_nan("")
	};

	switch (benchmarkType) {
		case BenchmarkTypeNaiveDGEMM:
		{
			double* a = malloc(size * size * sizeof(double));
			double* b = malloc(size * size * sizeof(double));
			double* c = malloc(size * size * sizeof(double));

			for (int32_t i = 0; i < size * size; i++) {
				a[i] = ((double) rand()) / ((double) RAND_MAX);
				b[i] = ((double) rand()) / ((double) RAND_MAX);
			}
			memset(c, 0, size * size * sizeof(double));

			for (int32_t experiment = 0; experiment < experiments; ++experiment) {
				const double timeStart = time_sec();
				dgemm_naive(size, a, b, c);
				result.time = fmin(result.time, time_sec() - timeStart);
			}
			result.flops = 2.0 * size * size * size / result.time;

			free(a);
			free(b);
			free(c);
			break;
		}
		case BenchmarkTypeBlockedDGEMM:
		{
			double* a = malloc(size * size * sizeof(double));
			double* b = malloc(size * size * sizeof(double));
			double* c = malloc(size * size * sizeof(double));

			for (int32_t i = 0; i < size * size; i++) {
				a[i] = ((double) rand()) / ((double) RAND_MAX);
				b[i] = ((double) rand()) / ((double) RAND_MAX);
			}
			memset(c, 0, size * size * sizeof(double));

			for (int32_t experiment = 0; experiment < experiments; ++experiment) {
				const double timeStart = time_sec();
				dgemm_blocked(size, a, b, c);
				result.time = fmin(result.time, time_sec() - timeStart);
			}
			result.flops = 2.0 * size * size * size / result.time;

			free(a);
			free(b);
			free(c);
			break;
		}
		case BenchmarkTypeBlisDGEMM:
		{
			obj_t alpha, beta;
			bli_obj_scalar_init_detached(BLIS_DOUBLE, &alpha);
			bli_obj_scalar_init_detached(BLIS_DOUBLE, &beta);
			bli_setsc( 1.0, 0.0, &alpha);
			bli_setsc( 0.0, 0.0, &beta);

			obj_t a, b, c;
			bli_obj_create(BLIS_DOUBLE, size, size, 0, 0, &a);
			bli_obj_create(BLIS_DOUBLE, size, size, 0, 0, &b);
			bli_obj_create(BLIS_DOUBLE, size, size, 0, 0, &c);
			bli_randm(&a);
			bli_randm(&b);
			bli_randm(&c);

			for (int32_t i = 0; i < experiments; ++i) {
				const double timeStart = time_sec();
				bli_gemm(&alpha, &a, &b, &beta, &c);
				result.time = fmin(result.time, time_sec() - timeStart);
			}
			result.flops = 2.0 * size * size * size / result.time;

			bli_obj_free(&a);
			bli_obj_free(&b);
			bli_obj_free(&c);
			break;
		}
		case BenchmarkTypePointerChasing:
		{
			struct xor_shift xor_shift = xor_shift_init(UINT32_C(1), __builtin_ctz(size));
			uint32_t last_index = 1;
			void** data = (void**) malloc(size * sizeof(void*));
			data[0] = &data[1];
			for (size_t i = 0; i < size; i++) {
				const uint32_t index = xor_shift_next(&xor_shift);
				data[last_index] = &data[index];
				last_index = index;
			}
			const int32_t iterations = 16777216 / size;
			for (int32_t experiment = 0; experiment < experiments; ++experiment) {
				const double timeStart = time_sec();
				for (int32_t iteration = 0; iteration < iterations; iteration++) {
					chase_pointers(data);
				}
				result.time = fmin(result.time, (time_sec() - timeStart) / ((double) iterations) / ((double) size));
			}
			free((void*) data);
			break;
		}
		default:
			__builtin_unreachable();
	}
	*out = result;
}
예제 #15
0
파일: test_gemm.c 프로젝트: ipolkovn/blis
void libblis_test_gemm_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   iface_t        iface,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n, k;

	trans_t      transa;
	trans_t      transb;

	obj_t        kappa;
	obj_t        alpha, a, b, beta, c;
	obj_t        c_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_trans( pc_str[0], &transa );
	bli_param_map_char_to_blis_trans( pc_str[1], &transb );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, k, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], k, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, n, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, n, &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( c ) )
	{
		bli_setsc(  1.2,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  1.2,  0.8, &alpha );
		bli_setsc( -1.0,  1.0, &beta );
	}

	// Randomize A, B, and C, and save C.
	bli_randm( &a );
	bli_randm( &b );
	bli_randm( &c );
	bli_copym( &c, &c_save );

	// Normalize by k.
	bli_setsc( 1.0/( double )k, 0.0, &kappa );
	bli_scalm( &kappa, &a );
	bli_scalm( &kappa, &b );

	// Apply the parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conjtrans( transb, b );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_gemm_check( &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
예제 #16
0
파일: test_dotxf.c 프로젝트: elemental/blis
void libblis_test_dotxf_experiment( test_params_t* params,
                                    test_op_t*     op,
                                    iface_t        iface,
                                    num_t          datatype,
                                    char*          pc_str,
                                    char*          sc_str,
                                    unsigned int   p_cur,
                                    double*        perf,
                                    double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, b_n;

	conj_t       conjat, conjx;

	obj_t        alpha, a, x, beta, y;
	obj_t        y_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Query the operation's fusing factor for the current datatype.
	b_n = bli_dotxf_fusefac( datatype );

	// Store the fusing factor so that the driver can retrieve the value
	// later when printing results.
	op->dim_aux[0] = b_n;

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjat );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                                            sc_str[0], m, b_n, &a );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &x );
	libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &y );
	libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &y_save );

	// Set alpha.
	if ( bli_obj_is_real( y ) )
	{
		bli_setsc(  1.2,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  1.2,  0.1, &alpha );
		bli_setsc( -1.0, -0.1, &beta );
	}

	// Randomize A, x, and y, and save y.
	bli_randm( &a );
	bli_randv( &x );
	bli_randv( &y );
	bli_copyv( &y, &y_save );

	// Apply the parameters.
	bli_obj_set_conj( conjat, a );
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copyv( &y_save, &y );

		time = bli_clock();

		libblis_test_dotxf_impl( iface, &alpha, &a, &x, &beta, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_dotxf_check( &alpha, &a, &x, &beta, &y, &y_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
예제 #17
0
void libblis_test_trmm3_experiment( test_params_t* params,
                                    test_op_t*     op,
                                    mt_impl_t      impl,
                                    num_t          datatype,
                                    char*          pc_str,
                                    char*          sc_str,
                                    unsigned int   p_cur,
                                    double*        perf,
                                    double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n;
	dim_t        mn_side;

	side_t       side;
	uplo_t       uploa;
	trans_t      transa;
	diag_t       diaga;
	trans_t      transb;

	obj_t        kappa;
	obj_t        alpha, a, b, beta, c;
	obj_t        c_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transa );
	bli_param_map_char_to_blis_diag( pc_str[3], &diaga );
	bli_param_map_char_to_blis_trans( pc_str[4], &transb );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	bli_set_dim_with_side( side, m, n, mn_side );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], m,       n,       &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m,       n,       &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m,       n,       &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( c ) )
	{
		bli_setsc(  0.8,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  0.8,  0.6, &alpha );
		bli_setsc( -1.0,  0.5, &beta );
	}

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely triangular.
	bli_randm( &a );
	bli_mktrim( &a );

	// Randomize B and C, and save C.
	bli_randm( &b );
	bli_randm( &c );
	bli_copym( &c, &c_save );

	// Normalize by m.
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalm( &kappa, &b );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_diag( diaga, a );
	bli_obj_set_conjtrans( transb, b );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_trmm3_impl( impl, side, &alpha, &a, &b, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trmm3_check( side, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
예제 #18
0
void libblis_test_trsv_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   mt_impl_t      impl,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	uplo_t       uploa;
	trans_t      transa;
	diag_t       diaga;

	obj_t        kappa;
	obj_t        alpha, a, x;
	obj_t        x_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_diag( pc_str[2], &diaga );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &kappa );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &a );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], m,    &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], m,    &x_save );

	// Set alpha.
	if ( bli_obj_is_real( x ) )
		bli_setsc( 2.0,  0.0, &alpha );
	else
		bli_setsc( 2.0, -1.0, &alpha );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely triangular.
	bli_randm( &a );
	bli_mktrim( &a );

	// Randomize x and save.
	bli_randv( &x );
	bli_copyv( &x, &x_save );

	// Normalize vectors by m.
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalv( &kappa, &x );
	bli_scalv( &kappa, &x_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_diag( diaga, a );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &x_save, &x );

		time = bli_clock();

		libblis_test_trsv_impl( impl, &alpha, &a, &x );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trsv_check( &alpha, &a, &x, &x_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &x_save );
}
예제 #19
0
파일: test_her.c 프로젝트: ShawnLess/blis
void libblis_test_her_experiment( test_params_t* params,
                                  test_op_t*     op,
                                  iface_t        iface,
                                  num_t          datatype,
                                  char*          pc_str,
                                  char*          sc_str,
                                  unsigned int   p_cur,
                                  double*        perf,
                                  double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	uplo_t       uploa;
	conj_t       conjx;

	obj_t        alpha, x, a;
	obj_t        a_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype,
	                          sc_str[0], m,    &x );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a_save );

	// Set alpha.
	//bli_copysc( &BLIS_MINUS_ONE, &alpha );
	bli_setsc( -1.0, 0.0, &alpha );

	// Randomize x.
	bli_randv( &x );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_HERMITIAN, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely Hermitian, and zero the unstored triangle
	// to ensure the implementation is reads only from the stored region.
	bli_randm( &a );
	bli_mkherm( &a );
	bli_mktrim( &a );

	// Save A and set its structure and uplo properties.
	bli_obj_set_struc( BLIS_HERMITIAN, a_save );
	bli_obj_set_uplo( uploa, a_save );
	bli_copym( &a, &a_save );

	// Apply the remaining parameters.
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &a_save, &a );

		time = bli_clock();

		libblis_test_her_impl( iface, &alpha, &x, &a );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( a ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_her_check( &alpha, &x, &a, &a_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &a, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &a );
	bli_obj_free( &a_save );
}
예제 #20
0
파일: test_trsv.c 프로젝트: figual/blis
int main( int argc, char** argv )
{
	obj_t a, x;
	obj_t x_save;
	obj_t alpha;
	dim_t m;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input;
	num_t dt_a, dt_x;
	num_t dt_alpha;
	int   r, n_repeats;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	//bli_init();

	n_repeats = 3;

#ifndef PRINT
	p_begin = 40;
	p_end   = 2000;
	p_inc   = 40;

	m_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 15;
	n_input = 15;
#endif

	dt_alpha = dt_a = dt_x = BLIS_DOUBLE;

	uplo = BLIS_LOWER;

	// Begin with initializing the last entry to zero so that
	// matlab allocates space for the entire array once up-front.
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
	printf( "data_trsv_blis" );
#else
	printf( "data_trv_%s", BLAS );
#endif
	printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n",
	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
	        ( unsigned long )0, 0.0 );

	for ( p = p_begin; p <= p_end; p += p_inc )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );

		bli_obj_create( dt_a, m, m, 0, 0, &a );
		bli_obj_create( dt_x, m, 1, 0, 0, &x );
		bli_obj_create( dt_x, m, 1, 0, 0, &x_save );

		bli_randm( &a );
		bli_randm( &x );

		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
		bli_obj_set_uplo( uplo, &a );
		bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a );
		bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a );

		// Randomize A and zero the unstored triangle to ensure the
		// implementation reads only from the stored region.
		bli_randm( &a );
		bli_mktrim( &a );

		// Load the diagonal of A to make it more likely to be invertible.
		bli_shiftd( &BLIS_TWO, &a );

		bli_setsc(  (1.0/1.0), 0.0, &alpha );


		bli_copym( &x, &x_save );
	
		dtime_save = DBL_MAX;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &x_save, &x );


			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "x", &x, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_trsv( &BLIS_ONE,
			          &a,
			          &x );

#else

			f77_char uploa  = 'L';
			f77_char transa = 'N';
			f77_char diaga  = 'N';
			f77_int  mm     = bli_obj_length( &a );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  incx   = bli_obj_vector_inc( &x );
			double*  ap     = bli_obj_buffer( &a );
			double*  xp     = bli_obj_buffer( &x );

			dtrsv_( &uploa,
			        &transa,
			        &diaga,
			        &mm,
			        ap, &lda,
			        xp, &incx );
#endif

#ifdef PRINT
			bli_printm( "x after", &x, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_trsv_blis" );
#else
		printf( "data_trsv_%s", BLAS );
#endif
		printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m, gflops );

		bli_obj_free( &alpha );

		bli_obj_free( &a );
		bli_obj_free( &x );
		bli_obj_free( &x_save );
	}

	//bli_finalize();

	return 0;
}