Ejemplo n.º 1
0
void libblis_test_addv_check( obj_t*  alpha,
                              obj_t*  beta,
                              obj_t*  x,
                              obj_t*  y,
                              double* resid )
{
	num_t  dt      = bli_obj_datatype( *x );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
	dim_t  m       = bli_obj_vector_dim( *x );

	conj_t conjx   = bli_obj_conj_status( *x );

	obj_t  aplusb;
	obj_t  alpha_conj;
	obj_t  norm_r, m_r, temp_r;

	double junk;

	//
	// Pre-conditions:
	// - x is set to alpha.
	// - y_orig is set to beta.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := y_orig + conjx(x)
	//
	// is functioning correctly if
	//
	//   fnormv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m )
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt,      &aplusb );
	bli_obj_scalar_init_detached( dt_real, &temp_r );
	bli_obj_scalar_init_detached( dt_real, &norm_r );
	bli_obj_scalar_init_detached( dt_real, &m_r );

	bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj );

	bli_fnormv( y, &norm_r );

	bli_copysc( beta, &aplusb );
	bli_addsc( &alpha_conj, &aplusb );

	bli_setsc( ( double )m, 0.0, &m_r );

	bli_absqsc( &aplusb, &temp_r );
	bli_mulsc( &m_r, &temp_r );
	bli_sqrtsc( &temp_r, &temp_r );
	bli_subsc( &temp_r, &norm_r );

	bli_getsc( &norm_r, resid, &junk );
}
Ejemplo n.º 2
0
void libblis_test_scalv_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         y_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( y );
	num_t  dt_real = bli_obj_dt_proj_to_real( y );

	dim_t  m       = bli_obj_vector_dim( y );

	obj_t  norm_y_r;
	obj_t  nbeta;

	obj_t  y2;

	double junk;

	//
	// Pre-conditions:
	// - y_orig is randomized.
	// Note:
	// - beta should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := conjbeta(beta) * y_orig
	//
	// is functioning correctly if
	//
	//   normf( y + -conjbeta(beta) * y_orig )
	//
	// is negligible.
	//

	bli_obj_create( dt, m, 1, 0, 0, &y2 );
    bli_copyv( y_orig, &y2 );

	bli_obj_scalar_init_detached( dt,      &nbeta );
	bli_obj_scalar_init_detached( dt_real, &norm_y_r );

	bli_copysc( beta, &nbeta );
	bli_mulsc( &BLIS_MINUS_ONE, &nbeta );

	bli_scalv( &nbeta, &y2 );
    bli_addv( &y2, y );

    bli_normfv( y, &norm_y_r );

    bli_getsc( &norm_y_r, resid, &junk );

    bli_obj_free( &y2 );
}
Ejemplo n.º 3
0
void libblis_test_normfm_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         x,
       obj_t*         norm,
       double*        resid
     )
{
	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
	dim_t  m       = bli_obj_length( *x );
	dim_t  n       = bli_obj_width( *x );

	obj_t  m_r, n_r, temp_r;

	double junk;

	//
	// Pre-conditions:
	// - x is set to beta.
	// Note:
	// - beta should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   norm := normf( x )
	//
	// is functioning correctly if
	//
	//   norm = sqrt( absqsc( beta ) * m * n )
	//
	// where m and n are the dimensions of x.
	//

	bli_obj_scalar_init_detached( dt_real, &temp_r );
	bli_obj_scalar_init_detached( dt_real, &m_r );
	bli_obj_scalar_init_detached( dt_real, &n_r );

	bli_setsc( ( double )m, 0.0, &m_r );
	bli_setsc( ( double )n, 0.0, &n_r );

	bli_absqsc( beta, &temp_r );
	bli_mulsc( &m_r, &temp_r );
	bli_mulsc( &n_r, &temp_r );
	bli_sqrtsc( &temp_r, &temp_r );
	bli_subsc( &temp_r, norm );

	bli_getsc( norm, resid, &junk );
}
Ejemplo n.º 4
0
void libblis_test_xpbym_check
     (
       test_params_t* params,
       obj_t*         x,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         y_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( y );
	num_t  dt_real = bli_obj_dt_proj_to_real( y );

	dim_t  m       = bli_obj_length( y );
	dim_t  n       = bli_obj_width( y );

	obj_t  x_temp, y_temp;
	obj_t  norm;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - y_orig is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - ( beta * y_orig + conjx(x) ) )
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt_real, &norm );

    bli_obj_create( dt, m, n, 0, 0, &x_temp );
    bli_obj_create( dt, m, n, 0, 0, &y_temp );

    bli_copym( x,      &x_temp );
    bli_copym( y_orig, &y_temp );

    bli_scalm( beta, &y_temp );
	bli_addm( &x_temp, &y_temp );

    bli_subm( &y_temp, y );
    bli_normfm( y, &norm );
    bli_getsc( &norm, resid, &junk );

    bli_obj_free( &x_temp );
    bli_obj_free( &y_temp );
}
Ejemplo n.º 5
0
void libblis_test_hemv_check( obj_t*  alpha,
                              obj_t*  a,
                              obj_t*  x,
                              obj_t*  beta,
                              obj_t*  y,
                              obj_t*  y_orig,
                              double* resid )
{
    num_t  dt      = bli_obj_datatype( *y );
    num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

    dim_t  m       = bli_obj_vector_dim( *y );

    obj_t  v;
    obj_t  norm;

    double junk;

    //
    // Pre-conditions:
    // - a is randomized and Hermitian.
    // - x is randomized.
    // - y_orig is randomized.
    // Note:
    // - alpha and beta should have non-zero imaginary components in the
    //   complex cases in order to more fully exercise the implementation.
    //
    // Under these conditions, we assume that the implementation for
    //
    //   y := beta * y_orig + alpha * conja(A) * conjx(x)
    //
    // is functioning correctly if
    //
    //   normf( y - v )
    //
    // is negligible, where
    //
    //   v = beta * y_orig + alpha * conja(A_dense) * x
    //

    bli_obj_scalar_init_detached( dt_real, &norm );

    bli_obj_create( dt, m, 1, 0, 0, &v );

    bli_copyv( y_orig, &v );

    bli_mkherm( a );
    bli_obj_set_struc( BLIS_GENERAL, *a );
    bli_obj_set_uplo( BLIS_DENSE, *a );

    bli_gemv( alpha, a, x, beta, &v );

    bli_subv( &v, y );
    bli_normfv( y, &norm );
    bli_getsc( &norm, resid, &junk );

    bli_obj_free( &v );
}
Ejemplo n.º 6
0
void libblis_test_axpbyv_check( obj_t*  alpha,
                                obj_t*  x,
                                obj_t*  beta,
                                obj_t*  y,
                                obj_t*  y_orig,
                                double* resid )
{
	num_t  dt      = bli_obj_datatype( *y );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

	dim_t  m       = bli_obj_vector_dim( *y );

	obj_t  x_temp, y_temp;
	obj_t  norm;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - y_orig is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + alpha * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - ( beta * y_orig + alpha * conjx(x) ) )
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m, 1, 0, 0, &x_temp );
	bli_obj_create( dt, m, 1, 0, 0, &y_temp );

	bli_copyv( x,      &x_temp );
	bli_copyv( y_orig, &y_temp );

	bli_scalv( alpha, &x_temp );
	bli_scalv(  beta, &y_temp );
	bli_addv( &x_temp, &y_temp );

	bli_subv( &y_temp, y );
	bli_normfv( y, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &x_temp );
	bli_obj_free( &y_temp );
}
Ejemplo n.º 7
0
void bli_obj_scalar_detach( obj_t* a,
                            obj_t* alpha )
{
	num_t dt_a = bli_obj_datatype( *a );

	// Initialize alpha to be a bufferless internal scalar of the same
	// datatype as A.
	bli_obj_scalar_init_detached( dt_a, alpha );

	// Copy the internal scalar in A to alpha.
	bli_obj_copy_internal_scalar( *a, *alpha );
}
Ejemplo n.º 8
0
void libblis_test_amaxv_check
(
    test_params_t* params,
    obj_t*         x,
    obj_t*         index,
    double*        resid
)
{
    obj_t index_test;
    obj_t chi_i;
    obj_t chi_i_test;
    dim_t i;
    dim_t i_test;

    double i_d, junk;
    double i_d_test;

    //
    // Pre-conditions:
    // - x is randomized.
    //
    // Under these conditions, we assume that the implementation for
    //
    //   index := amaxv( x )
    //
    // is functioning correctly if
    //
    //   x[ index ] = max( x )
    //
    // where max() is implemented via the bli_?amaxv_test() function.
    //

    // The following two calls have already been made by the caller. That
    // is, the index object has already been created and the library's
    // amaxv implementation has already been tested.
    //bli_obj_scalar_init_detached( BLIS_INT, &index );
    //bli_amaxv( x, &index );
    bli_getsc( index, &i_d, &junk );
    i = i_d;
    bli_acquire_vi( i, x, &chi_i );

    bli_obj_scalar_init_detached( BLIS_INT, &index_test );
    bli_amaxv_test( x, &index_test );
    bli_getsc( &index_test, &i_d_test, &junk );
    i_test = i_d_test;
    bli_acquire_vi( i_test, x, &chi_i_test );

    // Verify that the values referenced by index and index_test are equal.
    if ( bli_obj_equals( &chi_i, &chi_i_test ) ) *resid = 0.0;
    else                                         *resid = 1.0;
}
Ejemplo n.º 9
0
void bli_obj_scalar_init_detached_copy_of( num_t  dt,
                                           conj_t conj,
                                           obj_t* alpha,
                                           obj_t* beta )
{
	obj_t alpha_local;

	// Make a local copy of alpha so we can apply the conj parameter.
	bli_obj_alias_to( *alpha, alpha_local );
	bli_obj_apply_conj( conj, alpha_local );

	// Initialize beta without a buffer and then attach its internal buffer.
	bli_obj_scalar_init_detached( dt, beta );

	// Copy the scalar value in a to object b, conjugating and/or
	// typecasting if needed.
	bli_copysc( &alpha_local, beta );
}
Ejemplo n.º 10
0
void libblis_test_randv_check
     (
       test_params_t* params,
       obj_t*         x,
       double*        resid
     )
{
	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
	dim_t  m_x     = bli_obj_vector_dim( *x );
	obj_t  sum;

	*resid = 0.0;

	//
	// The two most likely ways that randv would fail is if all elements
	// were zero, or if all elements were greater than or equal to one.
	// We check both of these conditions by computing the sum of the
	// absolute values of the elements of x.
	//

	bli_obj_scalar_init_detached( dt_real, &sum );

	bli_norm1v( x, &sum );

	if ( bli_is_float( dt_real ) )
	{
		float*  sum_x = bli_obj_buffer_at_off( sum );

		if      ( *sum_x == *bli_d0   ) *resid = 1.0;
		else if ( *sum_x >= 2.0 * m_x ) *resid = 2.0;
	}
	else // if ( bli_is_double( dt_real ) )
	{
		double* sum_x = bli_obj_buffer_at_off( sum );

		if      ( *sum_x == *bli_d0   ) *resid = 1.0;
		else if ( *sum_x >= 2.0 * m_x ) *resid = 2.0;
	}
}
Ejemplo n.º 11
0
void libblis_test_copym_check
     (
       test_params_t* params,
       obj_t*         x,
       obj_t*         y,
       double*        resid
     )
{
	num_t  dt_real = bli_obj_dt_proj_to_real( x );

	obj_t  norm_y_r;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := conjx(x)
	//
	// is functioning correctly if
	//
	//   normfm( y - conjx(x) )
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt_real, &norm_y_r );

	bli_subm( x, y );

	bli_normfm( y, &norm_y_r );

	bli_getsc( &norm_y_r, resid, &junk );
}
Ejemplo n.º 12
0
void libblis_test_her2k_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	dim_t        m, k;

	uplo_t       uploc;
	trans_t      transa, transb;

	obj_t        alpha, a, b, beta, c;
	obj_t        c_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transb );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, k, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], m, k, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( c ) )
	{
		bli_setsc(  0.8, 0.0, &alpha );
		bli_setsc( -1.0, 0.0, &beta );
	}
	else
	{
		// For her2k, alpha may be complex, but beta must be real-valued
		// (in order to preserve the Hermitian structure of C).
		bli_setsc(  0.8, 0.5, &alpha );
		bli_setsc( -1.0, 0.0, &beta );
	}

	// Randomize A and B.
	libblis_test_mobj_randomize( params, TRUE, &a );
	libblis_test_mobj_randomize( params, TRUE, &b );

	// Set the structure and uplo properties of C.
	bli_obj_set_struc( BLIS_HERMITIAN, c );
	bli_obj_set_uplo( uploc, c );

	// Randomize A, make it densely Hermitian, and zero the unstored triangle
	// to ensure the implementation is reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_mkherm( &c );
	bli_mktrim( &c );

	// Save C and set its structure and uplo properties.
	bli_obj_set_struc( BLIS_HERMITIAN, c_save );
	bli_obj_set_uplo( uploc, c_save );
	bli_copym( &c, &c_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conjtrans( transb, b );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_her2k_impl( iface, &alpha, &a, &b, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_her2k_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
Ejemplo n.º 13
0
void libblis_test_scalv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m;

	conj_t       conjbeta;

	obj_t        beta, y;
	obj_t        y_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &y );
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &y_save );

	// Set beta.
	if ( bli_obj_is_real( &y ) )
		bli_setsc( -2.0,  0.0, &beta );
	else
		bli_setsc(  0.0, -2.0, &beta );

	// Randomize and save y.
	libblis_test_vobj_randomize( params, FALSE, &y );
	bli_copyv( &y, &y_save );

	// Apply the parameters.
	bli_obj_set_conj( conjbeta, &beta );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copyv( &y_save, &y );

		time = bli_clock();

		libblis_test_scalv_impl( iface, &beta, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &y ) ) *perf *= 6.0;

	// Perform checks.
	libblis_test_scalv_check( params, &beta, &y, &y_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
Ejemplo n.º 14
0
void libblis_test_addv_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   mt_impl_t      impl,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	conj_t       conjx;

	obj_t        alpha, beta;
	obj_t        x, y;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );

	// Initialize alpha and beta.
	bli_setsc( -1.0, -1.0, &alpha );
	bli_setsc(  3.0,  3.0, &beta );

	// Set x and y to alpha and beta, respectively.
	bli_setv( &alpha, &x );
	bli_setv( &beta,  &y );

	// Apply the parameters.
	bli_obj_set_conj( conjx, x );

	// Disable repeats since bli_copyv() is not yet tested. 
	//for ( i = 0; i < n_repeats; ++i )
	{
		time = bli_clock();

		libblis_test_addv_impl( impl, &x, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 2.0;

	// Perform checks.
	libblis_test_addv_check( &alpha, &beta, &x, &y, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &y );
}
Ejemplo n.º 15
0
void libblis_test_trsm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n;
	dim_t        mn_side;

	side_t       side;
	uplo_t       uploa;
	trans_t      transa;
	diag_t       diaga;

	obj_t        alpha, a, b;
	obj_t        b_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transa );
	bli_param_map_char_to_blis_diag( pc_str[3], &diaga );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	bli_set_dim_with_side( side, m, n, mn_side );
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m,       n,       &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m,       n,       &b_save );

	// Set alpha.
	if ( bli_obj_is_real( b ) )
	{
		bli_setsc(  2.0,  0.0, &alpha );
	}
	else
	{
		bli_setsc(  2.0,  0.0, &alpha );
	}

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, load the diagonal, make it densely triangular.
	libblis_test_mobj_randomize( params, TRUE, &a );
	libblis_test_mobj_load_diag( params, &a );
	bli_mktrim( &a );

	// Randomize B and save B.
	libblis_test_mobj_randomize( params, TRUE, &b );
	bli_copym( &b, &b_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_diag( diaga, a );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &b_save, &b );

		time = bli_clock();

		libblis_test_trsm_impl( iface, side, &alpha, &a, &b );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( b ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trsm_check( params, side, &alpha, &a, &b, &b_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &b, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &b_save );
}
Ejemplo n.º 16
0
void libblis_test_dotxaxpyf_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         at,
       obj_t*         a,
       obj_t*         w,
       obj_t*         x,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         z,
       obj_t*         y_orig,
       obj_t*         z_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_datatype( *y );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

	dim_t  m       = bli_obj_vector_dim( *z );
	dim_t  b_n     = bli_obj_vector_dim( *y );

	dim_t  i;

	obj_t  a1, chi1, psi1, v, q;
	obj_t  alpha_chi1;
	obj_t  norm;

	double resid1, resid2;
	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - w is randomized.
	// - x is randomized.
	// - y is randomized.
	// - z is randomized.
	// - at is an alias to a.
	// Note:
	// - alpha and beta should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + alpha * conjat(A^T) * conjw(w)
	//   z :=        z_orig + alpha * conja(A)    * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - v )
	//
	// and
	//
	//   normf( z - q )
	//
	// are negligible, where v and q contain y and z as computed by repeated
	// calls to dotxv and axpyv, respectively.
	//

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached( dt,      &alpha_chi1 );

	bli_obj_create( dt, b_n, 1, 0, 0, &v );
	bli_obj_create( dt, m,   1, 0, 0, &q );

	bli_copyv( y_orig, &v );
	bli_copyv( z_orig, &q );

	// v := beta * v + alpha * conjat(at) * conjw(w)
	for ( i = 0; i < b_n; ++i )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, at, &a1 );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, &v, &psi1 );

		bli_dotxv( alpha, &a1, w, beta, &psi1 );
	}

	// q := q + alpha * conja(a) * conjx(x)
	for ( i = 0; i < b_n; ++i )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 );

		bli_copysc( &chi1, &alpha_chi1 );
		bli_mulsc( alpha, &alpha_chi1 );

		bli_axpyv( &alpha_chi1, &a1, &q );
	}


	bli_subv( y, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, &resid1, &junk );

	bli_subv( z, &q );
	bli_normfv( &q, &norm );
	bli_getsc( &norm, &resid2, &junk );


	*resid = bli_fmaxabs( resid1, resid2 );

	bli_obj_free( &v );
	bli_obj_free( &q );
}
Ejemplo n.º 17
0
void libblis_test_syr2_check( obj_t*  alpha,
                              obj_t*  x,
                              obj_t*  y,
                              obj_t*  a,
                              obj_t*  a_orig,
                              double* resid )
{
	num_t  dt      = bli_obj_datatype( *a );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *a );

	dim_t  m_a     = bli_obj_length( *a );

	obj_t  xt, yt;
	obj_t  t, v, w1, w2;
	obj_t  tau, rho, norm;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - y is randomized.
	// - a is randomized and symmetric.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   A := A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T
	//
	// is functioning correctly if
	//
	//   normf( v - w )
	//
	// is negligible, where
	//
	//   v = A * t
	//   w = ( A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T ) * t
	//     = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * conjx(x)^T * t
	//     = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * rho
	//     = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + w1
	//     = A_orig * t + alpha * conjx(x) * rho            + w1
	//     = A_orig * t + w2                                + w1
	//

	bli_mksymm( a );
	bli_mksymm( a_orig );
	bli_obj_set_struc( BLIS_GENERAL, *a );
	bli_obj_set_struc( BLIS_GENERAL, *a_orig );
	bli_obj_set_uplo( BLIS_DENSE, *a );
	bli_obj_set_uplo( BLIS_DENSE, *a_orig );

	bli_obj_scalar_init_detached( dt,      &tau );
	bli_obj_scalar_init_detached( dt,      &rho );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m_a, 1, 0, 0, &t );
	bli_obj_create( dt, m_a, 1, 0, 0, &v );
	bli_obj_create( dt, m_a, 1, 0, 0, &w1 );
	bli_obj_create( dt, m_a, 1, 0, 0, &w2 );

	bli_obj_alias_to( *x, xt );
	bli_obj_alias_to( *y, yt );

	bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau );
	bli_setv( &tau, &t );

	bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );

	bli_dotv( &xt, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, y, &w1 );

	bli_dotv( &yt, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, x, &w2 );

	bli_addv( &w2, &w1 );

	bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 );

	bli_subv( &w1, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w1 );
	bli_obj_free( &w2 );
}
Ejemplo n.º 18
0
void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
                                           test_op_t*     op,
                                           mt_impl_t      impl,
                                           num_t          datatype,
                                           char*          pc_str,
                                           char*          sc_str,
                                           unsigned int   p_cur,
                                           double*        perf,
                                           double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n, k;

	char         sc_a = 'c';
	char         sc_b = 'r';

	side_t       side = BLIS_LEFT;
	uplo_t       uploa;

	obj_t        kappa;
	obj_t        alpha;
	obj_t        a_big, a, b;
	obj_t        b11, c11;
	obj_t        ap, bp;
	obj_t        a1xp, a11p, bx1p, b11p;
	obj_t        c11_save;


	// Map the dimension specifier to actual dimensions.
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Fix m and n to MR and NR, respectively.
	m = bli_blksz_for_type( datatype, gemm_mr );
	n = bli_blksz_for_type( datatype, gemm_nr );

	// Store the register blocksizes so that the driver can retrieve the
	// values later when printing results.
	op->dim_aux[0] = m;
	op->dim_aux[1] = n;

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_a,      k+m, k+m, &a_big );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_b,      k+m, n,   &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m,   n,   &c11 );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m,   n,   &c11_save );

	// Set alpha.
	if ( bli_obj_is_real( b ) )
	{
		bli_setsc(  2.0,  0.0, &alpha );
	}
	else
	{
		bli_setsc(  2.0,  0.0, &alpha );
	}

	// Set the structure, uplo, and diagonal offset properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a_big );
	bli_obj_set_uplo( uploa, a_big );

	// Randomize A and make it densely triangular.
	bli_randm( &a_big );

	// Normalize B and save.
	bli_randm( &b );
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalm( &kappa, &b );

	// Use the last m rows of A_big as A.
	bli_acquire_mpart_t2b( BLIS_SUBPART1, k, m, &a_big, &a );

	// Locate the B11 block of B, copy to C11, and save.
	if ( bli_obj_is_lower( a ) ) 
		bli_acquire_mpart_t2b( BLIS_SUBPART1, k, m, &b, &b11 );
	else
		bli_acquire_mpart_t2b( BLIS_SUBPART1, 0, m, &b, &b11 );
	bli_copym( &b11, &c11 );
	bli_copym( &c11, &c11_save );


	// Initialize pack objects.
	bli_obj_init_pack( &ap );
	bli_obj_init_pack( &bp );

	// Create pack objects for a and b.
	libblis_test_pobj_create( gemm_mr,
	                          gemm_mr,
	                          BLIS_INVERT_DIAG,
	                          BLIS_PACKED_ROW_PANELS,
	                          BLIS_BUFFER_FOR_A_BLOCK,
	                          &a, &ap );
	libblis_test_pobj_create( gemm_mr,
	                          gemm_nr,
	                          BLIS_NO_INVERT_DIAG,
	                          BLIS_PACKED_COL_PANELS,
	                          BLIS_BUFFER_FOR_B_PANEL,
	                          &b, &bp );

	// Pack the contents of a to ap.
	bli_packm_blk_var3( &a, &ap );

	// Pack the contents of b to bp.
	bli_packm_blk_var2( &b, &bp );


	// Create subpartitions from the a and b panels.
	bli_gemmtrsm_ukr_make_subparts( k, &ap, &bp,
	                                &a1xp, &a11p, &bx1p, &b11p );


	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &c11_save, &c11 );

		// Re-pack the contents of b to bp.
		bli_packm_blk_var2( &b, &bp );

		time = bli_clock();

		libblis_test_gemmtrsm_ukr_impl( impl, side, &alpha,
		                                &a1xp, &a11p, &bx1p, &b11p, &c11 );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * n * k + 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( b ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_gemmtrsm_ukr_check( side, &alpha,
	                                 &a1xp, &a11p, &bx1p, &b11p, &c11, &c11_save, resid );

	// Zero out performance and residual if output matrix is empty.
	//libblis_test_check_empty_problem( &c11, perf, resid );

	// Release packing buffers within pack objects.
	bli_obj_release_pack( &ap );
	bli_obj_release_pack( &bp );

	// Free the test objects.
	bli_obj_free( &a_big );
	bli_obj_free( &b );
	bli_obj_free( &c11 );
	bli_obj_free( &c11_save );
}
Ejemplo n.º 19
0
void libblis_test_her_check( obj_t*  alpha,
                             obj_t*  x,
                             obj_t*  a,
                             obj_t*  a_orig,
                             double* resid )
{
	num_t  dt      = bli_obj_datatype( *a );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *a );

	dim_t  m_a     = bli_obj_length( *a );

	obj_t  xh, t, v, w;
	obj_t  tau, rho, norm;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - a is randomized and Hermitian.
	// Note:
	// - alpha must be real-valued.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   A := A_orig + alpha * conjx(x) * conjx(x)^H
	//
	// is functioning correctly if
	//
	//   normf( v - w )
	//
	// is negligible, where
	//
	//   v = A * t
	//   w = ( A_orig + alpha * conjx(x) * conjx(x)^H ) * t
	//     =   A_orig * t + alpha * conjx(x) * conjx(x)^H * t
	//     =   A_orig * t + alpha * conjx(x) * rho
	//     =   A_orig * t + w
	//

	bli_mkherm( a );
	bli_mkherm( a_orig );
	bli_obj_set_struc( BLIS_GENERAL, *a );
	bli_obj_set_struc( BLIS_GENERAL, *a_orig );
	bli_obj_set_uplo( BLIS_DENSE, *a );
	bli_obj_set_uplo( BLIS_DENSE, *a_orig );

	bli_obj_scalar_init_detached( dt,      &tau );
	bli_obj_scalar_init_detached( dt,      &rho );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m_a, 1, 0, 0, &t );
	bli_obj_create( dt, m_a, 1, 0, 0, &v );
	bli_obj_create( dt, m_a, 1, 0, 0, &w );

	bli_obj_alias_with_conj( BLIS_CONJUGATE, *x, xh );

	bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau );
	bli_setv( &tau, &t );

	bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );

	bli_dotv( &xh, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, x, &w );
	bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w );

	bli_subv( &w, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
}
Ejemplo n.º 20
0
void libblis_test_her_experiment( test_params_t* params,
                                  test_op_t*     op,
                                  iface_t        iface,
                                  num_t          datatype,
                                  char*          pc_str,
                                  char*          sc_str,
                                  unsigned int   p_cur,
                                  double*        perf,
                                  double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	uplo_t       uploa;
	conj_t       conjx;

	obj_t        alpha, x, a;
	obj_t        a_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype,
	                          sc_str[0], m,    &x );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a_save );

	// Set alpha.
	//bli_copysc( &BLIS_MINUS_ONE, &alpha );
	bli_setsc( -1.0, 0.0, &alpha );

	// Randomize x.
	bli_randv( &x );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_HERMITIAN, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely Hermitian, and zero the unstored triangle
	// to ensure the implementation is reads only from the stored region.
	bli_randm( &a );
	bli_mkherm( &a );
	bli_mktrim( &a );

	// Save A and set its structure and uplo properties.
	bli_obj_set_struc( BLIS_HERMITIAN, a_save );
	bli_obj_set_uplo( uploa, a_save );
	bli_copym( &a, &a_save );

	// Apply the remaining parameters.
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &a_save, &a );

		time = bli_clock();

		libblis_test_her_impl( iface, &alpha, &x, &a );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( a ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_her_check( &alpha, &x, &a, &a_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &a, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &a );
	bli_obj_free( &a_save );
}
Ejemplo n.º 21
0
void libblis_test_gemmtrsm_ukr_check( side_t  side,
                                      obj_t*  alpha,
                                      obj_t*  a1x,
                                      obj_t*  a11,
                                      obj_t*  bx1,
                                      obj_t*  b11,
                                      obj_t*  c11,
                                      obj_t*  c11_orig,
                                      double* resid )
{
	num_t  dt      = bli_obj_datatype( *b11 );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *b11 );

	dim_t  m       = bli_obj_length( *b11 );
	dim_t  n       = bli_obj_width( *b11 );
	dim_t  k       = bli_obj_width( *a1x );

	obj_t  kappa, norm;
	obj_t  t, v, w, z;

	double junk;

	//
	// Pre-conditions:
	// - a1x, a11, bx1, c11_orig are randomized; a11 is triangular.
	// - contents of b11 == contents of c11.
	// - side == BLIS_LEFT.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   B := inv(A11) * ( alpha * B11 - A1x * Bx1 )       (side = left)
	//
	// is functioning correctly if
	//
	//   fnorm( v - z )
	//
	// is negligible, where
	//
	//   v = B11 * t
	//
	//   z = ( inv(A11) * ( alpha * B11_orig - A1x * Bx1 ) ) * t
	//     = inv(A11) * ( alpha * B11_orig * t - A1x * Bx1 * t )
	//     = inv(A11) * ( alpha * B11_orig * t - A1x * w )
	//

	bli_obj_scalar_init_detached( dt,      &kappa );
	bli_obj_scalar_init_detached( dt_real, &norm );

	if ( bli_is_left( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, k, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}
	else // else if ( bli_is_left( side ) )
	{
		// BLIS does not currently support right-side micro-kernels.
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	bli_randv( &t );
	bli_setsc( 1.0/( double )n, 0.0, &kappa );
	bli_scalv( &kappa, &t );

	bli_gemv( &BLIS_ONE, b11, &t, &BLIS_ZERO, &v );

	// Restore the diagonal of a11 to its original, un-inverted state
	// (needed for trsv).
	bli_invertd( a11 );

	if ( bli_is_left( side ) )
	{
		bli_gemv( &BLIS_ONE, bx1, &t, &BLIS_ZERO, &w );
		bli_gemv( alpha, c11_orig, &t, &BLIS_ZERO, &z );
		bli_gemv( &BLIS_MINUS_ONE, a1x, &w, &BLIS_ONE, &z );
		bli_trsv( &BLIS_ONE, a11, &z );
	}
	else // else if ( bli_is_left( side ) )
	{
		// BLIS does not currently support right-side micro-kernels.
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	bli_subv( &z, &v );
	bli_fnormv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
Ejemplo n.º 22
0
void libblis_test_her2k_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         b,
       obj_t*         beta,
       obj_t*         c,
       obj_t*         c_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_datatype( *c );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *c );

	dim_t  m       = bli_obj_length( *c );
	dim_t  k       = bli_obj_width_after_trans( *a );

	obj_t  alphac, ah, bh;
	obj_t  norm;
	obj_t  t, v, w1, w2, z;

	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - b is randomized.
	// - c_orig is randomized and Hermitian.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	// - beta must be real-valued.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   C := beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H
	//
	// is functioning correctly if
	//
	//   normf( v - z )
	//
	// is negligible, where
	//
	//   v = C * t
	//   z = ( beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H ) * t
	//     = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * transa(A)^H * t
	//     = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * w2
	//     = beta * C_orig * t + alpha * transa(A) * w1              + conj(alpha) * transb(B) * w2
	//     = beta * C_orig * t + alpha * transa(A) * w1              + z
	//     = beta * C_orig * t + z
	//

	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah );
	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *b, bh );

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac );

	bli_obj_create( dt, m, 1, 0, 0, &t );
	bli_obj_create( dt, m, 1, 0, 0, &v );
	bli_obj_create( dt, k, 1, 0, 0, &w1 );
	bli_obj_create( dt, k, 1, 0, 0, &w2 );
	bli_obj_create( dt, m, 1, 0, 0, &z );

	libblis_test_vobj_randomize( params, TRUE, &t );

	bli_hemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	bli_gemv( &BLIS_ONE, &ah, &t, &BLIS_ZERO, &w2 );
	bli_gemv( &BLIS_ONE, &bh, &t, &BLIS_ZERO, &w1 );
	bli_gemv( alpha, a, &w1, &BLIS_ZERO, &z );
	bli_gemv( &alphac, b, &w2, &BLIS_ONE, &z );
	bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z );

	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w1 );
	bli_obj_free( &w2 );
	bli_obj_free( &z );
}
Ejemplo n.º 23
0
void libblis_test_fnormv_experiment( test_params_t* params,
                                     test_op_t*     op,
                                     mt_impl_t      impl,
                                     num_t          datatype,
                                     char*          pc_str,
                                     char*          sc_str,
                                     unsigned int   p_cur,
                                     double*        perf,
                                     double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	num_t        dt_real   = bli_datatype_proj_to_real( datatype );

	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	obj_t        beta, norm;
	obj_t        x;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.


	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &beta );
	bli_obj_scalar_init_detached( dt_real,  &norm );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );

	// Initialize beta to 2 - 2i.
	bli_setsc( 2.0, -2.0, &beta );

	// Set all elements of x to beta.
	bli_setv( &beta, &x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		time = bli_clock();

		libblis_test_fnormv_impl( impl, &x, &norm );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 2.0;

	// Perform checks.
	libblis_test_fnormv_check( &beta, &x, &norm, resid );

	// Zero out performance and residual if input vector is empty.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
}
Ejemplo n.º 24
0
void libblis_test_axpyf_experiment( test_params_t* params,
                                    test_op_t*     op,
                                    iface_t        iface,
                                    num_t          datatype,
                                    char*          pc_str,
                                    char*          sc_str,
                                    unsigned int   p_cur,
                                    double*        perf,
                                    double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, b_n;

	conj_t       conja, conjx;

	obj_t        alpha, a, x, y;
	obj_t        y_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Query the operation's fusing factor for the current datatype.
	b_n = bli_axpyf_fusefac( datatype );

	// Store the fusing factor so that the driver can retrieve the value
	// later when printing results.
	op->dim_aux[0] = b_n;

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conja );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                                            sc_str[0], m, b_n, &a );
	libblis_test_vobj_create( params, datatype, sc_str[1], b_n, &x );
	libblis_test_vobj_create( params, datatype, sc_str[2], m, &y );
	libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_save );

	// Set alpha.
	if ( bli_obj_is_real( y ) )
	{
		bli_setsc( -1.0,  0.0, &alpha );
	}
	else
	{
		bli_setsc(  0.0, -1.0, &alpha );
	}

	// Randomize A, x, and y, and save y.
	bli_randm( &a );
	bli_randv( &x );
	bli_randv( &y );
	bli_copyv( &y, &y_save );

	// Apply the parameters.
	bli_obj_set_conj( conja, a );
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copyv( &y_save, &y );

		time = bli_clock();

		libblis_test_axpyf_impl( iface, &alpha, &a, &x, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_axpyf_check( &alpha, &a, &x, &y, &y_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
Ejemplo n.º 25
0
void libblis_test_dotxaxpyf_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, b_n;

	conj_t       conjat, conja, conjw, conjx;

	obj_t        alpha, at, a, w, x, beta, y, z;
	obj_t        y_save, z_save;

	cntx_t       cntx;

	// Initialize a context.
	bli_dotxaxpyf_cntx_init( &cntx );

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Query the operation's fusing factor for the current datatype.
	b_n = bli_cntx_get_blksz_def_dt( datatype, BLIS_XF, &cntx );

	// Store the fusing factor so that the driver can retrieve the value
	// later when printing results.
	op->dim_aux[0] = b_n;

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjat );
	bli_param_map_char_to_blis_conj( pc_str[1], &conja );
	bli_param_map_char_to_blis_conj( pc_str[2], &conjw );
	bli_param_map_char_to_blis_conj( pc_str[3], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                                            sc_str[0], m, b_n, &a );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &w );
	libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &x );
	libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y );
	libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y_save );
	libblis_test_vobj_create( params, datatype, sc_str[4], m, &z );
	libblis_test_vobj_create( params, datatype, sc_str[4], m, &z_save );

	// Set alpha.
	if ( bli_obj_is_real( y ) )
	{
		bli_setsc(  1.2,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  1.2,  0.1, &alpha );
		bli_setsc( -1.0, -0.1, &beta );
	}

	// Randomize A, w, x, y, and z, and save y and z.
	libblis_test_mobj_randomize( params, FALSE, &a );
	libblis_test_vobj_randomize( params, FALSE, &w );
	libblis_test_vobj_randomize( params, FALSE, &x );
	libblis_test_vobj_randomize( params, FALSE, &y );
	libblis_test_vobj_randomize( params, FALSE, &z );
	bli_copyv( &y, &y_save );
	bli_copyv( &z, &z_save );

	// Create an alias to a for at. (Note that it should NOT actually be
	// marked for transposition since the transposition is part of the dotxf
	// subproblem.)
	bli_obj_alias_to( a, at );

	// Apply the parameters.
	bli_obj_set_conj( conjat, at );
	bli_obj_set_conj( conja, a );
	bli_obj_set_conj( conjw, w );
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copyv( &y_save, &y );
		bli_copyv( &z_save, &z );

		time = bli_clock();

		libblis_test_dotxaxpyf_impl( iface,
		                             &alpha, &at, &a, &w, &x, &beta, &y, &z,
		                             &cntx );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_dotxaxpyf_check( params, &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid );

	// Zero out performance and residual if either output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );
	libblis_test_check_empty_problem( &z, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &w );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &z );
	bli_obj_free( &y_save );
	bli_obj_free( &z_save );

	// Finalize the context.
	bli_dotxaxpyf_cntx_finalize( &cntx );
}
Ejemplo n.º 26
0
void libblis_test_axpyf_check( obj_t*  alpha,
                               obj_t*  a,
                               obj_t*  x,
                               obj_t*  y,
                               obj_t*  y_orig,
                               double* resid )
{
	num_t  dt      = bli_obj_datatype( *y );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

	dim_t  m       = bli_obj_vector_dim( *y );
	dim_t  b_n     = bli_obj_width( *a );

	dim_t  i;

	obj_t  a1, chi1, v;
	obj_t  alpha_chi1;
	obj_t  norm;

	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - x is randomized.
	// - y is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := y_orig + alpha * conja(A) * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - v )
	//
	// is negligible, where v contains y as computed by repeated calls to
	// axpyv.
	//

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached( dt,      &alpha_chi1 );

	bli_obj_create( dt, m,   1, 0, 0, &v );

	bli_copyv( y_orig, &v );

	for ( i = 0; i < b_n; ++i )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 );

		bli_copysc( &chi1, &alpha_chi1 );
		bli_mulsc( alpha, &alpha_chi1 );

		bli_axpyv( &alpha_chi1, &a1, &v );
	}

	bli_subv( y, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &v );
}
Ejemplo n.º 27
0
void libblis_test_symv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	dim_t        m;

	uplo_t       uploa;
	conj_t       conja;
	conj_t       conjx;

	obj_t        alpha, a, x, beta, y;
	obj_t        y_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conja );
	bli_param_map_char_to_blis_conj( pc_str[2], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &a );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], m,    &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m,    &y );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m,    &y_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( &y ) )
	{
		bli_setsc(  1.0,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  0.5,  0.5, &alpha );
		bli_setsc( -0.5,  0.5, &beta );
	}

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_SYMMETRIC, &a );
	bli_obj_set_uplo( uploa, &a );

	// Randomize A, make it densely symmetric, and zero the unstored triangle
	// to ensure the implementation reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &a );
	bli_mksymm( &a );
	bli_mktrim( &a );

	// Randomize x and y, and save y.
	libblis_test_vobj_randomize( params, TRUE, &x );
	libblis_test_vobj_randomize( params, TRUE, &y );
	bli_copyv( &y, &y_save );

	// Apply the remaining parameters.
	bli_obj_set_conj( conja, &a );
	bli_obj_set_conj( conjx, &x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &y_save, &y );

		time = bli_clock();

		libblis_test_symv_impl( iface, &alpha, &a, &x, &beta, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &y ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_symv_check( params, &alpha, &a, &x, &beta, &y, &y_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
Ejemplo n.º 28
0
void libblis_test_gemv_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   iface_t        iface,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n;

	trans_t      transa;
	conj_t       conjx;

	obj_t        kappa;
	obj_t        alpha, a, x, beta, y;
	obj_t        y_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_trans( pc_str[0], &transa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, n, &a );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], n,    &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m,    &y );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[2], m,    &y_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( y ) )
	{
		bli_setsc(  2.0,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  0.0,  2.0, &alpha );
		bli_setsc(  0.0, -1.0, &beta );
	}

	// Initialize diagonal of matrix A.
	bli_setsc( 2.0, -1.0, &kappa );
	bli_setm( &BLIS_ZERO, &a );
	bli_setd( &kappa, &a );

	// Randomize x and y, and save y.
	bli_randv( &x );
	bli_randv( &y );
	bli_copyv( &y, &y_save );

	// Apply the parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conj( conjx, x );

	// Repeat the experiment n_repeats times and record results. 
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &y_save, &y );

		time = bli_clock();

		libblis_test_gemv_impl( iface, &alpha, &a, &x, &beta, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_gemv_check( &kappa, &alpha, &a, &x, &beta, &y, &y_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &y_save );
}
Ejemplo n.º 29
0
void libblis_test_trsm_check
     (
       test_params_t* params,
       side_t         side,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         b,
       obj_t*         b_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_datatype( *b );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *b );

	dim_t  m       = bli_obj_length( *b );
	dim_t  n       = bli_obj_width( *b );

	obj_t  norm;
	obj_t  t, v, w, z;

	double junk;

	//
	// Pre-conditions:
	// - a is randomized and triangular.
	// - b_orig is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   B := alpha * inv(transa(A)) * B_orig    (side = left)
	//   B := alpha * B_orig * inv(transa(A))    (side = right)
	//
	// is functioning correctly if
	//
	//   normf( v - z )
	//
	// is negligible, where
	//
	//   v = B * t
	//
	//   z = ( alpha * inv(transa(A)) * B ) * t     (side = left)
	//     = alpha * inv(transa(A)) * B * t
	//     = alpha * inv(transa(A)) * w
	//
	//   z = ( alpha * B * inv(transa(A)) ) * t     (side = right)
	//     = alpha * B * tinv(ransa(A)) * t
	//     = alpha * B * w

	bli_obj_scalar_init_detached( dt_real, &norm );

	if ( bli_is_left( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, m, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}
	else // else if ( bli_is_left( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, n, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}

	libblis_test_vobj_randomize( params, TRUE, &t );

	bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v );

	if ( bli_is_left( side ) )
	{
		bli_gemv( alpha, b_orig, &t, &BLIS_ZERO, &w );
		bli_trsv( &BLIS_ONE, a, &w );
		bli_copyv( &w, &z );
	}
	else
	{
		bli_copyv( &t, &w );
		bli_trsv( &BLIS_ONE, a, &w );
		bli_gemv( alpha, b_orig, &w, &BLIS_ZERO, &z );
	}

	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
Ejemplo n.º 30
0
void libblis_test_gemv_check( obj_t*  kappa,
                              obj_t*  alpha,
                              obj_t*  a,
                              obj_t*  x,
                              obj_t*  beta,
                              obj_t*  y,
                              obj_t*  y_orig,
                              double* resid )
{
	num_t  dt      = bli_obj_datatype( *y );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

	conj_t conja   = bli_obj_conj_status( *a );

	dim_t  n_x     = bli_obj_vector_dim( *x );
	dim_t  m_y     = bli_obj_vector_dim( *y );

	dim_t  min_m_n = bli_min( m_y, n_x );

	obj_t  x_temp, y_temp;
	obj_t  kappac, norm;
	obj_t  xT_temp, yT_temp, yT;

	double junk;

	//
	// Pre-conditions:
	// - a is initialized to kappa along the diagonal.
	// - x is randomized.
	// - y_orig is randomized.
	// Note:
	// - alpha, beta, and kappa should have non-zero imaginary components in
	//   the complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + alpha * transa(A) * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - z )
	//
	// is negligible, where
	//
	//   z = beta * y_orig + alpha * conja(kappa) * x
	//

	bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappac );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, n_x, 1, 0, 0, &x_temp );
	bli_obj_create( dt, m_y, 1, 0, 0, &y_temp );

	bli_copyv( x,      &x_temp );
	bli_copyv( y_orig, &y_temp );

	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, 
	                       &x_temp, &xT_temp );
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, 
	                       &y_temp, &yT_temp );
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, 
	                       y, &yT );

	bli_scalv( &kappac, &xT_temp );
	bli_scalv( beta, &yT_temp );
	bli_axpyv( alpha, &xT_temp, &yT_temp );

	bli_subv( &yT_temp, &yT );
	bli_normfv( &yT, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &x_temp );
	bli_obj_free( &y_temp );
}