Beispiel #1
0
//
// Front-end for the Hermitian rank-2k update.
//
// The update is carried out as two back-end invocations on the same C:
//
//   1. alpha       * A * B'  with the caller's beta applied to C
//   2. conj(alpha) * B * A'  with a unit scalar applied to C
//
// after which the imaginary parts of the diagonal of C are zeroed.
//
//   alpha, beta - scalar objects
//   a, b        - input matrix objects
//   c           - matrix object that is updated
//   cntx        - context; also receives the threading information
//   cntl        - control tree handed through to the back-end
//
void bli_her2k_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t    conj_alpha;
	obj_t    c_view;
	obj_t    a_view;
	obj_t    ah_view;
	obj_t    b_view;
	obj_t    bh_view;

	// Validate the operands when error checking is switched on.
	if ( bli_error_checking_is_enabled() )
		bli_her2k_check( alpha, a, b, beta, c, cntx );

	// Degenerate case: with alpha equal to zero there is no product to
	// accumulate. Scale C by beta, clear the imaginary parts of its
	// diagonal, and finish.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		bli_setid( &BLIS_ZERO, c );
		return;
	}

	// Work on local aliases so that the caller's objects are never
	// modified by the transformations below. The alias of C is marked
	// as a root object.
	bli_obj_alias_to( c, &c_view );
	bli_obj_set_as_root( &c_view );

	// A and its conjugate-transposed counterpart A'.
	bli_obj_alias_to( a, &a_view );
	bli_obj_alias_to( a, &ah_view );
	bli_obj_induce_trans( &ah_view );
	bli_obj_toggle_conj( &ah_view );

	// B and its conjugate-transposed counterpart B'.
	bli_obj_alias_to( b, &b_view );
	bli_obj_alias_to( b, &bh_view );
	bli_obj_induce_trans( &bh_view );
	bli_obj_toggle_conj( &bh_view );

	// The second product is scaled by the conjugate of alpha, created in
	// the datatype of A.
	bli_obj_scalar_init_detached_copy_of( bli_obj_dt( a ),
	                                      BLIS_CONJUGATE,
	                                      alpha,
	                                      &conj_alpha );

	// Storage optimization: when the gemm micro-kernel dislikes the way C
	// is stored, transpose the whole operation. Each left-hand operand
	// trades places with the right-hand operand of the same product, and
	// every operand, C included, has a transposition induced.
	if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_view, BLIS_GEMM_UKR, cntx ) )
	{
		bli_obj_swap( &a_view, &bh_view );
		bli_obj_induce_trans( &a_view );
		bli_obj_induce_trans( &bh_view );

		bli_obj_swap( &b_view, &ah_view );
		bli_obj_induce_trans( &b_view );
		bli_obj_induce_trans( &ah_view );

		bli_obj_induce_trans( &c_view );
	}

	// Store the per-level threading in the context, based on the
	// dimensions of C and the width of the first left-hand operand.
	bli_cntx_set_thrloop_from_env( BLIS_HER2K, BLIS_LEFT, cntx,
	                               bli_obj_length( &c_view ),
	                               bli_obj_width( &c_view ),
	                               bli_obj_width( &a_view ) );

	// First product: uses alpha and is the only one that applies beta.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  alpha,
	  &a_view,
	  &bh_view,
	  beta,
	  &c_view,
	  cntx,
	  cntl
	);

	// Second product: uses conj(alpha) and accumulates into C via
	// BLIS_ONE.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  &conj_alpha,
	  &b_view,
	  &ah_view,
	  &BLIS_ONE,
	  &c_view,
	  cntx,
	  cntl
	);

	// Both products were computed for the diagonal elements as well. In
	// exact arithmetic the diagonal of a Hermitian rank-2k product is
	// real, but in practice meaningless non-zero imaginary values can
	// accumulate there, so they are explicitly reset to zero.
	bli_setid( &BLIS_ZERO, &c_view );
}
Beispiel #2
0
//
// Computes a residual for a trsm result.
//
//   params - test parameters, forwarded to the vector randomizer
//   side   - side on which the triangular matrix a was applied
//   alpha  - scalar used by the operation under test
//   a      - triangular matrix operand
//   b      - result produced by the operation under test
//   b_orig - right-hand side as it was before the operation
//   resid  - output: Frobenius norm of the difference described below
//
void libblis_test_trsm_check
     (
       test_params_t* params,
       side_t         side,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         b,
       obj_t*         b_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( b );
	num_t  dt_real = bli_obj_dt_proj_to_real( b );

	dim_t  m       = bli_obj_length( b );
	dim_t  n       = bli_obj_width( b );

	// w is the vector handed to trsv: it has m elements for the left-side
	// case and n elements for the right-side case.
	dim_t  w_len   = ( bli_is_left( side ) ? m : n );

	obj_t  resid_norm;
	obj_t  t, v, w, z;

	double unused;

	//
	// Expected state on entry:
	// - a is randomized and triangular.
	// - b_orig is randomized.
	// - alpha ideally has a non-zero imaginary component in the complex
	//   cases, so that more of the implementation is exercised.
	//
	// The operation under test is
	//
	//   B := alpha * inv(transa(A)) * B_orig    (side = left)
	//   B := alpha * B_orig * inv(transa(A))    (side = right)
	//
	// and it is considered correct when normf( v - z ) is negligible for
	// a random vector t, where
	//
	//   v = B * t
	//
	//   z = ( alpha * inv(transa(A)) * B_orig ) * t     (side = left)
	//     = inv(transa(A)) * ( alpha * B_orig * t )
	//     = inv(transa(A)) * w
	//
	//   z = ( alpha * B_orig * inv(transa(A)) ) * t     (side = right)
	//     = alpha * B_orig * ( inv(transa(A)) * t )
	//     = alpha * B_orig * w
	//

	bli_obj_scalar_init_detached( dt_real, &resid_norm );

	// Only the length of w depends on the side.
	bli_obj_create( dt, n,     1, 0, 0, &t );
	bli_obj_create( dt, m,     1, 0, 0, &v );
	bli_obj_create( dt, w_len, 1, 0, 0, &w );
	bli_obj_create( dt, m,     1, 0, 0, &z );

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = B * t, using the computed result.
	bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v );

	// z = the same quantity rebuilt from B_orig with level-2 operations.
	if ( bli_is_left( side ) )
	{
		// Multiply first, then solve with A.
		bli_gemv( alpha, b_orig, &t, &BLIS_ZERO, &w );
		bli_trsv( &BLIS_ONE, a, &w );
		bli_copyv( &w, &z );
	}
	else
	{
		// Solve with A first, then multiply.
		bli_copyv( &t, &w );
		bli_trsv( &BLIS_ONE, a, &w );
		bli_gemv( alpha, b_orig, &w, &BLIS_ZERO, &z );
	}

	// resid = normf( v - z ); the second output of bli_getsc is not used.
	bli_subv( &z, &v );
	bli_normfv( &v, &resid_norm );
	bli_getsc( &resid_norm, resid, &unused );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
Beispiel #3
0
//
// Internal back-end for the rank-1 update (ger).
//
// Aliases the operands, folds the conjugation arguments and any
// conjugation marked on A into the vectors and alpha, optionally induces
// a transposition on A, and then dispatches to the variant selected by
// the control tree.
//
//   conjx, conjy - conjugation to apply to x and y, respectively
//   alpha        - scalar object
//   x, y         - vector objects
//   a            - matrix object that is updated
//   cntx         - context, passed through to the variant
//   cntl         - control tree node; supplies the variant number and
//                  the implementation type
//
// Returns without doing anything if a, x, or y has a zero dimension.
//
void bli_ger_int( conj_t  conjx,
                  conj_t  conjy,
                  obj_t*  alpha,
                  obj_t*  x,
                  obj_t*  y,
                  obj_t*  a,
                  cntx_t* cntx,
                  ger_t*  cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     alpha_local;
	obj_t     x_local;
	obj_t     y_local;
	obj_t     a_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_ger_check( alpha, x, y, a );

	// If A has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( a ) ) return;

	// If x or y has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( x ) ||
	     bli_obj_has_zero_dim( y ) ) return;

	// Alias the objects, applying conjx and conjy to x and y, respectively.
	bli_obj_alias_with_conj( conjx, x, &x_local );
	bli_obj_alias_with_conj( conjy, y, &y_local );
	bli_obj_alias_to( a, &a_local );

	// If matrix A is marked for conjugation, we interpret this as a request
	// to apply a conjugation to the other operands.
	if ( bli_obj_has_conj( &a_local ) )
	{
		bli_obj_toggle_conj( &a_local );

		bli_obj_toggle_conj( &x_local );
		bli_obj_toggle_conj( &y_local );

		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
		                                      BLIS_CONJUGATE,
		                                      alpha,
		                                      &alpha_local );
	}
	else
	{
		// No conjugation needed: alpha_local is a plain alias of alpha.
		// Both arguments are passed by address, exactly as in the
		// aliasing of A above.
		bli_obj_alias_to( alpha, &alpha_local );
	}

	// If we are about to call a leaf-level implementation, and matrix A
	// still needs a transposition, then we must induce one by swapping the
	// strides and dimensions.
	if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
	{
		bli_obj_induce_trans( &a_local );
		bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
	}

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( &alpha_local,
	   &x_local,
	   &y_local,
	   &a_local,
	   cntx,
	   cntl );
}
Beispiel #4
0
//
// Verifies the result of setv by comparing every element of x with beta.
//
//   params - test parameters (not referenced here)
//   beta   - scalar that x was set to
//   x      - vector to inspect
//   resid  - output: 0.0 if every element equals beta, 1.0 as soon as a
//            mismatch is found
//
void libblis_test_setv_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         x,
       double*        resid
     )
{
	num_t dt_x     = bli_obj_dt( x );
	dim_t len      = bli_obj_vector_dim( x );
	inc_t stride   = bli_obj_vector_inc( x );
	void* x_buf    = bli_obj_buffer_at_off( x );
	void* beta_buf = bli_obj_buffer_for_1x1( dt_x, beta );
	dim_t k;

	// Start out assuming success; the first mismatch overrides this.
	*resid = 0.0;

	// Each branch below walks x in its own element type, stepping by the
	// vector's increment, and stops at the first element that differs
	// from beta.

	if ( bli_obj_is_float( x ) )
	{
		float*    elem = x_buf;
		float*    ref  = beta_buf;

		for ( k = 0; k < len; ++k, elem += stride )
		{
			if ( bli_seq( *elem, *ref ) ) continue;

			*resid = 1.0;
			return;
		}
	}
	else if ( bli_obj_is_double( x ) )
	{
		double*   elem = x_buf;
		double*   ref  = beta_buf;

		for ( k = 0; k < len; ++k, elem += stride )
		{
			if ( bli_deq( *elem, *ref ) ) continue;

			*resid = 1.0;
			return;
		}
	}
	else if ( bli_obj_is_scomplex( x ) )
	{
		scomplex* elem = x_buf;
		scomplex* ref  = beta_buf;

		for ( k = 0; k < len; ++k, elem += stride )
		{
			if ( bli_ceq( *elem, *ref ) ) continue;

			*resid = 1.0;
			return;
		}
	}
	else // remaining case: treated as dcomplex
	{
		dcomplex* elem = x_buf;
		dcomplex* ref  = beta_buf;

		for ( k = 0; k < len; ++k, elem += stride )
		{
			if ( bli_zeq( *elem, *ref ) ) continue;

			*resid = 1.0;
			return;
		}
	}
}
Beispiel #5
0
//
// Computes a residual for a syr result.
//
//   params - test parameters, forwarded to the vector randomizer
//   alpha  - scalar used by the operation under test
//   x      - vector operand
//   a      - matrix produced by the operation under test
//   a_orig - matrix as it was before the operation
//   resid  - output: Frobenius norm of the difference described below
//
// Note that a and a_orig are modified: both are passed to bli_mksymm()
// and then marked as general, dense matrices.
//
void libblis_test_syr_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         x,
       obj_t*         a,
       obj_t*         a_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( a );
	num_t  dt_real = bli_obj_dt_proj_to_real( a );

	dim_t  dim     = bli_obj_length( a );

	obj_t  x_alias;
	obj_t  t, actual, expected;
	obj_t  dot, resid_norm;

	double unused;

	//
	// Expected state on entry:
	// - x is randomized.
	// - a is randomized and symmetric.
	// - alpha ideally has a non-zero imaginary component in the complex
	//   cases, so that more of the implementation is exercised.
	//
	// The operation under test is
	//
	//   A := A_orig + alpha * conjx(x) * conjx(x)^T
	//
	// and it is considered correct when normf( actual - expected ) is
	// negligible for a random vector t, where
	//
	//   actual   = A * t
	//   expected = ( A_orig + alpha * conjx(x) * conjx(x)^T ) * t
	//            = A_orig * t + alpha * conjx(x) * ( conjx(x)^T * t )
	//            = A_orig * t + ( alpha * dot ) * conjx(x)
	//

	// Make both matrices symmetric via bli_mksymm(), then flag them as
	// general and dense so that the gemv calls below can be used on them.
	bli_mksymm( a );
	bli_mksymm( a_orig );
	bli_obj_set_struc( BLIS_GENERAL, a );
	bli_obj_set_struc( BLIS_GENERAL, a_orig );
	bli_obj_set_uplo( BLIS_DENSE, a );
	bli_obj_set_uplo( BLIS_DENSE, a_orig );

	bli_obj_scalar_init_detached( dt,      &dot );
	bli_obj_scalar_init_detached( dt_real, &resid_norm );

	bli_obj_create( dt, dim, 1, 0, 0, &t );
	bli_obj_create( dt, dim, 1, 0, 0, &actual );
	bli_obj_create( dt, dim, 1, 0, 0, &expected );

	bli_obj_alias_to( x, &x_alias );

	libblis_test_vobj_randomize( params, TRUE, &t );

	// actual = A * t
	bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &actual );

	// dot      = alpha * ( x^T * t )
	// expected = dot * x + A_orig * t
	bli_dotv( &x_alias, &t, &dot );
	bli_mulsc( alpha, &dot );
	bli_scal2v( &dot, x, &expected );
	bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &expected );

	// resid = normf( actual - expected ); the second output of bli_getsc
	// is not used.
	bli_subv( &expected, &actual );
	bli_normfv( &actual, &resid_norm );
	bli_getsc( &resid_norm, resid, &unused );

	bli_obj_free( &t );
	bli_obj_free( &actual );
	bli_obj_free( &expected );
}
Beispiel #6
0
//
// Computes a residual for a gemv result.
//
//   params - test parameters (not referenced here)
//   kappa  - scalar that was placed along the diagonal of a
//   alpha  - scalar applied to the matrix-vector product
//   a      - matrix operand; only its conjugation status is read here
//   x      - vector operand
//   beta   - scalar applied to y_orig
//   y      - result produced by the operation under test; its leading
//            part is overwritten with the difference
//   y_orig - y as it was before the operation
//   resid  - output: Frobenius norm of the difference described below
//
void libblis_test_gemv_check
     (
       test_params_t* params,
       obj_t*         kappa,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         x,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         y_orig,
       double*        resid
     )
{
	num_t  dt       = bli_obj_dt( y );
	num_t  dt_real  = bli_obj_dt_proj_to_real( y );

	conj_t conja    = bli_obj_conj_status( a );

	dim_t  x_len    = bli_obj_vector_dim( x );
	dim_t  y_len    = bli_obj_vector_dim( y );

	// Only the leading min( y_len, x_len ) elements take part in the
	// comparison.
	dim_t  head_len = bli_min( y_len, x_len );

	obj_t  x_copy, y_copy;
	obj_t  kappa_conja, resid_norm;
	obj_t  x_head, z_head, y_head;

	double unused;

	//
	// Expected state on entry:
	// - a holds kappa along its diagonal.
	// - x is randomized.
	// - y_orig is randomized.
	// - alpha, beta, and kappa ideally have non-zero imaginary components
	//   in the complex cases, so that more of the implementation is
	//   exercised.
	//
	// The operation under test is
	//
	//   y := beta * y_orig + alpha * transa(A) * conjx(x)
	//
	// and it is considered correct when normf( y - z ) is negligible,
	// where
	//
	//   z = beta * y_orig + alpha * conja(kappa) * x
	//

	// kappa with the conjugation status of a applied to it.
	bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappa_conja );
	bli_obj_scalar_init_detached( dt_real, &resid_norm );

	// Scratch copies, so that x and y_orig themselves stay untouched.
	bli_obj_create( dt, x_len, 1, 0, 0, &x_copy );
	bli_obj_create( dt, y_len, 1, 0, 0, &y_copy );

	bli_copyv( x,      &x_copy );
	bli_copyv( y_orig, &y_copy );

	// Views of the leading head_len elements of each vector.
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, head_len,
	                       &x_copy, &x_head );
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, head_len,
	                       &y_copy, &z_head );
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, head_len,
	                       y, &y_head );

	// z = beta * y_orig + alpha * ( conja(kappa) * x ), built in z_head.
	bli_scalv( &kappa_conja, &x_head );
	bli_scalv( beta, &z_head );
	bli_axpyv( alpha, &x_head, &z_head );

	// resid = normf( y - z ); the second output of bli_getsc is not used.
	bli_subv( &z_head, &y_head );
	bli_normfv( &y_head, &resid_norm );
	bli_getsc( &resid_norm, resid, &unused );

	bli_obj_free( &x_copy );
	bli_obj_free( &y_copy );
}
Beispiel #7
0
//
// Unblocked packm variant 1: packs matrix c into p.
//
// Collects the properties of c and p, selects the implementation for the
// datatype of c from the ftypes table, and invokes it with a unit kappa.
// The call is made only by the thread for which bli_thread_am_ochief()
// is true; all other threads return without packing.
//
//   c      - source matrix object
//   p      - destination (packed) matrix object
//   cntx   - context, passed through to the implementation
//   cntl   - control tree node (not referenced here)
//   thread - thread info used for the chief test
//
void bli_packm_unb_var1
     (
       obj_t*  c,
       obj_t*  p,
       cntx_t* cntx,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	// Datatype used for the table lookup and for the kappa constant.
	num_t     dt_cp     = bli_obj_dt( c );

	// Structural properties of the source matrix.
	struc_t   strucc    = bli_obj_struc( c );
	doff_t    diagoffc  = bli_obj_diag_offset( c );
	diag_t    diagc     = bli_obj_diag( c );
	uplo_t    uploc     = bli_obj_uplo( c );
	trans_t   transc    = bli_obj_conjtrans_status( c );

	// Dimensions of the packed matrix, without and with padding.
	dim_t     m_p       = bli_obj_length( p );
	dim_t     n_p       = bli_obj_width( p );
	dim_t     m_max_p   = bli_obj_padded_length( p );
	dim_t     n_max_p   = bli_obj_padded_width( p );

	// Buffer and strides of the source.
	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	// Buffer and strides of the destination.
	void*     buf_p     = bli_obj_buffer_at_off( p );
	inc_t     rs_p      = bli_obj_row_stride( p );
	inc_t     cs_p      = bli_obj_col_stride( p );

	// The computational kernel is assumed to apply the alpha scalar of
	// the higher-level operation itself, so packing must not scale:
	// kappa is taken from BLIS_ONE.
	void*     buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );

	// Implementation registered for this datatype.
	FUNCPTR_T pack_fp   = ftypes[dt_cp];

	// Every thread other than the chief has nothing to do.
	if ( !bli_thread_am_ochief( thread ) ) return;

	pack_fp
	(
	  strucc,
	  diagoffc,
	  diagc,
	  uploc,
	  transc,
	  m_p,
	  n_p,
	  m_max_p,
	  n_max_p,
	  buf_kappa,
	  buf_c, rs_c, cs_c,
	  buf_p, rs_p, cs_p,
	  cntx
	);
}