Example #1
0
//
// Define object-based interface.
//
void bli_scalv( obj_t*  alpha,
                obj_t*  x )
{
	num_t     dt        = bli_obj_datatype( *x );

	dim_t     n         = bli_obj_vector_dim( *x );

	inc_t     inc_x     = bli_obj_vector_inc( *x );
	void*     buf_x     = bli_obj_buffer_at_off( *x );

	obj_t     alpha_local;
	void*     buf_alpha;

	FUNCPTR_T f         = ftypes[dt];

	if ( bli_error_checking_is_enabled() )
	    bli_scalv_check( alpha, x );

	// Create a local copy-cast of alpha (and apply internal conjugation
	// if needed).
	bli_obj_scalar_init_detached_copy_of( dt,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );

	// Extract the scalar buffer.
	buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local );

	// Invoke the void pointer-based function.
	f( BLIS_NO_CONJUGATE, // conjugation applied during copy-cast.
	   n,
	   buf_alpha,
	   buf_x, inc_x );
}
Example #2
0
void libblis_test_addv_check( obj_t*  alpha,
                              obj_t*  beta,
                              obj_t*  x,
                              obj_t*  y,
                              double* resid )
{
	num_t  dt      = bli_obj_datatype( *x );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
	dim_t  m       = bli_obj_vector_dim( *x );

	conj_t conjx   = bli_obj_conj_status( *x );

	obj_t  aplusb;
	obj_t  alpha_conj;
	obj_t  norm_r, m_r, temp_r;

	double junk;

	//
	// Pre-conditions:
	// - x is set to alpha.
	// - y_orig is set to beta.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := y_orig + conjx(x)
	//
	// is functioning correctly if
	//
	//   fnormv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m )
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt,      &aplusb );
	bli_obj_scalar_init_detached( dt_real, &temp_r );
	bli_obj_scalar_init_detached( dt_real, &norm_r );
	bli_obj_scalar_init_detached( dt_real, &m_r );

	bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj );

	bli_fnormv( y, &norm_r );

	bli_copysc( beta, &aplusb );
	bli_addsc( &alpha_conj, &aplusb );

	bli_setsc( ( double )m, 0.0, &m_r );

	bli_absqsc( &aplusb, &temp_r );
	bli_mulsc( &m_r, &temp_r );
	bli_sqrtsc( &temp_r, &temp_r );
	bli_subsc( &temp_r, &norm_r );

	bli_getsc( &norm_r, resid, &junk );
}
Example #3
0
void bli_obj_scalar_attach( conj_t conj,
                            obj_t* alpha,
                            obj_t* a )
{
	obj_t alpha_cast;

	// Make a copy-cast of alpha of the same datatype as A. This step
	// gives us the opportunity to conjugate and/or typecast alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
	                                      conj,
	                                      alpha,
	                                      &alpha_cast );

	// Copy the internal scalar in alpha_cast to A.
	bli_obj_copy_internal_scalar( alpha_cast, *a );
}
Example #4
0
void bli_obj_scalar_apply_scalar( obj_t* alpha,
                                  obj_t* a )
{
	obj_t alpha_cast;
	obj_t scalar_a;

	// Make a copy-cast of alpha of the same datatype as A. This step
	// gives us the opportunity to typecast alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_cast );
	// Detach the scalar from A.
	bli_obj_scalar_detach( a, &scalar_a );

	// Scale the detached scalar by alpha.
	bli_mulsc( &alpha_cast, &scalar_a );

	// Copy the internal scalar in scalar_a to A.
	bli_obj_copy_internal_scalar( scalar_a, *a );
}
Example #5
0
//
// Define object-based interface.
//
void bli_setm( obj_t* beta,
               obj_t* x )
{
	num_t dt_x;
	obj_t beta_local;

	if ( bli_error_checking_is_enabled() )
		bli_setm_check( beta, x );

	// Use the datatype of x as the target type for beta (since we do
	// not assume mixed domain/type support is enabled).
	dt_x = bli_obj_datatype( *x );

	// Create an object to hold a copy-cast of beta.
	bli_obj_scalar_init_detached_copy_of( dt_x,
	                                      BLIS_NO_CONJUGATE,
	                                      beta,
	                                      &beta_local );

	bli_setm_unb_var1( &beta_local, x );
}
Example #6
0
void bli_syr2_front
     (
       obj_t*  alpha,
       obj_t*  x,
       obj_t*  y,
       obj_t*  c,
       cntx_t* cntx
     )
{
	her2_t* her2_cntl;
	num_t   dt_targ_x;
	num_t   dt_targ_y;
	//num_t   dt_targ_c;
	bool_t  x_has_unit_inc;
	bool_t  y_has_unit_inc;
	bool_t  c_has_unit_inc;
	obj_t   alpha_local;
	num_t   dt_alpha;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_syr2_check( alpha, x, y, c );


	// Query the target datatypes of each object.
	dt_targ_x = bli_obj_target_dt( x );
	dt_targ_y = bli_obj_target_dt( y );
	//dt_targ_c = bli_obj_target_dt( c );

	// Determine whether each operand with unit stride.
	x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
	y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
	c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
	                   bli_obj_is_col_stored( c ) );


	// Create an object to hold a copy-cast of alpha. Notice that we use
	// the type union of the datatypes of x and y.
	dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
	bli_obj_scalar_init_detached_copy_of( dt_alpha,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );


	// If all operands have unit stride, we choose a control tree for calling
	// the unblocked implementation directly without any blocking.
	if ( x_has_unit_inc &&
	     y_has_unit_inc &&
	     c_has_unit_inc )
	{
		// We use two control trees to handle the four cases corresponding to
		// combinations of upper/lower triangular storage and row/column-storage.
		// The row-stored lower triangular and column-stored upper triangular
		// trees are identical. Same for the remaining two trees.
		if ( bli_obj_is_lower( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
			else                               her2_cntl = her2_cntl_bs_ke_lcol_urow;
		}
		else // if ( bli_obj_is_upper( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
			else                               her2_cntl = her2_cntl_bs_ke_lrow_ucol;
		}
	}
	else
	{
		// Mark objects with unit stride as already being packed. This prevents
		// unnecessary packing from happening within the blocked algorithm.
		if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
		if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
		if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );

		// Here, we make a similar choice as above, except that (1) we look
		// at storage tilt, and (2) we choose a tree that performs blocking.
		if ( bli_obj_is_lower( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
			else                               her2_cntl = her2_cntl_ge_lcol_urow;
		}
		else // if ( bli_obj_is_upper( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
			else                               her2_cntl = her2_cntl_ge_lrow_ucol;
		}
	}

	// Invoke the internal back-end with the copy-cast scalar and the
	// chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the
	// symmetric (and not Hermitian) algorithms.
	bli_her2_int( BLIS_NO_CONJUGATE,
	              &alpha_local,
	              &alpha_local,
	              x,
	              y,
	              c,
	              cntx,
	              her2_cntl );
}
Example #7
0
void bli_her2k_front( obj_t*  alpha,
                      obj_t*  a,
                      obj_t*  b,
                      obj_t*  beta,
                      obj_t*  c,
                      herk_t* cntl )
{
	obj_t    alpha_conj;
	obj_t    c_local;
	obj_t    a_local;
	obj_t    bh_local;
	obj_t    b_local;
	obj_t    ah_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_her2k_check( alpha, a, b, beta, c );

	// If alpha is zero, scale by beta and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );
	bli_obj_alias_to( *c, c_local );
	bli_obj_set_as_root( c_local );

	// For her2k, the first and second right-hand "B" operands are simply B'
	// and A'.
	bli_obj_alias_to( *b, bh_local );
	bli_obj_induce_trans( bh_local );
	bli_obj_toggle_conj( bh_local );
	bli_obj_alias_to( *a, ah_local );
	bli_obj_induce_trans( ah_local );
	bli_obj_toggle_conj( ah_local );

	// Initialize a conjugated copy of alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
	                                      BLIS_CONJUGATE,
	                                      alpha,
	                                      &alpha_conj );

	// An optimization: If C is row-stored, transpose the entire operation
	// so as to allow the macro-kernel more favorable access patterns
	// through C. (The effect of the transposition of A and A' is negligible
	// because those operands are always packed to contiguous memory.)
	if ( bli_obj_is_row_stored( c_local ) )
	{
		bli_obj_swap( a_local, bh_local );
		bli_obj_swap( b_local, ah_local );

		bli_obj_induce_trans( a_local );
		bli_obj_induce_trans( bh_local );
		bli_obj_induce_trans( b_local );
		bli_obj_induce_trans( ah_local );

		bli_obj_induce_trans( c_local );
	}

#if 0
	// Invoke the internal back-end.
	bli_her2k_int( alpha,
	               &a_local,
	               &bh_local,
	               &alpha_conj,
	               &b_local,
	               &ah_local,
	               beta,
	               &c_local,
	               cntl );
#else
	// Invoke herk twice, using beta only the first time.
	bli_herk_int( alpha,
	              &a_local,
	              &bh_local,
	              beta,
	              &c_local,
	              cntl );

	bli_herk_int( &alpha_conj,
	              &b_local,
	              &ah_local,
	              &BLIS_ONE,
	              &c_local,
	              cntl );
#endif
}
Example #8
0
void bli_her2k_front( obj_t*  alpha,
                      obj_t*  a,
                      obj_t*  b,
                      obj_t*  beta,
                      obj_t*  c,
                      gemm_t* cntl )
{
	obj_t    alpha_conj;
	obj_t    c_local;
	obj_t    a_local;
	obj_t    bh_local;
	obj_t    b_local;
	obj_t    ah_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_her2k_check( alpha, a, b, beta, c );

	// If alpha is zero, scale by beta, zero the imaginary components of
	// the diagonal elements, and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		bli_setid( &BLIS_ZERO, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );
	bli_obj_alias_to( *c, c_local );
	bli_obj_set_as_root( c_local );

	// For her2k, the first and second right-hand "B" operands are simply B'
	// and A'.
	bli_obj_alias_to( *b, bh_local );
	bli_obj_induce_trans( bh_local );
	bli_obj_toggle_conj( bh_local );
	bli_obj_alias_to( *a, ah_local );
	bli_obj_induce_trans( ah_local );
	bli_obj_toggle_conj( ah_local );

	// Initialize a conjugated copy of alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
	                                      BLIS_CONJUGATE,
	                                      alpha,
	                                      &alpha_conj );

	// An optimization: If C is stored by rows and the micro-kernel prefers
	// contiguous columns, or if C is stored by columns and the micro-kernel
	// prefers contiguous rows, transpose the entire operation to allow the
	// micro-kernel to access elements of C in its preferred manner.
	if (
	     ( bli_obj_is_row_stored( c_local ) &&
	       bli_func_prefers_contig_cols( bli_obj_datatype( c_local ),
	                                     bli_gemm_cntl_ukrs( cntl ) ) ) ||
	     ( bli_obj_is_col_stored( c_local ) &&
	       bli_func_prefers_contig_rows( bli_obj_datatype( c_local ),
	                                     bli_gemm_cntl_ukrs( cntl ) ) )
	   )
	{
		bli_obj_swap( a_local, bh_local );
		bli_obj_swap( b_local, ah_local );

		bli_obj_induce_trans( a_local );
		bli_obj_induce_trans( bh_local );
		bli_obj_induce_trans( b_local );
		bli_obj_induce_trans( ah_local );

		bli_obj_induce_trans( c_local );
	}

#if 0
	// Invoke the internal back-end.
	bli_her2k_int( alpha,
	               &a_local,
	               &bh_local,
	               &alpha_conj,
	               &b_local,
	               &ah_local,
	               beta,
	               &c_local,
	               cntl );
#else

	// Invoke herk twice, using beta only the first time.
    herk_thrinfo_t** infos = bli_create_herk_thrinfo_paths();
    dim_t n_threads = thread_num_threads( infos[0] );

    // Invoke the internal back-end.
    bli_level3_thread_decorator( n_threads,   
                                 (level3_int_t) bli_herk_int, 
                                 alpha, 
                                 &a_local,  
                                 &bh_local,  
                                 beta, 
                                 &c_local,  
                                 (void*) cntl, 
                                 (void**) infos );

    bli_level3_thread_decorator( n_threads,   
                                 (level3_int_t) bli_herk_int, 
                                 &alpha_conj, 
                                 &b_local,  
                                 &ah_local,  
                                 &BLIS_ONE, 
                                 &c_local,  
                                 (void*) cntl, 
                                 (void**) infos );

    bli_herk_thrinfo_free_paths( infos, n_threads );

#endif

	// The Hermitian rank-2k product was computed as A*B'+B*A', even for
	// the diagonal elements. Mathematically, the imaginary components of
	// diagonal elements of a Hermitian rank-2k product should always be
	// zero. However, in practice, they sometimes accumulate meaningless
	// non-zero values. To prevent this, we explicitly set those values
	// to zero before returning.
	bli_setid( &BLIS_ZERO, &c_local );

}
Example #9
0
void libblis_test_gemv_check( obj_t*  kappa,
                              obj_t*  alpha,
                              obj_t*  a,
                              obj_t*  x,
                              obj_t*  beta,
                              obj_t*  y,
                              obj_t*  y_orig,
                              double* resid )
{
	num_t  dt      = bli_obj_datatype( *y );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

	conj_t conja   = bli_obj_conj_status( *a );

	dim_t  n_x     = bli_obj_vector_dim( *x );
	dim_t  m_y     = bli_obj_vector_dim( *y );

	dim_t  min_m_n = bli_min( m_y, n_x );

	obj_t  x_temp, y_temp;
	obj_t  kappac, norm;
	obj_t  xT_temp, yT_temp, yT;

	double junk;

	//
	// Pre-conditions:
	// - a is initialized to kappa along the diagonal.
	// - x is randomized.
	// - y_orig is randomized.
	// Note:
	// - alpha, beta, and kappa should have non-zero imaginary components in
	//   the complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + alpha * transa(A) * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - z )
	//
	// is negligible, where
	//
	//   z = beta * y_orig + alpha * conja(kappa) * x
	//

	bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappac );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, n_x, 1, 0, 0, &x_temp );
	bli_obj_create( dt, m_y, 1, 0, 0, &y_temp );

	bli_copyv( x,      &x_temp );
	bli_copyv( y_orig, &y_temp );

	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, 
	                       &x_temp, &xT_temp );
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, 
	                       &y_temp, &yT_temp );
	bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, 
	                       y, &yT );

	bli_scalv( &kappac, &xT_temp );
	bli_scalv( beta, &yT_temp );
	bli_axpyv( alpha, &xT_temp, &yT_temp );

	bli_subv( &yT_temp, &yT );
	bli_normfv( &yT, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &x_temp );
	bli_obj_free( &y_temp );
}
Example #10
0
void bli_hemv( obj_t*  alpha,
               obj_t*  a,
               obj_t*  x,
               obj_t*  beta,
               obj_t*  y )
{
	hemv_t* hemv_cntl;
	num_t   dt_targ_a;
	num_t   dt_targ_x;
	num_t   dt_targ_y;
	bool_t  a_has_unit_inc;
	bool_t  x_has_unit_inc;
	bool_t  y_has_unit_inc;
	obj_t   alpha_local;
	obj_t   beta_local;
	num_t   dt_alpha;
	num_t   dt_beta;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_hemv_check( alpha, a, x, beta, y );


	// Query the target datatypes of each object.
	dt_targ_a = bli_obj_target_datatype( *a );
	dt_targ_x = bli_obj_target_datatype( *x );
	dt_targ_y = bli_obj_target_datatype( *y );

	// Determine whether each operand with unit stride.
	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
	                   bli_obj_is_col_stored( *a ) );
	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
	y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );


	// Create an object to hold a copy-cast of alpha. Notice that we use
	// the type union of the target datatypes of a and x to prevent any
	// unnecessary loss of information during the computation.
	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
	bli_obj_scalar_init_detached_copy_of( dt_alpha,
	                             BLIS_NO_CONJUGATE,
	                             alpha,
	                             &alpha_local );

	// Create an object to hold a copy-cast of beta. Notice that we use
	// the datatype of y. Here's why: If y is real and beta is complex,
	// there is no reason to keep beta_local in the complex domain since
	// the complex part of beta*y will not be stored. If y is complex and
	// beta is real then beta is harmlessly promoted to complex.
	dt_beta = dt_targ_y;
	bli_obj_scalar_init_detached_copy_of( dt_beta,
	                             BLIS_NO_CONJUGATE,
	                             beta,
	                             &beta_local );


	// If all operands have unit stride, we choose a control tree for calling
	// the unblocked implementation directly without any blocking.
	if ( a_has_unit_inc &&
	     x_has_unit_inc &&
	     y_has_unit_inc )
	{
		// We use two control trees to handle the four cases corresponding to
		// combinations of upper/lower triangular storage and row/column-storage.
		// The row-stored lower triangular and column-stored upper triangular
		// trees are identical. Same for the remaining two trees.
		if ( bli_obj_is_lower( *a ) )
		{
			if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
			else                               hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
		}
		else // if ( bli_obj_is_upper( *a ) )
		{
			if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
			else                               hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
		}
	}
	else
	{
		// Mark objects with unit stride as already being packed. This prevents
		// unnecessary packing from happening within the blocked algorithm.
		if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
		if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
		if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );

		// Here, we make a similar choice as above, except that (1) we look
		// at storage tilt, and (2) we choose a tree that performs blocking.
		if ( bli_obj_is_lower( *a ) )
		{
			if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
			else                               hemv_cntl = hemv_cntl_ge_lcol_urow;
		}
		else // if ( bli_obj_is_upper( *a ) )
		{
			if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
			else                               hemv_cntl = hemv_cntl_ge_lrow_ucol;
		}
	}


	// Invoke the internal back-end with the copy-casts of scalars and the
	// chosen control tree. Set conjh to BLIS_CONJUGATE to invoke the
	// Hermitian (and not symmetric) algorithms.
	bli_hemv_int( BLIS_CONJUGATE,
	              &alpha_local,
	              a,
	              x,
	              &beta_local,
	              y,
	              hemv_cntl );
}
Example #11
0
void libblis_test_her2k_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         b,
       obj_t*         beta,
       obj_t*         c,
       obj_t*         c_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_datatype( *c );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *c );

	dim_t  m       = bli_obj_length( *c );
	dim_t  k       = bli_obj_width_after_trans( *a );

	obj_t  alphac, ah, bh;
	obj_t  norm;
	obj_t  t, v, w1, w2, z;

	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - b is randomized.
	// - c_orig is randomized and Hermitian.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	// - beta must be real-valued.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   C := beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H
	//
	// is functioning correctly if
	//
	//   normf( v - z )
	//
	// is negligible, where
	//
	//   v = C * t
	//   z = ( beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H ) * t
	//     = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * transa(A)^H * t
	//     = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * w2
	//     = beta * C_orig * t + alpha * transa(A) * w1              + conj(alpha) * transb(B) * w2
	//     = beta * C_orig * t + alpha * transa(A) * w1              + z
	//     = beta * C_orig * t + z
	//

	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah );
	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *b, bh );

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac );

	bli_obj_create( dt, m, 1, 0, 0, &t );
	bli_obj_create( dt, m, 1, 0, 0, &v );
	bli_obj_create( dt, k, 1, 0, 0, &w1 );
	bli_obj_create( dt, k, 1, 0, 0, &w2 );
	bli_obj_create( dt, m, 1, 0, 0, &z );

	libblis_test_vobj_randomize( params, TRUE, &t );

	bli_hemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	bli_gemv( &BLIS_ONE, &ah, &t, &BLIS_ZERO, &w2 );
	bli_gemv( &BLIS_ONE, &bh, &t, &BLIS_ZERO, &w1 );
	bli_gemv( alpha, a, &w1, &BLIS_ZERO, &z );
	bli_gemv( &alphac, b, &w2, &BLIS_ONE, &z );
	bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z );

	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w1 );
	bli_obj_free( &w2 );
	bli_obj_free( &z );
}
Example #12
0
void bli_trmv_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  x,
       cntx_t* cntx
     )
{
	trmv_t* trmv_cntl;
	num_t   dt_targ_a;
	num_t   dt_targ_x;
	bool_t  a_has_unit_inc;
	bool_t  x_has_unit_inc;
	obj_t   alpha_local;
	num_t   dt_alpha;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_trmv_check( alpha, a, x );


	// Query the target datatypes of each object.
	dt_targ_a = bli_obj_target_datatype( *a );
	dt_targ_x = bli_obj_target_datatype( *x );

	// Determine whether each operand with unit stride.
	a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
	                   bli_obj_is_col_stored( *a ) );
	x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );


	// Create an object to hold a copy-cast of alpha. Notice that we use
	// the type union of the target datatypes of a and x to prevent any
	// unnecessary loss of information during the computation.
	dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
	bli_obj_scalar_init_detached_copy_of( dt_alpha,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );


	// If all operands have unit stride, we choose a control tree for calling
	// the unblocked implementation directly without any blocking.
	if ( a_has_unit_inc &&
	     x_has_unit_inc )
	{
		// We use two control trees to handle the four cases corresponding to
		// combinations of transposition and row/column-storage.
		// The row-stored without transpose and column-stored with transpose
		// trees are identical. Same for the remaining two trees.
		if ( bli_obj_has_notrans( *a ) )
		{
			if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
			else                               trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
		}
		else // if ( bli_obj_has_trans( *a ) )
		{
			if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
			else                               trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
		}
	}
	else
	{
		// Mark objects with unit stride as already being packed. This prevents
		// unnecessary packing from happening within the blocked algorithm.
		if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
		if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );

		// Here, we make a similar choice as above, except that (1) we look
		// at storage tilt, and (2) we choose a tree that performs blocking.
		if ( bli_obj_has_notrans( *a ) )
		{
			if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol;
			else                               trmv_cntl = trmv_cntl_ge_ncol_trow;
		}
		else // if ( bli_obj_has_trans( *a ) )
		{
			if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow;
			else                               trmv_cntl = trmv_cntl_ge_nrow_tcol;
		}
	}

	// Invoke the internal back-end with the copy-cast of alpha and the
	// chosen control tree.
	bli_trmv_int( &alpha_local,
	              a,
	              x,
	              cntx,
	              trmv_cntl );
}
Example #13
0
void bli_her2_int( conj_t  conjh,
                   obj_t*  alpha,
                   obj_t*  alpha_conj,
                   obj_t*  x,
                   obj_t*  y,
                   obj_t*  c,
                   cntx_t* cntx,
                   her2_t* cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     alpha_local;
	obj_t     alpha_conj_local;
	obj_t     x_local;
	obj_t     y_local;
	obj_t     c_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
	{
		if ( bli_is_conj( conjh ) ) bli_her2_check( alpha, x, y, c );
		else                        bli_syr2_check( alpha, x, y, c );
	}

	// If C, x, or y has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( c ) ) return;
	if ( bli_obj_has_zero_dim( x ) ) return;
	if ( bli_obj_has_zero_dim( y ) ) return;

	// Alias the operands in case we need to apply conjugations.
	bli_obj_alias_to( x, &x_local );
	bli_obj_alias_to( y, &y_local );
	bli_obj_alias_to( c, &c_local );

	// If matrix C is marked for conjugation, we interpret this as a request
	// to apply a conjugation to the other operands.
	if ( bli_obj_has_conj( &c_local ) )
	{
		bli_obj_toggle_conj( &c_local );

		bli_obj_toggle_conj( &x_local );
		bli_obj_toggle_conj( &y_local );

		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
		                                      BLIS_CONJUGATE,
		                                      alpha,
		                                      &alpha_local );
		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ),
		                                      BLIS_CONJUGATE,
		                                      alpha_conj,
		                                      &alpha_conj_local );
	}
	else
	{
		bli_obj_alias_to( *alpha, alpha_local );
		bli_obj_alias_to( *alpha_conj, alpha_conj_local );
	}


	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( conjh,
	   &alpha_local,
	   &alpha_conj_local,
	   &x_local,
	   &y_local,
	   &c_local,
	   cntx,
	   cntl );
}
Example #14
0
void bli_her2k_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t    alpha_conj;
	obj_t    c_local;
	obj_t    a_local;
	obj_t    bh_local;
	obj_t    b_local;
	obj_t    ah_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_her2k_check( alpha, a, b, beta, c, cntx );

	// If alpha is zero, scale by beta, zero the imaginary components of
	// the diagonal elements, and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		bli_setid( &BLIS_ZERO, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( a, &a_local );
	bli_obj_alias_to( b, &b_local );
	bli_obj_alias_to( c, &c_local );
	bli_obj_set_as_root( &c_local );

	// For her2k, the first and second right-hand "B" operands are simply B'
	// and A'.
	bli_obj_alias_to( b, &bh_local );
	bli_obj_induce_trans( &bh_local );
	bli_obj_toggle_conj( &bh_local );
	bli_obj_alias_to( a, &ah_local );
	bli_obj_induce_trans( &ah_local );
	bli_obj_toggle_conj( &ah_local );

	// Initialize a conjugated copy of alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_dt( a ),
	                                      BLIS_CONJUGATE,
	                                      alpha,
	                                      &alpha_conj );

	// An optimization: If C is stored by rows and the micro-kernel prefers
	// contiguous columns, or if C is stored by columns and the micro-kernel
	// prefers contiguous rows, transpose the entire operation to allow the
	// micro-kernel to access elements of C in its preferred manner.
	if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
	{
		bli_obj_swap( &a_local, &bh_local );
		bli_obj_swap( &b_local, &ah_local );

		bli_obj_induce_trans( &a_local );
		bli_obj_induce_trans( &bh_local );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &ah_local );

		bli_obj_induce_trans( &c_local );
	}

	// Record the threading for each level within the context.
	bli_cntx_set_thrloop_from_env( BLIS_HER2K, BLIS_LEFT, cntx,
                                   bli_obj_length( &c_local ),
                                   bli_obj_width( &c_local ),
                                   bli_obj_width( &a_local ) );

	// Invoke herk twice, using beta only the first time.

	// Invoke the internal back-end.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  alpha,
	  &a_local,
	  &bh_local,
	  beta,
	  &c_local,
	  cntx,
	  cntl
	);

	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  &alpha_conj,
	  &b_local,
	  &ah_local,
	  &BLIS_ONE,
	  &c_local,
	  cntx,
	  cntl
	);

	// The Hermitian rank-2k product was computed as A*B'+B*A', even for
	// the diagonal elements. Mathematically, the imaginary components of
	// diagonal elements of a Hermitian rank-2k product should always be
	// zero. However, in practice, they sometimes accumulate meaningless
	// non-zero values. To prevent this, we explicitly set those values
	// to zero before returning.
	bli_setid( &BLIS_ZERO, &c_local );
}
Example #15
0
//
// Define object-based interface.
//
void bli_dotxf( obj_t*  alpha,
                obj_t*  a,
                obj_t*  x,
                obj_t*  beta,
                obj_t*  y )
{
	num_t     dt        = bli_obj_datatype( *x );

	conj_t    conja     = bli_obj_conj_status( *a );
	conj_t    conjx     = bli_obj_conj_status( *x );

	dim_t     m         = bli_obj_vector_dim( *y );
	dim_t     b_n       = bli_obj_vector_dim( *x );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     rs_a      = bli_obj_row_stride( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );

	void*     buf_x     = bli_obj_buffer_at_off( *x );
	inc_t     inc_x     = bli_obj_vector_inc( *x );

	void*     buf_y     = bli_obj_buffer_at_off( *y );
	inc_t     inc_y     = bli_obj_vector_inc( *y );

	obj_t     alpha_local;
	void*     buf_alpha;

	obj_t     beta_local;
	void*     buf_beta;

	FUNCPTR_T f         = ftypes[dt];

	if ( bli_error_checking_is_enabled() )
	    bli_dotxf_check( alpha, a, x, beta, y );

	// Create local copy-casts of the scalars (and apply internal conjugation
	// if needed).
	bli_obj_scalar_init_detached_copy_of( dt,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );
	bli_obj_scalar_init_detached_copy_of( dt,
	                                      BLIS_NO_CONJUGATE,
	                                      beta,
	                                      &beta_local );

	// Extract the scalar buffers.
	buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local );
	buf_beta  = bli_obj_buffer_for_1x1( dt, beta_local );

	// Support cases where matrix A requires a transposition.
	if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); }

	// Invoke the void pointer-based function.
	f( conja,
	   conjx,
	   m,
	   b_n,
	   buf_alpha,
	   buf_a, rs_a, cs_a,
	   buf_x, inc_x,
	   buf_beta,
	   buf_y, inc_y );
}
Example #16
0
void bli_ger_int( conj_t  conjx,
                  conj_t  conjy,
                  obj_t*  alpha,
                  obj_t*  x,
                  obj_t*  y,
                  obj_t*  a,
                  cntx_t* cntx,
                  ger_t*  cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     alpha_local;
	obj_t     x_local;
	obj_t     y_local;
	obj_t     a_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_ger_check( alpha, x, y, a );

	// If A has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( a ) ) return;

	// If x or y has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( x ) ||
	     bli_obj_has_zero_dim( y ) ) return;

	// Alias the objects, applying conjx and conjy to x and y, respectively.
	bli_obj_alias_with_conj( conjx, x, &x_local );
	bli_obj_alias_with_conj( conjy, y, &y_local );
	bli_obj_alias_to( a, &a_local );

	// If matrix A is marked for conjugation, we interpret this as a request
	// to apply a conjugation to the other operands.
	if ( bli_obj_has_conj( &a_local ) )
	{
		bli_obj_toggle_conj( &a_local );

		bli_obj_toggle_conj( &x_local );
		bli_obj_toggle_conj( &y_local );

		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
		                                      BLIS_CONJUGATE,
		                                      alpha,
		                                      &alpha_local );
	}
	else
	{
		bli_obj_alias_to( *alpha, alpha_local );
	}

	// If we are about the call a leaf-level implementation, and matrix A
	// still needs a transposition, then we must induce one by swapping the
	// strides and dimensions.
	if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
	{
		bli_obj_induce_trans( &a_local );
		bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
	}

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( &alpha_local,
	   &x_local,
	   &y_local,
	   &a_local,
	   cntx,
	   cntl );
}