示例#1
0
siz_t bli_datatype_size( num_t dt )
{
	if ( bli_error_checking_is_enabled() )
		bli_datatype_size_check( dt );

	return dt_sizes[dt];
}
示例#2
0
num_t bli_datatype_union( num_t dt1, num_t dt2 )
{
	if ( bli_error_checking_is_enabled() )
		bli_datatype_union_check( dt1, dt2 );

	return type_union[dt1][dt2];
}
示例#3
0
void bli_obj_create_const( double value, obj_t* obj )
{
	gint_t*   temp_i;
	float*    temp_s;
	double*   temp_d;
	scomplex* temp_c;
	dcomplex* temp_z;

	if ( bli_error_checking_is_enabled() )
		bli_obj_create_const_check( value, obj );

	bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, obj );

	temp_s = bli_obj_buffer_for_const( BLIS_FLOAT,    *obj );
	temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE,   *obj );
	temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *obj );
	temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *obj );
	temp_i = bli_obj_buffer_for_const( BLIS_INT,      *obj );

	// Use the bli_??sets() macros to set the temp variables in order to
	// properly support BLIS_ENABLE_C99_COMPLEX.
	bli_dssets( value, 0.0, *temp_s );
	bli_ddsets( value, 0.0, *temp_d );
	bli_dcsets( value, 0.0, *temp_c );
	bli_dzsets( value, 0.0, *temp_z );

	*temp_i = ( gint_t ) value;
}
示例#4
0
文件: bli_addv.c 项目: ShawnLess/blis
//
// Define object-based interface.
//
void bli_addv( obj_t*  x,
               obj_t*  y )
{
	num_t     dt        = bli_obj_datatype( *x );

	conj_t    conjx     = bli_obj_conj_status( *x );

	dim_t     n         = bli_obj_vector_dim( *x );

	inc_t     inc_x     = bli_obj_vector_inc( *x );
	void*     buf_x     = bli_obj_buffer_at_off( *x );

	inc_t     inc_y     = bli_obj_vector_inc( *y );
	void*     buf_y     = bli_obj_buffer_at_off( *y );

	FUNCPTR_T f         = ftypes[dt];

	if ( bli_error_checking_is_enabled() )
	    bli_addv_check( x, y );

	// Invoke the void pointer-based function.
	f( conjx,
	   n,
	   buf_x, inc_x,
	   buf_y, inc_y );
}
示例#5
0
//
// Define object-based interface.
//
void bli_randm( obj_t* x )
{
	if ( bli_error_checking_is_enabled() )
		bli_randm_check( x );

	bli_randm_unb_var1( x );
}
示例#6
0
void bli_getsc( obj_t*  chi,
                double* beta_r,
                double* beta_i )
{
	num_t     dt_chi   = bli_obj_datatype( *chi );
	num_t     dt_def   = BLIS_DCOMPLEX;
	num_t     dt_use;

	// If chi is a constant object, default to using the dcomplex
	// value within since we don't know if the caller needs just the
	// real or the real and imaginary parts.
	void*     buf_chi  = bli_obj_scalar_buffer( dt_def, *chi );

	FUNCPTR_T f;

	if ( bli_error_checking_is_enabled() )
		bli_getsc_check( chi, beta_r, beta_i );

	// The _check() routine prevents integer types, so we know that chi
	// is either a constant or an actual floating-point type.
	if ( bli_is_constant( dt_chi ) ) dt_use = dt_def;
	else                             dt_use = dt_chi;

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_use];

	// Invoke the function.
	f( buf_chi,
	   beta_r,
	   beta_i );
}
示例#7
0
//
// Define object-based interface.
//
void bli_scalv( obj_t*  alpha,
                obj_t*  x )
{
	num_t     dt        = bli_obj_datatype( *x );

	dim_t     n         = bli_obj_vector_dim( *x );

	inc_t     inc_x     = bli_obj_vector_inc( *x );
	void*     buf_x     = bli_obj_buffer_at_off( *x );

	obj_t     alpha_local;
	void*     buf_alpha;

	FUNCPTR_T f         = ftypes[dt];

	if ( bli_error_checking_is_enabled() )
	    bli_scalv_check( alpha, x );

	// Create a local copy-cast of alpha (and apply internal conjugation
	// if needed).
	bli_obj_scalar_init_detached_copy_of( dt,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );

	// Extract the scalar buffer.
	buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local );

	// Invoke the void pointer-based function.
	f( BLIS_NO_CONJUGATE, // conjugation applied during copy-cast.
	   n,
	   buf_alpha,
	   buf_x, inc_x );
}
示例#8
0
void bli_scalv_int( obj_t*   beta,
                    obj_t*   x,
                    scalv_t* cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_scalv_int_check( beta, x, cntl );

	// First check if we are to skip this operation.
	if ( cntl_is_noop( cntl ) ) return;

	// Return early if one of the matrix operands has a zero dimension.
	if ( bli_obj_has_zero_dim( *x ) ) return;

	// Return early if the beta scalar equals one.
	if ( bli_obj_equals( beta, &BLIS_ONE ) ) return;

	// Extract the variant number and implementation type.
	n = cntl_var_num( cntl );
	i = cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( beta,
	   x );
}
示例#9
0
//
// Define object-based interface.
//
void bli_normfv( obj_t* x,
                 obj_t* norm )
{
	if ( bli_error_checking_is_enabled() )
		bli_normfv_check( x, norm );

	bli_normfv_unb_var1( x, norm );
}
示例#10
0
//
// Define object-based interface.
//
void bli_addsc( obj_t* chi,
                obj_t* psi )
{
	if ( bli_error_checking_is_enabled() )
		bli_addsc_check( chi, psi );

	bli_addsc_unb_var1( chi, psi );
}
示例#11
0
//
// Define object-based interface.
//
void bli_subd( obj_t* x,
               obj_t* y )
{
	if ( bli_error_checking_is_enabled() )
		bli_subd_check( x, y );

	bli_subd_unb_var1( x, y );
}
示例#12
0
//
// Define object-based interface.
//
void bli_copym( obj_t* x,
                obj_t* y )
{
	if ( bli_error_checking_is_enabled() )
		bli_copym_check( x, y );

	bli_copym_unb_var1( x, y );
}
示例#13
0
//
// Define object-based interface.
//
void bli_normfsc( obj_t* chi,
                  obj_t* norm )
{
	if ( bli_error_checking_is_enabled() )
		bli_normfsc_check( chi, norm );

	bli_normfsc_unb_var1( chi, norm );
}
示例#14
0
//
// Define object-based interface.
//
void bli_unzipsc( obj_t* beta,
                  obj_t* chi_r,
                  obj_t* chi_i )
{
	if ( bli_error_checking_is_enabled() )
		bli_unzipsc_check( beta, chi_r, chi_i );

	bli_unzipsc_unb_var1( beta, chi_r, chi_i );
}
示例#15
0
//
// Define object-based interface.
//
void bli_zipsc( obj_t* beta_r,
                obj_t* beta_i,
                obj_t* chi )
{
	if ( bli_error_checking_is_enabled() )
		bli_zipsc_check( beta_r, beta_i, chi );

	bli_zipsc_unb_var1( beta_r, beta_i, chi );
}
示例#16
0
//
// Define object-based interface.
//
void bli_mksymm( obj_t* a )
{
	if ( bli_error_checking_is_enabled() )
		bli_mksymm_check( a );

	bli_mksymm_unb_var1( a );

	// Mark the original object as dense.
	//bli_obj_set_uplo( BLIS_DENSE, *a );
}
示例#17
0
//
// Define object-based interface.
//
void bli_scalm( obj_t* beta,
                obj_t* x )
{
	if ( bli_error_checking_is_enabled() )
		bli_scalm_check( beta, x );

	bli_scalm_int( beta,
	               x,
	               scalm_cntl );
}
示例#18
0
void* bli_malloc_align
     (
       malloc_ft f,
       size_t    size,
       size_t    align_size
     )
{
	const size_t ptr_size     = sizeof( void* );
	size_t       align_offset = 0;
	void*        p_orig;
	int8_t*      p_byte;
	void**       p_addr;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_malloc_align_check( f, size, align_size );

	// Return early if zero bytes were requested.
	if ( size == 0 ) return NULL;

	// Add the alignment size and the size of a pointer to the number
	// of bytes to allocate.
	size += align_size + ptr_size;

	// Call the allocation function.
	p_orig = f( size );

	// If NULL was returned, something is probably very wrong.
	if ( p_orig == NULL ) bli_abort();

	// Advance the pointer by one pointer element.
	p_byte = p_orig;
	p_byte += ptr_size;

	// Compute the offset to the desired alignment.
	if ( bli_is_unaligned_to( ( siz_t )p_byte, ( siz_t )align_size ) )
	{
		align_offset = align_size -
		               bli_offset_past_alignment( ( siz_t )p_byte,
		                                          ( siz_t )align_size );
	}

	// Advance the pointer using the difference between the alignment
	// size and the alignment offset.
	p_byte += align_offset;

	// Compute the address of the pointer element just before the start
	// of the aligned address, and store the original address there.
	p_addr = ( void** )(p_byte - ptr_size);
	*p_addr = p_orig;

	// Return the aligned pointer.
	return p_byte;
}
示例#19
0
void bli_obj_create_const_copy_of( obj_t* a, obj_t* b )
{
	gint_t*   temp_i;
	float*    temp_s;
	double*   temp_d;
	scomplex* temp_c;
	dcomplex* temp_z;
	void*     buf_a;
	dcomplex  value;

	if ( bli_error_checking_is_enabled() )
		bli_obj_create_const_copy_of_check( a, b );

	bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, b );

	temp_s = bli_obj_buffer_for_const( BLIS_FLOAT,    *b );
	temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE,   *b );
	temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b );
	temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b );
	temp_i = bli_obj_buffer_for_const( BLIS_INT,      *b );

	buf_a = bli_obj_buffer_at_off( *a );

	bli_zzsets( 0.0, 0.0, value ); 

	if ( bli_obj_is_float( *a ) )
	{
		bli_szcopys( *(( float*    )buf_a), value );
	}
	else if ( bli_obj_is_double( *a ) )
	{
		bli_dzcopys( *(( double*   )buf_a), value );
	}
	else if ( bli_obj_is_scomplex( *a ) )
	{
		bli_czcopys( *(( scomplex* )buf_a), value );
	}
	else if ( bli_obj_is_dcomplex( *a ) )
	{
		bli_zzcopys( *(( dcomplex* )buf_a), value );
	}
	else
	{
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	bli_zscopys( value, *temp_s );
	bli_zdcopys( value, *temp_d );
	bli_zccopys( value, *temp_c );
	bli_zzcopys( value, *temp_z );

	*temp_i = ( gint_t ) bli_zreal( value );
}
示例#20
0
void bli_obj_print( char* label, obj_t* obj )
{
	FILE*  file     = stdout;

	if ( bli_error_checking_is_enabled() )
		bli_obj_print_check( label, obj );

	fprintf( file, "\n" );
	fprintf( file, "%s\n", label );
	fprintf( file, "\n" );

	fprintf( file, " m x n           %lu x %lu\n", ( unsigned long int )bli_obj_length( *obj ),
	                                               ( unsigned long int )bli_obj_width( *obj ) );
	fprintf( file, "\n" );

	fprintf( file, " offm, offn      %lu, %lu\n", ( unsigned long int )bli_obj_row_off( *obj ),
	                                              ( unsigned long int )bli_obj_col_off( *obj ) );
	fprintf( file, " diagoff         %ld\n", ( signed long int )bli_obj_diag_offset( *obj ) );
	fprintf( file, "\n" );

	fprintf( file, " buf             %p\n",  ( void* )bli_obj_buffer( *obj ) );
	fprintf( file, " elem size       %lu\n", ( unsigned long int )bli_obj_elem_size( *obj ) );
	fprintf( file, " rs, cs          %ld, %ld\n", ( signed long int )bli_obj_row_stride( *obj ),
	                                              ( signed long int )bli_obj_col_stride( *obj ) );
	fprintf( file, " is              %ld\n", ( signed long int )bli_obj_imag_stride( *obj ) );
	fprintf( file, " m_padded        %lu\n", ( unsigned long int )bli_obj_padded_length( *obj ) );
	fprintf( file, " n_padded        %lu\n", ( unsigned long int )bli_obj_padded_width( *obj ) );
	fprintf( file, " ps              %lu\n", ( unsigned long int )bli_obj_panel_stride( *obj ) );
	fprintf( file, "\n" );

	fprintf( file, " info            %lX\n", ( unsigned long int )(*obj).info );
	fprintf( file, " - is complex    %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) );
	fprintf( file, " - is d. prec    %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) );
	fprintf( file, " - datatype      %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) );
	fprintf( file, " - target dt     %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) );
	fprintf( file, " - exec dt       %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) );
	fprintf( file, " - has trans     %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) );
	fprintf( file, " - has conj      %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) );
	fprintf( file, " - unit diag?    %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) );
	fprintf( file, " - struc type    %lu\n", ( unsigned long int )bli_obj_struc( *obj ) >> BLIS_STRUC_SHIFT );
	fprintf( file, " - uplo type     %lu\n", ( unsigned long int )bli_obj_uplo( *obj ) >> BLIS_UPLO_SHIFT );
	fprintf( file, "   - is upper    %lu\n", ( unsigned long int )bli_obj_is_upper( *obj ) );
	fprintf( file, "   - is lower    %lu\n", ( unsigned long int )bli_obj_is_lower( *obj ) );
	fprintf( file, "   - is dense    %lu\n", ( unsigned long int )bli_obj_is_dense( *obj ) );
	fprintf( file, " - pack schema   %lu\n", ( unsigned long int )bli_obj_pack_schema( *obj ) >> BLIS_PACK_SCHEMA_SHIFT );
	fprintf( file, " - packinv diag? %lu\n", ( unsigned long int )bli_obj_has_inverted_diag( *obj ) );
	fprintf( file, " - pack ordifup  %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_upper( *obj ) );
	fprintf( file, " - pack ordiflo  %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_lower( *obj ) );
	fprintf( file, " - packbuf type  %lu\n", ( unsigned long int )bli_obj_pack_buffer_type( *obj ) >> BLIS_PACK_BUFFER_SHIFT );
	fprintf( file, "\n" );
}
示例#21
0
void bli_her_int( conj_t  conjh,
                  obj_t*  alpha,
                  obj_t*  x,
                  obj_t*  c,
                  her_t*  cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     x_local;
	obj_t     c_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_her_int_check( conjh, alpha, x, c, cntl );

	// If C or x has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( *c ) ) return;
	if ( bli_obj_has_zero_dim( *x ) ) return;

	// Alias the operands in case we need to apply conjugations.
	bli_obj_alias_to( *x, x_local );
	bli_obj_alias_to( *c, c_local );

	// If matrix C is marked for conjugation, we interpret this as a request
	// to apply a conjugation to the other operands.
	if ( bli_obj_has_conj( c_local ) )
	{
		bli_obj_toggle_conj( c_local );

		// Notice that we don't need to conjugate alpha since it is guaranteed
		// to be real.
		bli_obj_toggle_conj( x_local );
	}

	// Extract the variant number and implementation type.
	n = cntl_var_num( cntl );
	i = cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( conjh,
	   alpha,
	   &x_local,
	   &c_local,
	   cntl );
}
示例#22
0
void bli_obj_free( obj_t* obj )
{
	if ( bli_error_checking_is_enabled() )
		bli_obj_free_check( obj );

	// Don't dereference obj if it is NULL.
	if ( obj != NULL )
	{
		// Idiot safety: Don't try to free the buffer field if the object
		// is a detached scalar (ie: if the buffer pointer refers to the
		// address of the internal scalar buffer).
		if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) )
			bli_free_user( bli_obj_buffer( *obj ) );
	}
}
示例#23
0
void bli_fprintm( FILE* file, char* s1, obj_t* x, char* format, char* s2 )
{
	num_t     dt_x      = bli_obj_datatype( *x );

	dim_t     m         = bli_obj_length( *x );
	dim_t     n         = bli_obj_width( *x );

	inc_t     rs_x      = bli_obj_row_stride( *x );
	inc_t     cs_x      = bli_obj_col_stride( *x );
	void*     buf_x     = bli_obj_buffer_at_off( *x );

	FUNCPTR_T f;

	if ( bli_error_checking_is_enabled() )
		bli_fprintm_check( file, s1, x, format, s2 );

	// Handle constants up front.
	if ( dt_x == BLIS_CONSTANT )
	{
		float*    sp = bli_obj_buffer_for_const( BLIS_FLOAT,    *x );
		double*   dp = bli_obj_buffer_for_const( BLIS_DOUBLE,   *x );
		scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *x );
		dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *x );
		gint_t*   ip = bli_obj_buffer_for_const( BLIS_INT,      *x );

		fprintf( file, "%s\n", s1 );
		fprintf( file, " float:     %9.2e\n",         bli_sreal( *sp ) );
		fprintf( file, " double:    %9.2e\n",         bli_dreal( *dp ) );
		fprintf( file, " scomplex:  %9.2e + %9.2e\n", bli_creal( *cp ), bli_cimag( *cp ) );
		fprintf( file, " dcomplex:  %9.2e + %9.2e\n", bli_zreal( *zp ), bli_zimag( *zp ) );
		fprintf( file, " int:       %ld\n",           *ip );
		fprintf( file, "\n" );
		return;
	}

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_x];

	// Invoke the function.
	f( file,
	   s1,
	   m,
	   n,
	   buf_x, rs_x, cs_x,
	   format,
	   s2 );
}
示例#24
0
void bli_obj_attach_buffer( void*  p,
                            inc_t  rs,
                            inc_t  cs,
                            inc_t  is,
                            obj_t* obj )
{
	// Check that the strides and lengths are compatible. Note that the
	// user *must* specify valid row and column strides when attaching an
	// external buffer.
	if ( bli_error_checking_is_enabled() )
		bli_obj_attach_buffer_check( p, rs, cs, is, obj );

	// Update the object.
	bli_obj_set_buffer( p, *obj );
	bli_obj_set_strides( rs, cs, *obj );
	bli_obj_set_imag_stride( is, *obj );
}
示例#25
0
void bli_obj_create_without_buffer( num_t  dt,
                                    dim_t  m,
                                    dim_t  n,
                                    obj_t* obj )
{
	siz_t  elem_size;
	void*  s;

	if ( bli_error_checking_is_enabled() )
		bli_obj_create_without_buffer_check( dt, m, n, obj );

	// Query the size of one element of the object's pre-set datatype.
	elem_size = bli_datatype_size( dt );

	// Set any default properties that are appropriate.
	bli_obj_set_defaults( *obj );

	// Set the object root to itself, since obj is not presumed to be a view
	// into a larger matrix. This is typically the only time this field is
	// ever set; henceforth, subpartitions and aliases to this object will
	// get copies of this field, and thus always have access to its
	// "greatest-grand" parent (ie: the original parent, or "root", object).
	// However, there ARE a few places where it is convenient to reset the
	// root field explicitly via bli_obj_set_as_root(). (We do not list
	// those places here. Just grep for bli_obj_set_as_root within the
	// top-level 'frame' directory to see them.
	bli_obj_set_as_root( *obj );

	// Set individual fields.
	bli_obj_set_buffer( NULL, *obj );
	bli_obj_set_datatype( dt, *obj );
	bli_obj_set_elem_size( elem_size, *obj );
	bli_obj_set_target_datatype( dt, *obj );
	bli_obj_set_execution_datatype( dt, *obj );
	bli_obj_set_dims( m, n, *obj );
	bli_obj_set_offs( 0, 0, *obj );
	bli_obj_set_diag_offset( 0, *obj );

	// Set the internal scalar to 1.0.
	s = bli_obj_internal_scalar_buffer( *obj );

	if      ( bli_is_float( dt )    ) { bli_sset1s( *(( float*    )s) ); }
	else if ( bli_is_double( dt )   ) { bli_dset1s( *(( double*   )s) ); }
	else if ( bli_is_scomplex( dt ) ) { bli_cset1s( *(( scomplex* )s) ); }
	else if ( bli_is_dcomplex( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); }
}
示例#26
0
void bli_scalm_int( obj_t*   beta,
                    obj_t*   x,
                    scalm_t* cntl )
{
	obj_t     x_local;
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_scalm_int_check( beta, x, cntl );

	// First check if we are to skip this operation.
	if ( cntl_is_noop( cntl ) ) return;

	// Return early if one of the matrix operands has a zero dimension.
	if ( bli_obj_has_zero_dim( *x ) ) return;

	// Return early if both beta and the scalar attached to x are unit.
	if ( bli_obj_equals( beta, &BLIS_ONE ) &&
	     bli_obj_scalar_equals( x, &BLIS_ONE ) ) return;

	// Alias x to x_local so we can apply beta if it is non-unit.
	bli_obj_alias_to( *x, x_local );

	// If beta is non-unit, apply it to the scalar attached to x.
	if ( !bli_obj_equals( beta, &BLIS_ONE ) )
	{
		bli_obj_scalar_apply_scalar( beta, &x_local );
	}

	// Extract the variant number and implementation type.
	n = cntl_var_num( cntl );
	i = cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( &x_local );
}
示例#27
0
//
// Define object-based interface.
//
void bli_setm( obj_t* beta,
               obj_t* x )
{
	num_t dt_x;
	obj_t beta_local;

	if ( bli_error_checking_is_enabled() )
		bli_setm_check( beta, x );

	// Use the datatype of x as the target type for beta (since we do
	// not assume mixed domain/type support is enabled).
	dt_x = bli_obj_datatype( *x );

	// Create an object to hold a copy-cast of beta.
	bli_obj_scalar_init_detached_copy_of( dt_x,
	                                      BLIS_NO_CONJUGATE,
	                                      beta,
	                                      &beta_local );

	bli_setm_unb_var1( &beta_local, x );
}
示例#28
0
void bli_syr2_front
     (
       obj_t*  alpha,
       obj_t*  x,
       obj_t*  y,
       obj_t*  c,
       cntx_t* cntx
     )
{
	her2_t* her2_cntl;
	num_t   dt_targ_x;
	num_t   dt_targ_y;
	//num_t   dt_targ_c;
	bool_t  x_has_unit_inc;
	bool_t  y_has_unit_inc;
	bool_t  c_has_unit_inc;
	obj_t   alpha_local;
	num_t   dt_alpha;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_syr2_check( alpha, x, y, c );


	// Query the target datatypes of each object.
	dt_targ_x = bli_obj_target_dt( x );
	dt_targ_y = bli_obj_target_dt( y );
	//dt_targ_c = bli_obj_target_dt( c );

	// Determine whether each operand with unit stride.
	x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
	y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
	c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
	                   bli_obj_is_col_stored( c ) );


	// Create an object to hold a copy-cast of alpha. Notice that we use
	// the type union of the datatypes of x and y.
	dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
	bli_obj_scalar_init_detached_copy_of( dt_alpha,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );


	// If all operands have unit stride, we choose a control tree for calling
	// the unblocked implementation directly without any blocking.
	if ( x_has_unit_inc &&
	     y_has_unit_inc &&
	     c_has_unit_inc )
	{
		// We use two control trees to handle the four cases corresponding to
		// combinations of upper/lower triangular storage and row/column-storage.
		// The row-stored lower triangular and column-stored upper triangular
		// trees are identical. Same for the remaining two trees.
		if ( bli_obj_is_lower( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
			else                               her2_cntl = her2_cntl_bs_ke_lcol_urow;
		}
		else // if ( bli_obj_is_upper( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
			else                               her2_cntl = her2_cntl_bs_ke_lrow_ucol;
		}
	}
	else
	{
		// Mark objects with unit stride as already being packed. This prevents
		// unnecessary packing from happening within the blocked algorithm.
		if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
		if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
		if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );

		// Here, we make a similar choice as above, except that (1) we look
		// at storage tilt, and (2) we choose a tree that performs blocking.
		if ( bli_obj_is_lower( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
			else                               her2_cntl = her2_cntl_ge_lcol_urow;
		}
		else // if ( bli_obj_is_upper( c ) )
		{
			if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
			else                               her2_cntl = her2_cntl_ge_lrow_ucol;
		}
	}

	// Invoke the internal back-end with the copy-cast scalar and the
	// chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the
	// symmetric (and not Hermitian) algorithms.
	bli_her2_int( BLIS_NO_CONJUGATE,
	              &alpha_local,
	              &alpha_local,
	              x,
	              y,
	              c,
	              cntx,
	              her2_cntl );
}
示例#29
0
void bli_her2k_front( obj_t*  alpha,
                      obj_t*  a,
                      obj_t*  b,
                      obj_t*  beta,
                      obj_t*  c,
                      herk_t* cntl )
{
	obj_t    alpha_conj;
	obj_t    c_local;
	obj_t    a_local;
	obj_t    bh_local;
	obj_t    b_local;
	obj_t    ah_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_her2k_check( alpha, a, b, beta, c );

	// If alpha is zero, scale by beta and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );
	bli_obj_alias_to( *c, c_local );
	bli_obj_set_as_root( c_local );

	// For her2k, the first and second right-hand "B" operands are simply B'
	// and A'.
	bli_obj_alias_to( *b, bh_local );
	bli_obj_induce_trans( bh_local );
	bli_obj_toggle_conj( bh_local );
	bli_obj_alias_to( *a, ah_local );
	bli_obj_induce_trans( ah_local );
	bli_obj_toggle_conj( ah_local );

	// Initialize a conjugated copy of alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
	                                      BLIS_CONJUGATE,
	                                      alpha,
	                                      &alpha_conj );

	// An optimization: If C is row-stored, transpose the entire operation
	// so as to allow the macro-kernel more favorable access patterns
	// through C. (The effect of the transposition of A and A' is negligible
	// because those operands are always packed to contiguous memory.)
	if ( bli_obj_is_row_stored( c_local ) )
	{
		bli_obj_swap( a_local, bh_local );
		bli_obj_swap( b_local, ah_local );

		bli_obj_induce_trans( a_local );
		bli_obj_induce_trans( bh_local );
		bli_obj_induce_trans( b_local );
		bli_obj_induce_trans( ah_local );

		bli_obj_induce_trans( c_local );
	}

#if 0
	// Invoke the internal back-end.
	bli_her2k_int( alpha,
	               &a_local,
	               &bh_local,
	               &alpha_conj,
	               &b_local,
	               &ah_local,
	               beta,
	               &c_local,
	               cntl );
#else
	// Invoke herk twice, using beta only the first time.
	bli_herk_int( alpha,
	              &a_local,
	              &bh_local,
	              beta,
	              &c_local,
	              cntl );

	bli_herk_int( &alpha_conj,
	              &b_local,
	              &ah_local,
	              &BLIS_ONE,
	              &c_local,
	              cntl );
#endif
}
示例#30
0
void bli_her2k_front( obj_t*  alpha,
                      obj_t*  a,
                      obj_t*  b,
                      obj_t*  beta,
                      obj_t*  c,
                      gemm_t* cntl )
{
	obj_t    alpha_conj;
	obj_t    c_local;
	obj_t    a_local;
	obj_t    bh_local;
	obj_t    b_local;
	obj_t    ah_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_her2k_check( alpha, a, b, beta, c );

	// If alpha is zero, scale by beta, zero the imaginary components of
	// the diagonal elements, and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		bli_setid( &BLIS_ZERO, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );
	bli_obj_alias_to( *c, c_local );
	bli_obj_set_as_root( c_local );

	// For her2k, the first and second right-hand "B" operands are simply B'
	// and A'.
	bli_obj_alias_to( *b, bh_local );
	bli_obj_induce_trans( bh_local );
	bli_obj_toggle_conj( bh_local );
	bli_obj_alias_to( *a, ah_local );
	bli_obj_induce_trans( ah_local );
	bli_obj_toggle_conj( ah_local );

	// Initialize a conjugated copy of alpha.
	bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
	                                      BLIS_CONJUGATE,
	                                      alpha,
	                                      &alpha_conj );

	// An optimization: If C is stored by rows and the micro-kernel prefers
	// contiguous columns, or if C is stored by columns and the micro-kernel
	// prefers contiguous rows, transpose the entire operation to allow the
	// micro-kernel to access elements of C in its preferred manner.
	if (
	     ( bli_obj_is_row_stored( c_local ) &&
	       bli_func_prefers_contig_cols( bli_obj_datatype( c_local ),
	                                     bli_gemm_cntl_ukrs( cntl ) ) ) ||
	     ( bli_obj_is_col_stored( c_local ) &&
	       bli_func_prefers_contig_rows( bli_obj_datatype( c_local ),
	                                     bli_gemm_cntl_ukrs( cntl ) ) )
	   )
	{
		bli_obj_swap( a_local, bh_local );
		bli_obj_swap( b_local, ah_local );

		bli_obj_induce_trans( a_local );
		bli_obj_induce_trans( bh_local );
		bli_obj_induce_trans( b_local );
		bli_obj_induce_trans( ah_local );

		bli_obj_induce_trans( c_local );
	}

#if 0
	// Invoke the internal back-end.
	bli_her2k_int( alpha,
	               &a_local,
	               &bh_local,
	               &alpha_conj,
	               &b_local,
	               &ah_local,
	               beta,
	               &c_local,
	               cntl );
#else

	// Invoke herk twice, using beta only the first time.
    herk_thrinfo_t** infos = bli_create_herk_thrinfo_paths();
    dim_t n_threads = thread_num_threads( infos[0] );

    // Invoke the internal back-end.
    bli_level3_thread_decorator( n_threads,   
                                 (level3_int_t) bli_herk_int, 
                                 alpha, 
                                 &a_local,  
                                 &bh_local,  
                                 beta, 
                                 &c_local,  
                                 (void*) cntl, 
                                 (void**) infos );

    bli_level3_thread_decorator( n_threads,   
                                 (level3_int_t) bli_herk_int, 
                                 &alpha_conj, 
                                 &b_local,  
                                 &ah_local,  
                                 &BLIS_ONE, 
                                 &c_local,  
                                 (void*) cntl, 
                                 (void**) infos );

    bli_herk_thrinfo_free_paths( infos, n_threads );

#endif

	// The Hermitian rank-2k product was computed as A*B'+B*A', even for
	// the diagonal elements. Mathematically, the imaginary components of
	// diagonal elements of a Hermitian rank-2k product should always be
	// zero. However, in practice, they sometimes accumulate meaningless
	// non-zero values. To prevent this, we explicitly set those values
	// to zero before returning.
	bli_setid( &BLIS_ZERO, &c_local );

}