siz_t bli_datatype_size( num_t dt ) { if ( bli_error_checking_is_enabled() ) bli_datatype_size_check( dt ); return dt_sizes[dt]; }
num_t bli_datatype_union( num_t dt1, num_t dt2 ) { if ( bli_error_checking_is_enabled() ) bli_datatype_union_check( dt1, dt2 ); return type_union[dt1][dt2]; }
void bli_obj_create_const( double value, obj_t* obj ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_check( value, obj ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, obj ); temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, *obj ); temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, *obj ); temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *obj ); temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *obj ); temp_i = bli_obj_buffer_for_const( BLIS_INT, *obj ); // Use the bli_??sets() macros to set the temp variables in order to // properly support BLIS_ENABLE_C99_COMPLEX. bli_dssets( value, 0.0, *temp_s ); bli_ddsets( value, 0.0, *temp_d ); bli_dcsets( value, 0.0, *temp_c ); bli_dzsets( value, 0.0, *temp_z ); *temp_i = ( gint_t ) value; }
// // Define object-based interface. // void bli_addv( obj_t* x, obj_t* y ) { num_t dt = bli_obj_datatype( *x ); conj_t conjx = bli_obj_conj_status( *x ); dim_t n = bli_obj_vector_dim( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t inc_y = bli_obj_vector_inc( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); FUNCPTR_T f = ftypes[dt]; if ( bli_error_checking_is_enabled() ) bli_addv_check( x, y ); // Invoke the void pointer-based function. f( conjx, n, buf_x, inc_x, buf_y, inc_y ); }
// // Define object-based interface. // void bli_randm( obj_t* x ) { if ( bli_error_checking_is_enabled() ) bli_randm_check( x ); bli_randm_unb_var1( x ); }
void bli_getsc( obj_t* chi, double* beta_r, double* beta_i ) { num_t dt_chi = bli_obj_datatype( *chi ); num_t dt_def = BLIS_DCOMPLEX; num_t dt_use; // If chi is a constant object, default to using the dcomplex // value within since we don't know if the caller needs just the // real or the real and imaginary parts. void* buf_chi = bli_obj_scalar_buffer( dt_def, *chi ); FUNCPTR_T f; if ( bli_error_checking_is_enabled() ) bli_getsc_check( chi, beta_r, beta_i ); // The _check() routine prevents integer types, so we know that chi // is either a constant or an actual floating-point type. if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; else dt_use = dt_chi; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_use]; // Invoke the function. f( buf_chi, beta_r, beta_i ); }
// // Define object-based interface. // void bli_scalv( obj_t* alpha, obj_t* x ) { num_t dt = bli_obj_datatype( *x ); dim_t n = bli_obj_vector_dim( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); obj_t alpha_local; void* buf_alpha; FUNCPTR_T f = ftypes[dt]; if ( bli_error_checking_is_enabled() ) bli_scalv_check( alpha, x ); // Create a local copy-cast of alpha (and apply internal conjugation // if needed). bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Extract the scalar buffer. buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); // Invoke the void pointer-based function. f( BLIS_NO_CONJUGATE, // conjugation applied during copy-cast. n, buf_alpha, buf_x, inc_x ); }
void bli_scalv_int( obj_t* beta, obj_t* x, scalv_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_scalv_int_check( beta, x, cntl ); // First check if we are to skip this operation. if ( cntl_is_noop( cntl ) ) return; // Return early if one of the matrix operands has a zero dimension. if ( bli_obj_has_zero_dim( *x ) ) return; // Return early if the beta scalar equals one. if ( bli_obj_equals( beta, &BLIS_ONE ) ) return; // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( beta, x ); }
// // Define object-based interface. // void bli_normfv( obj_t* x, obj_t* norm ) { if ( bli_error_checking_is_enabled() ) bli_normfv_check( x, norm ); bli_normfv_unb_var1( x, norm ); }
// // Define object-based interface. // void bli_addsc( obj_t* chi, obj_t* psi ) { if ( bli_error_checking_is_enabled() ) bli_addsc_check( chi, psi ); bli_addsc_unb_var1( chi, psi ); }
// // Define object-based interface. // void bli_subd( obj_t* x, obj_t* y ) { if ( bli_error_checking_is_enabled() ) bli_subd_check( x, y ); bli_subd_unb_var1( x, y ); }
// // Define object-based interface. // void bli_copym( obj_t* x, obj_t* y ) { if ( bli_error_checking_is_enabled() ) bli_copym_check( x, y ); bli_copym_unb_var1( x, y ); }
// // Define object-based interface. // void bli_normfsc( obj_t* chi, obj_t* norm ) { if ( bli_error_checking_is_enabled() ) bli_normfsc_check( chi, norm ); bli_normfsc_unb_var1( chi, norm ); }
// // Define object-based interface. // void bli_unzipsc( obj_t* beta, obj_t* chi_r, obj_t* chi_i ) { if ( bli_error_checking_is_enabled() ) bli_unzipsc_check( beta, chi_r, chi_i ); bli_unzipsc_unb_var1( beta, chi_r, chi_i ); }
// // Define object-based interface. // void bli_zipsc( obj_t* beta_r, obj_t* beta_i, obj_t* chi ) { if ( bli_error_checking_is_enabled() ) bli_zipsc_check( beta_r, beta_i, chi ); bli_zipsc_unb_var1( beta_r, beta_i, chi ); }
// // Define object-based interface. // void bli_mksymm( obj_t* a ) { if ( bli_error_checking_is_enabled() ) bli_mksymm_check( a ); bli_mksymm_unb_var1( a ); // Mark the original object as dense. //bli_obj_set_uplo( BLIS_DENSE, *a ); }
// // Define object-based interface. // void bli_scalm( obj_t* beta, obj_t* x ) { if ( bli_error_checking_is_enabled() ) bli_scalm_check( beta, x ); bli_scalm_int( beta, x, scalm_cntl ); }
void* bli_malloc_align ( malloc_ft f, size_t size, size_t align_size ) { const size_t ptr_size = sizeof( void* ); size_t align_offset = 0; void* p_orig; int8_t* p_byte; void** p_addr; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_malloc_align_check( f, size, align_size ); // Return early if zero bytes were requested. if ( size == 0 ) return NULL; // Add the alignment size and the size of a pointer to the number // of bytes to allocate. size += align_size + ptr_size; // Call the allocation function. p_orig = f( size ); // If NULL was returned, something is probably very wrong. if ( p_orig == NULL ) bli_abort(); // Advance the pointer by one pointer element. p_byte = p_orig; p_byte += ptr_size; // Compute the offset to the desired alignment. if ( bli_is_unaligned_to( ( siz_t )p_byte, ( siz_t )align_size ) ) { align_offset = align_size - bli_offset_past_alignment( ( siz_t )p_byte, ( siz_t )align_size ); } // Advance the pointer using the difference between the alignment // size and the alignment offset. p_byte += align_offset; // Compute the address of the pointer element just before the start // of the aligned address, and store the original address there. p_addr = ( void** )(p_byte - ptr_size); *p_addr = p_orig; // Return the aligned pointer. return p_byte; }
void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; void* buf_a; dcomplex value; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_copy_of_check( a, b ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, b ); temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, *b ); temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, *b ); temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b ); temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b ); temp_i = bli_obj_buffer_for_const( BLIS_INT, *b ); buf_a = bli_obj_buffer_at_off( *a ); bli_zzsets( 0.0, 0.0, value ); if ( bli_obj_is_float( *a ) ) { bli_szcopys( *(( float* )buf_a), value ); } else if ( bli_obj_is_double( *a ) ) { bli_dzcopys( *(( double* )buf_a), value ); } else if ( bli_obj_is_scomplex( *a ) ) { bli_czcopys( *(( scomplex* )buf_a), value ); } else if ( bli_obj_is_dcomplex( *a ) ) { bli_zzcopys( *(( dcomplex* )buf_a), value ); } else { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } bli_zscopys( value, *temp_s ); bli_zdcopys( value, *temp_d ); bli_zccopys( value, *temp_c ); bli_zzcopys( value, *temp_z ); *temp_i = ( gint_t ) bli_zreal( value ); }
void bli_obj_print( char* label, obj_t* obj ) { FILE* file = stdout; if ( bli_error_checking_is_enabled() ) bli_obj_print_check( label, obj ); fprintf( file, "\n" ); fprintf( file, "%s\n", label ); fprintf( file, "\n" ); fprintf( file, " m x n %lu x %lu\n", ( unsigned long int )bli_obj_length( *obj ), ( unsigned long int )bli_obj_width( *obj ) ); fprintf( file, "\n" ); fprintf( file, " offm, offn %lu, %lu\n", ( unsigned long int )bli_obj_row_off( *obj ), ( unsigned long int )bli_obj_col_off( *obj ) ); fprintf( file, " diagoff %ld\n", ( signed long int )bli_obj_diag_offset( *obj ) ); fprintf( file, "\n" ); fprintf( file, " buf %p\n", ( void* )bli_obj_buffer( *obj ) ); fprintf( file, " elem size %lu\n", ( unsigned long int )bli_obj_elem_size( *obj ) ); fprintf( file, " rs, cs %ld, %ld\n", ( signed long int )bli_obj_row_stride( *obj ), ( signed long int )bli_obj_col_stride( *obj ) ); fprintf( file, " is %ld\n", ( signed long int )bli_obj_imag_stride( *obj ) ); fprintf( file, " m_padded %lu\n", ( unsigned long int )bli_obj_padded_length( *obj ) ); fprintf( file, " n_padded %lu\n", ( unsigned long int )bli_obj_padded_width( *obj ) ); fprintf( file, " ps %lu\n", ( unsigned long int )bli_obj_panel_stride( *obj ) ); fprintf( file, "\n" ); fprintf( file, " info %lX\n", ( unsigned long int )(*obj).info ); fprintf( file, " - is complex %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) ); fprintf( file, " - is d. prec %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) ); fprintf( file, " - datatype %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) ); fprintf( file, " - target dt %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) ); fprintf( file, " - exec dt %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) ); fprintf( file, " - has trans %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) ); fprintf( file, " - has conj %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) ); fprintf( file, " - unit diag? %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) ); fprintf( file, " - struc type %lu\n", ( unsigned long int )bli_obj_struc( *obj ) >> BLIS_STRUC_SHIFT ); fprintf( file, " - uplo type %lu\n", ( unsigned long int )bli_obj_uplo( *obj ) >> BLIS_UPLO_SHIFT ); fprintf( file, " - is upper %lu\n", ( unsigned long int )bli_obj_is_upper( *obj ) ); fprintf( file, " - is lower %lu\n", ( unsigned long int )bli_obj_is_lower( *obj ) ); fprintf( file, " - is dense %lu\n", ( unsigned long int )bli_obj_is_dense( *obj ) ); fprintf( file, " - pack schema %lu\n", ( unsigned long int )bli_obj_pack_schema( *obj ) >> BLIS_PACK_SCHEMA_SHIFT ); fprintf( file, " - packinv diag? %lu\n", ( unsigned long int )bli_obj_has_inverted_diag( *obj ) ); fprintf( file, " - pack ordifup %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_upper( *obj ) ); fprintf( file, " - pack ordiflo %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_lower( *obj ) ); fprintf( file, " - packbuf type %lu\n", ( unsigned long int )bli_obj_pack_buffer_type( *obj ) >> BLIS_PACK_BUFFER_SHIFT ); fprintf( file, "\n" ); }
void bli_her_int( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, her_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t x_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her_int_check( conjh, alpha, x, c, cntl ); // If C or x has a zero dimension, return early. if ( bli_obj_has_zero_dim( *c ) ) return; if ( bli_obj_has_zero_dim( *x ) ) return; // Alias the operands in case we need to apply conjugations. bli_obj_alias_to( *x, x_local ); bli_obj_alias_to( *c, c_local ); // If matrix C is marked for conjugation, we interpret this as a request // to apply a conjugation to the other operands. if ( bli_obj_has_conj( c_local ) ) { bli_obj_toggle_conj( c_local ); // Notice that we don't need to conjugate alpha since it is guaranteed // to be real. bli_obj_toggle_conj( x_local ); } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( conjh, alpha, &x_local, &c_local, cntl ); }
void bli_obj_free( obj_t* obj ) { if ( bli_error_checking_is_enabled() ) bli_obj_free_check( obj ); // Don't dereference obj if it is NULL. if ( obj != NULL ) { // Idiot safety: Don't try to free the buffer field if the object // is a detached scalar (ie: if the buffer pointer refers to the // address of the internal scalar buffer). if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) ) bli_free_user( bli_obj_buffer( *obj ) ); } }
void bli_fprintm( FILE* file, char* s1, obj_t* x, char* format, char* s2 ) { num_t dt_x = bli_obj_datatype( *x ); dim_t m = bli_obj_length( *x ); dim_t n = bli_obj_width( *x ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); FUNCPTR_T f; if ( bli_error_checking_is_enabled() ) bli_fprintm_check( file, s1, x, format, s2 ); // Handle constants up front. if ( dt_x == BLIS_CONSTANT ) { float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, *x ); double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, *x ); scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *x ); dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *x ); gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, *x ); fprintf( file, "%s\n", s1 ); fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), bli_cimag( *cp ) ); fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), bli_zimag( *zp ) ); fprintf( file, " int: %ld\n", *ip ); fprintf( file, "\n" ); return; } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_x]; // Invoke the function. f( file, s1, m, n, buf_x, rs_x, cs_x, format, s2 ); }
void bli_obj_attach_buffer( void* p, inc_t rs, inc_t cs, inc_t is, obj_t* obj ) { // Check that the strides and lengths are compatible. Note that the // user *must* specify valid row and column strides when attaching an // external buffer. if ( bli_error_checking_is_enabled() ) bli_obj_attach_buffer_check( p, rs, cs, is, obj ); // Update the object. bli_obj_set_buffer( p, *obj ); bli_obj_set_strides( rs, cs, *obj ); bli_obj_set_imag_stride( is, *obj ); }
void bli_obj_create_without_buffer( num_t dt, dim_t m, dim_t n, obj_t* obj ) { siz_t elem_size; void* s; if ( bli_error_checking_is_enabled() ) bli_obj_create_without_buffer_check( dt, m, n, obj ); // Query the size of one element of the object's pre-set datatype. elem_size = bli_datatype_size( dt ); // Set any default properties that are appropriate. bli_obj_set_defaults( *obj ); // Set the object root to itself, since obj is not presumed to be a view // into a larger matrix. This is typically the only time this field is // ever set; henceforth, subpartitions and aliases to this object will // get copies of this field, and thus always have access to its // "greatest-grand" parent (ie: the original parent, or "root", object). // However, there ARE a few places where it is convenient to reset the // root field explicitly via bli_obj_set_as_root(). (We do not list // those places here. Just grep for bli_obj_set_as_root within the // top-level 'frame' directory to see them. bli_obj_set_as_root( *obj ); // Set individual fields. bli_obj_set_buffer( NULL, *obj ); bli_obj_set_datatype( dt, *obj ); bli_obj_set_elem_size( elem_size, *obj ); bli_obj_set_target_datatype( dt, *obj ); bli_obj_set_execution_datatype( dt, *obj ); bli_obj_set_dims( m, n, *obj ); bli_obj_set_offs( 0, 0, *obj ); bli_obj_set_diag_offset( 0, *obj ); // Set the internal scalar to 1.0. s = bli_obj_internal_scalar_buffer( *obj ); if ( bli_is_float( dt ) ) { bli_sset1s( *(( float* )s) ); } else if ( bli_is_double( dt ) ) { bli_dset1s( *(( double* )s) ); } else if ( bli_is_scomplex( dt ) ) { bli_cset1s( *(( scomplex* )s) ); } else if ( bli_is_dcomplex( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); } }
void bli_scalm_int( obj_t* beta, obj_t* x, scalm_t* cntl ) { obj_t x_local; varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_scalm_int_check( beta, x, cntl ); // First check if we are to skip this operation. if ( cntl_is_noop( cntl ) ) return; // Return early if one of the matrix operands has a zero dimension. if ( bli_obj_has_zero_dim( *x ) ) return; // Return early if both beta and the scalar attached to x are unit. if ( bli_obj_equals( beta, &BLIS_ONE ) && bli_obj_scalar_equals( x, &BLIS_ONE ) ) return; // Alias x to x_local so we can apply beta if it is non-unit. bli_obj_alias_to( *x, x_local ); // If beta is non-unit, apply it to the scalar attached to x. if ( !bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( beta, &x_local ); } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( &x_local ); }
// // Define object-based interface. // void bli_setm( obj_t* beta, obj_t* x ) { num_t dt_x; obj_t beta_local; if ( bli_error_checking_is_enabled() ) bli_setm_check( beta, x ); // Use the datatype of x as the target type for beta (since we do // not assume mixed domain/type support is enabled). dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of beta. bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); bli_setm_unb_var1( &beta_local, x ); }
void bli_syr2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ) { her2_t* her2_cntl; num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; bool_t x_has_unit_inc; bool_t y_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr2_check( alpha, x, y, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); //dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && y_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. bli_her2_int( BLIS_NO_CONJUGATE, &alpha_local, &alpha_local, x, y, c, cntx, her2_cntl ); }
void bli_her2k_front( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, herk_t* cntl ) { obj_t alpha_conj; obj_t c_local; obj_t a_local; obj_t bh_local; obj_t b_local; obj_t ah_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her2k_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); bli_obj_alias_to( *c, c_local ); bli_obj_set_as_root( c_local ); // For her2k, the first and second right-hand "B" operands are simply B' // and A'. bli_obj_alias_to( *b, bh_local ); bli_obj_induce_trans( bh_local ); bli_obj_toggle_conj( bh_local ); bli_obj_alias_to( *a, ah_local ); bli_obj_induce_trans( ah_local ); bli_obj_toggle_conj( ah_local ); // Initialize a conjugated copy of alpha. bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), BLIS_CONJUGATE, alpha, &alpha_conj ); // An optimization: If C is row-stored, transpose the entire operation // so as to allow the macro-kernel more favorable access patterns // through C. (The effect of the transposition of A and A' is negligible // because those operands are always packed to contiguous memory.) if ( bli_obj_is_row_stored( c_local ) ) { bli_obj_swap( a_local, bh_local ); bli_obj_swap( b_local, ah_local ); bli_obj_induce_trans( a_local ); bli_obj_induce_trans( bh_local ); bli_obj_induce_trans( b_local ); bli_obj_induce_trans( ah_local ); bli_obj_induce_trans( c_local ); } #if 0 // Invoke the internal back-end. bli_her2k_int( alpha, &a_local, &bh_local, &alpha_conj, &b_local, &ah_local, beta, &c_local, cntl ); #else // Invoke herk twice, using beta only the first time. bli_herk_int( alpha, &a_local, &bh_local, beta, &c_local, cntl ); bli_herk_int( &alpha_conj, &b_local, &ah_local, &BLIS_ONE, &c_local, cntl ); #endif }
void bli_her2k_front( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, gemm_t* cntl ) { obj_t alpha_conj; obj_t c_local; obj_t a_local; obj_t bh_local; obj_t b_local; obj_t ah_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her2k_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta, zero the imaginary components of // the diagonal elements, and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); bli_setid( &BLIS_ZERO, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); bli_obj_alias_to( *c, c_local ); bli_obj_set_as_root( c_local ); // For her2k, the first and second right-hand "B" operands are simply B' // and A'. bli_obj_alias_to( *b, bh_local ); bli_obj_induce_trans( bh_local ); bli_obj_toggle_conj( bh_local ); bli_obj_alias_to( *a, ah_local ); bli_obj_induce_trans( ah_local ); bli_obj_toggle_conj( ah_local ); // Initialize a conjugated copy of alpha. bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), BLIS_CONJUGATE, alpha, &alpha_conj ); // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( ( bli_obj_is_row_stored( c_local ) && bli_func_prefers_contig_cols( bli_obj_datatype( c_local ), bli_gemm_cntl_ukrs( cntl ) ) ) || ( bli_obj_is_col_stored( c_local ) && bli_func_prefers_contig_rows( bli_obj_datatype( c_local ), bli_gemm_cntl_ukrs( cntl ) ) ) ) { bli_obj_swap( a_local, bh_local ); bli_obj_swap( b_local, ah_local ); bli_obj_induce_trans( a_local ); bli_obj_induce_trans( bh_local ); bli_obj_induce_trans( b_local ); bli_obj_induce_trans( ah_local ); bli_obj_induce_trans( c_local ); } #if 0 // Invoke the internal back-end. bli_her2k_int( alpha, &a_local, &bh_local, &alpha_conj, &b_local, &ah_local, beta, &c_local, cntl ); #else // Invoke herk twice, using beta only the first time. herk_thrinfo_t** infos = bli_create_herk_thrinfo_paths(); dim_t n_threads = thread_num_threads( infos[0] ); // Invoke the internal back-end. bli_level3_thread_decorator( n_threads, (level3_int_t) bli_herk_int, alpha, &a_local, &bh_local, beta, &c_local, (void*) cntl, (void**) infos ); bli_level3_thread_decorator( n_threads, (level3_int_t) bli_herk_int, &alpha_conj, &b_local, &ah_local, &BLIS_ONE, &c_local, (void*) cntl, (void**) infos ); bli_herk_thrinfo_free_paths( infos, n_threads ); #endif // The Hermitian rank-2k product was computed as A*B'+B*A', even for // the diagonal elements. Mathematically, the imaginary components of // diagonal elements of a Hermitian rank-2k product should always be // zero. However, in practice, they sometimes accumulate meaningless // non-zero values. To prevent this, we explicitly set those values // to zero before returning. bli_setid( &BLIS_ZERO, &c_local ); }