void bli_obj_create_const( double value, obj_t* obj ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_check( value, obj ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, obj ); temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, *obj ); temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, *obj ); temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *obj ); temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *obj ); temp_i = bli_obj_buffer_for_const( BLIS_INT, *obj ); // Use the bli_??sets() macros to set the temp variables in order to // properly support BLIS_ENABLE_C99_COMPLEX. bli_dssets( value, 0.0, *temp_s ); bli_ddsets( value, 0.0, *temp_d ); bli_dcsets( value, 0.0, *temp_c ); bli_dzsets( value, 0.0, *temp_z ); *temp_i = ( gint_t ) value; }
void bli_obj_create_const_copy_of( obj_t* a, obj_t* b ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; void* buf_a; dcomplex value; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_copy_of_check( a, b ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, b ); temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, *b ); temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, *b ); temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b ); temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b ); temp_i = bli_obj_buffer_for_const( BLIS_INT, *b ); buf_a = bli_obj_buffer_at_off( *a ); bli_zzsets( 0.0, 0.0, value ); if ( bli_obj_is_float( *a ) ) { bli_szcopys( *(( float* )buf_a), value ); } else if ( bli_obj_is_double( *a ) ) { bli_dzcopys( *(( double* )buf_a), value ); } else if ( bli_obj_is_scomplex( *a ) ) { bli_czcopys( *(( scomplex* )buf_a), value ); } else if ( bli_obj_is_dcomplex( *a ) ) { bli_zzcopys( *(( dcomplex* )buf_a), value ); } else { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } bli_zscopys( value, *temp_s ); bli_zdcopys( value, *temp_d ); bli_zccopys( value, *temp_c ); bli_zzcopys( value, *temp_z ); *temp_i = ( gint_t ) bli_zreal( value ); }
void bli_fprintm( FILE* file, char* s1, obj_t* x, char* format, char* s2 ) { num_t dt_x = bli_obj_datatype( *x ); dim_t m = bli_obj_length( *x ); dim_t n = bli_obj_width( *x ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); FUNCPTR_T f; if ( bli_error_checking_is_enabled() ) bli_fprintm_check( file, s1, x, format, s2 ); // Handle constants up front. if ( dt_x == BLIS_CONSTANT ) { float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, *x ); double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, *x ); scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *x ); dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *x ); gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, *x ); fprintf( file, "%s\n", s1 ); fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), bli_cimag( *cp ) ); fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), bli_zimag( *zp ) ); fprintf( file, " int: %ld\n", *ip ); fprintf( file, "\n" ); return; } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_x]; // Invoke the function. f( file, s1, m, n, buf_x, rs_x, cs_x, format, s2 ); }
void bli_packm_unb_var1( obj_t* c, obj_t* p, packm_thrinfo_t* thread ) { num_t dt_cp = bli_obj_datatype( *c ); struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); trans_t transc = bli_obj_conjtrans_status( *c ); dim_t m_p = bli_obj_length( *p ); dim_t n_p = bli_obj_width( *p ); dim_t m_max_p = bli_obj_padded_length( *p ); dim_t n_max_p = bli_obj_padded_width( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); void* buf_kappa; FUNCPTR_T f; // This variant assumes that the computational kernel will always apply // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE // for kappa so that the underlying packm implementation does not scale // during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; if( thread_am_ochief( thread ) ) { // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p ); } }
void bli_obj_scalar_reset( obj_t* a ) { num_t dt = bli_obj_datatype( *a ); void* scalar_a = bli_obj_internal_scalar_buffer( *a ); void* one = bli_obj_buffer_for_const( dt, BLIS_ONE ); if ( bli_is_float( dt ) ) *(( float* )scalar_a) = *(( float* )one); else if ( bli_is_double( dt ) ) *(( double* )scalar_a) = *(( double* )one); else if ( bli_is_scomplex( dt ) ) *(( scomplex* )scalar_a) = *(( scomplex* )one); else if ( bli_is_dcomplex( dt ) ) *(( dcomplex* )scalar_a) = *(( dcomplex* )one); // Alternate implementation: //bli_obj_scalar_attach( BLIS_NO_CONJUGATE, &BLIS_ONE, a ); }
void bli_packm_blk_var1( obj_t* c, obj_t* p, packm_thrinfo_t* t ) { num_t dt_cp = bli_obj_datatype( *c ); struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); trans_t transc = bli_obj_conjtrans_status( *c ); pack_t schema = bli_obj_pack_schema( *p ); bool_t invdiag = bli_obj_has_inverted_diag( *p ); bool_t revifup = bli_obj_is_pack_rev_if_upper( *p ); bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p ); dim_t m_p = bli_obj_length( *p ); dim_t n_p = bli_obj_width( *p ); dim_t m_max_p = bli_obj_padded_length( *p ); dim_t n_max_p = bli_obj_padded_width( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); inc_t is_p = bli_obj_imag_stride( *p ); dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); obj_t kappa; /*---initialize pointer to stop gcc complaining 2-9-16 GH --- */ obj_t* kappa_p = {0}; void* buf_kappa; func_t* packm_kers; void* packm_ker; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_ind_packed( schema ) ) { // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if( thread_am_ochief( t ) ) { if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } } kappa_p = thread_obroadcast( t, kappa_p ); // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p ); } else // if ( bli_is_nat_packed( schema ) ) { // This branch if for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); } // Choose the correct func_t object based on the pack_t schema. if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers; else if ( bli_is_3mi_packed( schema ) || bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers; else if ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; else packm_kers = packm_struc_cxk_kers; // Query the datatype-specific function pointer from the func_t object. packm_ker = bli_func_obj_query( dt_cp, packm_kers ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, schema, invdiag, revifup, reviflo, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, packm_ker, t ); }
void bli_packm_blk_var1_md ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* t ) { num_t dt_c = bli_obj_dt( c ); num_t dt_p = bli_obj_dt( p ); trans_t transc = bli_obj_conjtrans_status( c ); pack_t schema = bli_obj_pack_schema( p ); dim_t m_p = bli_obj_length( p ); dim_t n_p = bli_obj_width( p ); dim_t m_max_p = bli_obj_padded_length( p ); dim_t n_max_p = bli_obj_padded_width( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); inc_t is_p = bli_obj_imag_stride( p ); dim_t pd_p = bli_obj_panel_dim( p ); inc_t ps_p = bli_obj_panel_stride( p ); obj_t kappa; void* buf_kappa; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_nat_packed( schema ) ) { // This branch is for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE ); } else // if ( bli_is_ind_packed( schema ) ) { obj_t* kappa_p; // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p ); } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_c][dt_p]; // Invoke the function. f( transc, schema, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, cntx, t ); }