void bli_gemv_unb_var2( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, gemv_t* cntl ) { num_t dt_a = bli_obj_datatype( *a ); num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); conj_t transa = bli_obj_conjtrans_status( *a ); conj_t conjx = bli_obj_conj_status( *x ); dim_t m = bli_obj_length( *a ); dim_t n = bli_obj_width( *a ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t incx = bli_obj_vector_inc( *x ); void* buf_y = bli_obj_buffer_at_off( *y ); inc_t incy = bli_obj_vector_inc( *y ); num_t dt_alpha; void* buf_alpha; num_t dt_beta; void* buf_beta; FUNCPTR_T f; // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a][dt_x][dt_y]; // Invoke the function. f( transa, conjx, m, n, buf_alpha, buf_a, rs_a, cs_a, buf_x, incx, buf_beta, buf_y, incy ); }
void bli_packm_unb_var1( obj_t* c, obj_t* p, packm_thrinfo_t* thread ) { num_t dt_cp = bli_obj_datatype( *c ); struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); trans_t transc = bli_obj_conjtrans_status( *c ); dim_t m_p = bli_obj_length( *p ); dim_t n_p = bli_obj_width( *p ); dim_t m_max_p = bli_obj_padded_length( *p ); dim_t n_max_p = bli_obj_padded_width( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); void* buf_kappa; FUNCPTR_T f; // This variant assumes that the computational kernel will always apply // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE // for kappa so that the underlying packm implementation does not scale // during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; if( thread_am_ochief( thread ) ) { // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p ); } }
void bli_axpym_unb_var1( obj_t* alpha, obj_t* x, obj_t* y, cntx_t* cntx ) { num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); doff_t diagoffx = bli_obj_diag_offset( *x ); diag_t diagx = bli_obj_diag( *x ); uplo_t uplox = bli_obj_uplo( *x ); trans_t transx = bli_obj_conjtrans_status( *x ); dim_t m = bli_obj_length( *y ); dim_t n = bli_obj_width( *y ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t rs_y = bli_obj_row_stride( *y ); inc_t cs_y = bli_obj_col_stride( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; // If alpha is a scalar constant, use dt_x to extract the address of the // corresponding constant value; otherwise, use the datatype encoded // within the alpha object and extract the buffer at the alpha offset. bli_set_scalar_dt_buffer( alpha, dt_x, dt_alpha, buf_alpha ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_alpha][dt_x][dt_y]; // Invoke the function. f( diagoffx, diagx, uplox, transx, m, n, buf_alpha, buf_x, rs_x, cs_x, buf_y, rs_y, cs_y ); }
void bli_trmv_unf_var2( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { num_t dt_a = bli_obj_datatype( *a ); num_t dt_x = bli_obj_datatype( *x ); uplo_t uplo = bli_obj_uplo( *a ); trans_t trans = bli_obj_conjtrans_status( *a ); diag_t diag = bli_obj_diag( *a ); dim_t m = bli_obj_length( *a ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t incx = bli_obj_vector_inc( *x ); num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a][dt_x]; // Invoke the function. f( uplo, trans, diag, m, buf_alpha, buf_a, rs_a, cs_a, buf_x, incx ); }
void bli_addm_unb_var1( obj_t* x, obj_t* y, cntx_t* cntx ) { num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); doff_t diagoffx = bli_obj_diag_offset( *x ); diag_t diagx = bli_obj_diag( *x ); uplo_t uplox = bli_obj_uplo( *x ); trans_t transx = bli_obj_conjtrans_status( *x ); dim_t m = bli_obj_length( *y ); dim_t n = bli_obj_width( *y ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t rs_y = bli_obj_row_stride( *y ); inc_t cs_y = bli_obj_col_stride( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_x][dt_y]; // Invoke the function. f( diagoffx, diagx, uplox, transx, m, n, buf_x, rs_x, cs_x, buf_y, rs_y, cs_y ); }
void libblis_test_trmv_check( obj_t* alpha, obj_t* a, obj_t* x, obj_t* x_orig, double* resid ) { num_t dt = bli_obj_datatype( *x ); num_t dt_real = bli_obj_datatype_proj_to_real( *x ); dim_t m = bli_obj_vector_dim( *x ); uplo_t uploa = bli_obj_uplo( *a ); trans_t transa = bli_obj_conjtrans_status( *a ); obj_t a_local, y; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and triangular. // - x is randomized. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // x := alpha * transa(A) * x_orig // // is functioning correctly if // // fnorm( y - x ) // // is negligible, where // // y = alpha * conja(A_dense) * x_orig // bli_obj_init_scalar( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &y ); bli_obj_create( dt, m, m, 0, 0, &a_local ); bli_obj_set_struc( BLIS_TRIANGULAR, a_local ); bli_obj_set_uplo( uploa, a_local ); bli_obj_toggle_uplo_if_trans( transa, a_local ); bli_copym( a, &a_local ); bli_mktrim( &a_local ); bli_obj_set_struc( BLIS_GENERAL, a_local ); bli_obj_set_uplo( BLIS_DENSE, a_local ); bli_gemv( alpha, &a_local, x_orig, &BLIS_ZERO, &y ); bli_subv( x, &y ); bli_fnormv( &y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &y ); bli_obj_free( &a_local ); }
void bli_packm_blk_var1( obj_t* c, obj_t* p, packm_thrinfo_t* t ) { num_t dt_cp = bli_obj_datatype( *c ); struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); trans_t transc = bli_obj_conjtrans_status( *c ); pack_t schema = bli_obj_pack_schema( *p ); bool_t invdiag = bli_obj_has_inverted_diag( *p ); bool_t revifup = bli_obj_is_pack_rev_if_upper( *p ); bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p ); dim_t m_p = bli_obj_length( *p ); dim_t n_p = bli_obj_width( *p ); dim_t m_max_p = bli_obj_padded_length( *p ); dim_t n_max_p = bli_obj_padded_width( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); inc_t is_p = bli_obj_imag_stride( *p ); dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); obj_t kappa; /*---initialize pointer to stop gcc complaining 2-9-16 GH --- */ obj_t* kappa_p = {0}; void* buf_kappa; func_t* packm_kers; void* packm_ker; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_ind_packed( schema ) ) { // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if( thread_am_ochief( t ) ) { if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } } kappa_p = thread_obroadcast( t, kappa_p ); // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p ); } else // if ( bli_is_nat_packed( schema ) ) { // This branch if for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); } // Choose the correct func_t object based on the pack_t schema. if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers; else if ( bli_is_3mi_packed( schema ) || bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers; else if ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; else packm_kers = packm_struc_cxk_kers; // Query the datatype-specific function pointer from the func_t object. packm_ker = bli_func_obj_query( dt_cp, packm_kers ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, schema, invdiag, revifup, reviflo, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, packm_ker, t ); }
void bli_packm_blk_var1_md ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* t ) { num_t dt_c = bli_obj_dt( c ); num_t dt_p = bli_obj_dt( p ); trans_t transc = bli_obj_conjtrans_status( c ); pack_t schema = bli_obj_pack_schema( p ); dim_t m_p = bli_obj_length( p ); dim_t n_p = bli_obj_width( p ); dim_t m_max_p = bli_obj_padded_length( p ); dim_t n_max_p = bli_obj_padded_width( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); inc_t is_p = bli_obj_imag_stride( p ); dim_t pd_p = bli_obj_panel_dim( p ); inc_t ps_p = bli_obj_panel_stride( p ); obj_t kappa; void* buf_kappa; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_nat_packed( schema ) ) { // This branch is for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE ); } else // if ( bli_is_ind_packed( schema ) ) { obj_t* kappa_p; // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p ); } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_c][dt_p]; // Invoke the function. f( transc, schema, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, cntx, t ); }