// Object-based entry point for the trmv_unf_var1 variant. It reads the
// properties of alpha, a, and x and forwards them to the datatype-specific
// routine stored in the ftypes table.
//
//   alpha  scalar object; its buffer is requested in the union of the
//          datatypes of a and x.
//   a      matrix object supplying uplo, conjugation/transposition status,
//          the diag field, the length m, the buffer, and both strides.
//   x      vector object supplying the buffer and the increment.
//   cntx   not referenced by this function.
//   cntl   not referenced by this function.
void bli_trmv_unf_var1( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl )
{
	// Operand datatypes; together they select the implementation.
	num_t   type_a   = bli_obj_datatype( *a );
	num_t   type_x   = bli_obj_datatype( *x );

	// Matrix A: structural properties, dimension, and storage.
	uplo_t  uplo_a   = bli_obj_uplo( *a );
	trans_t trans_a  = bli_obj_conjtrans_status( *a );
	diag_t  diag_a   = bli_obj_diag( *a );
	dim_t   dim_m    = bli_obj_length( *a );
	void*   a_ptr    = bli_obj_buffer_at_off( *a );
	inc_t   a_rs     = bli_obj_row_stride( *a );
	inc_t   a_cs     = bli_obj_col_stride( *a );

	// Vector x: storage.
	void*   x_ptr    = bli_obj_buffer_at_off( *x );
	inc_t   x_inc    = bli_obj_vector_inc( *x );

	// alpha is read in the type union of a and x so that no information
	// is lost unnecessarily during computation.
	num_t   type_alpha = bli_datatype_union( type_a, type_x );
	void*   alpha_ptr  = bli_obj_buffer_for_1x1( type_alpha, *alpha );

	// Select the implementation for this (a, x) datatype combination.
	FUNCPTR_T impl = ftypes[type_a][type_x];

	impl( uplo_a,
	      trans_a,
	      diag_a,
	      dim_m,
	      alpha_ptr,
	      a_ptr, a_rs, a_cs,
	      x_ptr, x_inc );
}
void bli_hemv_unb_var1( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { num_t dt_a = bli_obj_datatype( *a ); num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); uplo_t uplo = bli_obj_uplo( *a ); conj_t conja = bli_obj_conj_status( *a ); conj_t conjx = bli_obj_conj_status( *x ); dim_t m = bli_obj_length( *a ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t incx = bli_obj_vector_inc( *x ); void* buf_y = bli_obj_buffer_at_off( *y ); inc_t incy = bli_obj_vector_inc( *y ); num_t dt_alpha; void* buf_alpha; num_t dt_beta; void* buf_beta; FUNCPTR_T f; // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a]; // Invoke the function. f( uplo, conja, conjx, conjh, m, buf_alpha, buf_a, rs_a, cs_a, buf_x, incx, buf_beta, buf_y, incy ); }
// Object-based entry point for the packm_blk_var1 variant. It reads the
// properties of the source object c and the pack object p, chooses the
// scalar (kappa) to apply during packing and the packm kernel set that
// matches p's pack schema, and forwards everything to the routine stored
// in ftypes for the datatype of c.
//
//   c  source object: datatype, structure, diagonal offset, diag field,
//      uplo, conjugation/transposition status, buffer, and strides.
//   p  pack object: schema, inverted-diagonal and reverse-iteration flags,
//      dimensions, padded dimensions, buffer, strides, panel dimension,
//      and panel stride. For induced-method schemas its attached scalar
//      may be detached and reset by this function.
//   t  thread info; used to identify the chief thread and to broadcast
//      kappa_p, and passed on to the implementation.
void bli_packm_blk_var1( obj_t* c, obj_t* p, packm_thrinfo_t* t )
{
	num_t     dt_cp     = bli_obj_datatype( *c );

	struc_t   strucc    = bli_obj_struc( *c );
	doff_t    diagoffc  = bli_obj_diag_offset( *c );
	diag_t    diagc     = bli_obj_diag( *c );
	uplo_t    uploc     = bli_obj_uplo( *c );
	trans_t   transc    = bli_obj_conjtrans_status( *c );
	pack_t    schema    = bli_obj_pack_schema( *p );
	bool_t    invdiag   = bli_obj_has_inverted_diag( *p );
	bool_t    revifup   = bli_obj_is_pack_rev_if_upper( *p );
	bool_t    reviflo   = bli_obj_is_pack_rev_if_lower( *p );

	dim_t     m_p       = bli_obj_length( *p );
	dim_t     n_p       = bli_obj_width( *p );
	dim_t     m_max_p   = bli_obj_padded_length( *p );
	dim_t     n_max_p   = bli_obj_padded_width( *p );

	void*     buf_c     = bli_obj_buffer_at_off( *c );
	inc_t     rs_c      = bli_obj_row_stride( *c );
	inc_t     cs_c      = bli_obj_col_stride( *c );

	void*     buf_p     = bli_obj_buffer_at_off( *p );
	inc_t     rs_p      = bli_obj_row_stride( *p );
	inc_t     cs_p      = bli_obj_col_stride( *p );
	inc_t     is_p      = bli_obj_imag_stride( *p );
	dim_t     pd_p      = bli_obj_panel_dim( *p );
	inc_t     ps_p      = bli_obj_panel_stride( *p );

	obj_t     kappa;
	// Only the chief thread assigns kappa_p before the broadcast below, so
	// it is given an initial value here (this also keeps gcc from warning
	// about a possibly uninitialized variable).
	obj_t*    kappa_p   = {0};
	void*     buf_kappa;

	func_t*   packm_kers;
	void*     packm_ker;

	FUNCPTR_T f;

	// Treatment of kappa (ie: scaling during packing) depends on
	// whether we are executing an induced method.
	if ( bli_is_ind_packed( schema ) )
	{
		// The value for kappa we use will depend on whether the scalar
		// attached to p has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if ( thread_am_ochief( t ) )
		{
			if ( bli_obj_scalar_has_nonzero_imag( p ) )
			{
				// Detach the scalar.
				bli_obj_scalar_detach( p, &kappa );

				// Reset the attached scalar (to 1.0).
				bli_obj_scalar_reset( p );

				kappa_p = &kappa;
			}
			else
			{
				// If the internal scalar of p has only a real component,
				// then we will apply it later (in the micro-kernel), and
				// so we will use BLIS_ONE to indicate no scaling during
				// packing.
				kappa_p = &BLIS_ONE;
			}
		}

		// NOTE(review): presumably this hands the chief's kappa_p to every
		// thread, in which case it may point at the chief's local kappa;
		// confirm that the chief cannot leave this function early.
		kappa_p = thread_obroadcast( t, kappa_p );

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
	}
	else // if ( bli_is_nat_packed( schema ) )
	{
		// This branch is for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
	}

	// Choose the correct func_t object based on the pack_t schema.
	if      ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers;
	else if ( bli_is_3mi_packed( schema ) ||
	          bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers;
	else if ( bli_is_ro_packed( schema ) ||
	          bli_is_io_packed( schema ) ||
	          bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
	else                                    packm_kers = packm_struc_cxk_kers;

	// Query the datatype-specific function pointer from the func_t object.
	packm_ker = bli_func_obj_query( dt_cp, packm_kers );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

	// Invoke the function.
	f( strucc,
	   diagoffc,
	   diagc,
	   uploc,
	   transc,
	   schema,
	   invdiag,
	   revifup,
	   reviflo,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   packm_ker,
	   t );
}
// Checks the result of a setv test by confirming that every element of x
// compares equal to beta.
//
//   beta   scalar object; its buffer is read in the datatype of x.
//   x      vector object to inspect.
//   resid  output; set to 0.0 when every element matches, or to 1.0 as
//          soon as the first mismatching element is found.
void libblis_test_setv_check( obj_t* beta, obj_t* x, double* resid )
{
	num_t dt_x     = bli_obj_datatype( *x );
	dim_t n_elem   = bli_obj_vector_dim( *x );
	inc_t step     = bli_obj_vector_inc( *x );
	void* x_buf    = bli_obj_buffer_at_off( *x );
	void* beta_buf = bli_obj_buffer_for_1x1( dt_x, *beta );
	dim_t idx;

	// Assume success until a mismatch is seen.
	*resid = 0.0;

	// One branch per datatype: walk x with its increment and compare each
	// element against the single beta value.
	if ( bli_obj_is_float( *x ) )
	{
		float* elem     = x_buf;
		float* expected = beta_buf;

		for ( idx = 0; idx < n_elem; ++idx, elem += step )
		{
			if ( !bli_seq( *elem, *expected ) ) { *resid = 1.0; return; }
		}
	}
	else if ( bli_obj_is_double( *x ) )
	{
		double* elem     = x_buf;
		double* expected = beta_buf;

		for ( idx = 0; idx < n_elem; ++idx, elem += step )
		{
			if ( !bli_deq( *elem, *expected ) ) { *resid = 1.0; return; }
		}
	}
	else if ( bli_obj_is_scomplex( *x ) )
	{
		scomplex* elem     = x_buf;
		scomplex* expected = beta_buf;

		for ( idx = 0; idx < n_elem; ++idx, elem += step )
		{
			if ( !bli_ceq( *elem, *expected ) ) { *resid = 1.0; return; }
		}
	}
	else // if ( bli_obj_is_dcomplex( *x ) )
	{
		dcomplex* elem     = x_buf;
		dcomplex* expected = beta_buf;

		for ( idx = 0; idx < n_elem; ++idx, elem += step )
		{
			if ( !bli_zeq( *elem, *expected ) ) { *resid = 1.0; return; }
		}
	}
}
void bli_packm_blk_var1_md ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* t ) { num_t dt_c = bli_obj_dt( c ); num_t dt_p = bli_obj_dt( p ); trans_t transc = bli_obj_conjtrans_status( c ); pack_t schema = bli_obj_pack_schema( p ); dim_t m_p = bli_obj_length( p ); dim_t n_p = bli_obj_width( p ); dim_t m_max_p = bli_obj_padded_length( p ); dim_t n_max_p = bli_obj_padded_width( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); inc_t is_p = bli_obj_imag_stride( p ); dim_t pd_p = bli_obj_panel_dim( p ); inc_t ps_p = bli_obj_panel_stride( p ); obj_t kappa; void* buf_kappa; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_nat_packed( schema ) ) { // This branch is for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE ); } else // if ( bli_is_ind_packed( schema ) ) { obj_t* kappa_p; // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). 
bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p ); } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_c][dt_p]; // Invoke the function. f( transc, schema, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, cntx, t ); }
#define GENFRONT( opname ) \ \ void PASTEMAC0(opname)( \ obj_t* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ num_t dt_chi = bli_obj_datatype( *chi ); \ num_t dt_def = BLIS_DCOMPLEX; \ num_t dt_use; \ \ /* If chi is a constant object, default to using the dcomplex value to maximize precision, and since we don't know if the caller needs just the real or the real and imaginary parts. */ \ void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi ); \ \ FUNCPTR_T f; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ \ /* The _check() routine prevents integer types, so we know that chi is either a constant or an actual floating-point type. */ \ if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \ else dt_use = dt_chi; \ \ /* Index into the type combination array to extract the correct function pointer. */ \ f = ftypes[dt_use]; \ \
void bli_gemmsup_ref_var1n ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since they objects // are guaranteed to not have structure or be packed. if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. 
k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition. rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var1n[dt_exec]; if ( bli_is_notrans( trans ) ) { // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm ); } else { // Invoke the function (transposing the operation). f ( conjb, // swap the conj values. conja, n, // swap the m and n dimensions. m, k, buf_alpha, buf_b, cs_b, rs_b, // swap the positions of A and B. buf_a, cs_a, rs_a, // swap the strides of A and B. buf_beta, buf_c, cs_c, rs_c, // swap the strides of C. bli_stor3_trans( eff_id ), // transpose the stor3_t id. cntx, rntm ); } }
void* buf_alphax; \ void* buf_alphay; \ \ obj_t alphax_local; \ obj_t alphay_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alphax, alphay, x, y, z ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alphax, &alphax_local ); \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alphay, &alphay_local ); \ buf_alphax = bli_obj_buffer_for_1x1( dt, &alphax_local ); \ buf_alphay = bli_obj_buffer_for_1x1( dt, &alphay_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conjx, \ conjy, \ n, \ buf_alphax, \ buf_alphay, \ buf_x, inc_x, \
// // Define object-based interface. // void bli_dotxf( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ) { num_t dt = bli_obj_datatype( *x ); conj_t conja = bli_obj_conj_status( *a ); conj_t conjx = bli_obj_conj_status( *x ); dim_t m = bli_obj_vector_dim( *y ); dim_t b_n = bli_obj_vector_dim( *x ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_y = bli_obj_buffer_at_off( *y ); inc_t inc_y = bli_obj_vector_inc( *y ); obj_t alpha_local; void* buf_alpha; obj_t beta_local; void* buf_beta; FUNCPTR_T f = ftypes[dt]; if ( bli_error_checking_is_enabled() ) bli_dotxf_check( alpha, a, x, beta, y ); // Create local copy-casts of the scalars (and apply internal conjugation // if needed). bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, alpha, &alpha_local ); bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, beta, &beta_local ); // Extract the scalar buffers. buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); // Support cases where matrix A requires a transposition. if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } // Invoke the void pointer-based function. f( conja, conjx, m, b_n, buf_alpha, buf_a, rs_a, cs_a, buf_x, inc_x, buf_beta, buf_y, inc_y ); }
// Checks the result of a setm test by confirming that every element of
// the matrix x compares equal to beta.
//
//   params  not referenced by this function.
//   beta    scalar object; its buffer is read in the datatype of x.
//   x       matrix object to inspect.
//   resid   output; set to 0.0 when every element matches, or to 1.0 as
//           soon as the first mismatching element is found.
void libblis_test_setm_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         x,
       double*        resid
     )
{
	num_t dt_x     = bli_obj_datatype( *x );
	dim_t n_rows   = bli_obj_length( *x );
	dim_t n_cols   = bli_obj_width( *x );
	inc_t rs_x     = bli_obj_row_stride( *x );
	inc_t cs_x     = bli_obj_col_stride( *x );
	void* x_buf    = bli_obj_buffer_at_off( *x );
	void* beta_buf = bli_obj_buffer_for_1x1( dt_x, *beta );
	dim_t i, j;

	// Assume success until a mismatch is seen.
	*resid = 0.0;

	// One branch per datatype: visit the matrix column by column and
	// compare each element against the single beta value.
	if ( bli_obj_is_float( *x ) )
	{
		float* expected = beta_buf;
		float* base     = x_buf;

		for ( j = 0; j < n_cols; ++j )
		{
			for ( i = 0; i < n_rows; ++i )
			{
				float* elem = base + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_seq( *elem, *expected ) ) { *resid = 1.0; return; }
			}
		}
	}
	else if ( bli_obj_is_double( *x ) )
	{
		double* expected = beta_buf;
		double* base     = x_buf;

		for ( j = 0; j < n_cols; ++j )
		{
			for ( i = 0; i < n_rows; ++i )
			{
				double* elem = base + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_deq( *elem, *expected ) ) { *resid = 1.0; return; }
			}
		}
	}
	else if ( bli_obj_is_scomplex( *x ) )
	{
		scomplex* expected = beta_buf;
		scomplex* base     = x_buf;

		for ( j = 0; j < n_cols; ++j )
		{
			for ( i = 0; i < n_rows; ++i )
			{
				scomplex* elem = base + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_ceq( *elem, *expected ) ) { *resid = 1.0; return; }
			}
		}
	}
	else // if ( bli_obj_is_dcomplex( *x ) )
	{
		dcomplex* expected = beta_buf;
		dcomplex* base     = x_buf;

		for ( j = 0; j < n_cols; ++j )
		{
			for ( i = 0; i < n_rows; ++i )
			{
				dcomplex* elem = base + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_zeq( *elem, *expected ) ) { *resid = 1.0; return; }
			}
		}
	}
}
// Object-based entry point for the gemmtrsm micro-kernel. It reads buffers
// and strides from the operand objects, fills an auxinfo_t, and calls the
// routine stored in ftypes_l or ftypes_u (chosen by whether a11 is lower)
// for the datatype of c11, handing it the matching micro-kernel address.
//
//   alpha  scalar object; buffer read in the datatype of c11.
//   a1x    object supplying k (its width) and a buffer.
//   a11    object supplying a buffer; whether it is lower selects the
//          lower or upper code path.
//   bx1    object supplying a buffer.
//   b11    object supplying a buffer.
//   c11    object supplying the datatype, a buffer, and both strides.
void bli_gemmtrsm_ukernel( obj_t* alpha, obj_t* a1x, obj_t* a11, obj_t* bx1, obj_t* b11, obj_t* c11 )
{
	num_t     dt        = bli_obj_datatype( *c11 );
	dim_t     k         = bli_obj_width( *a1x );

	void*     a1x_ptr   = bli_obj_buffer_at_off( *a1x );
	void*     a11_ptr   = bli_obj_buffer_at_off( *a11 );
	void*     bx1_ptr   = bli_obj_buffer_at_off( *bx1 );
	void*     b11_ptr   = bli_obj_buffer_at_off( *b11 );

	void*     c11_ptr   = bli_obj_buffer_at_off( *c11 );
	inc_t     c_rs      = bli_obj_row_stride( *c11 );
	inc_t     c_cs      = bli_obj_col_stride( *c11 );

	void*     alpha_ptr = bli_obj_buffer_for_1x1( dt, *alpha );

	auxinfo_t aux;
	FUNCPTR_T impl;
	void*     ukr;

	// Everything that depends on whether a11 is lower or upper is decided
	// in one place: the "next a" address stored in the auxinfo_t (in case
	// the micro-kernel uses it), the micro-kernel address queried from the
	// func_t object, and the function pointer taken from the type array.
	if ( bli_obj_is_lower( *a11 ) )
	{
		bli_auxinfo_set_next_a( a1x_ptr, aux );
		ukr  = bli_func_obj_query( dt, gemmtrsm_l_ukrs );
		impl = ftypes_l[dt];
	}
	else
	{
		bli_auxinfo_set_next_a( a11_ptr, aux );
		ukr  = bli_func_obj_query( dt, gemmtrsm_u_ukrs );
		impl = ftypes_u[dt];
	}

	// The "next b" address is the same for both cases.
	bli_auxinfo_set_next_b( bx1_ptr, aux );

	impl( k,
	      alpha_ptr,
	      a1x_ptr,
	      a11_ptr,
	      bx1_ptr,
	      b11_ptr,
	      c11_ptr, c_rs, c_cs,
	      &aux,
	      ukr );
}
// Object-based entry point for the dotxaxpyf_unb_var2 variant. It reads
// the properties of the operand objects and forwards them to the routine
// stored in ftypes for the (a, x, y) datatype combination.
//
//   alpha  scalar object; buffer read in the union of the datatypes of
//          a and x.
//   at     object from which only the conjugation status is read.
//   a      matrix object supplying datatype, conjugation status, buffer,
//          and both strides.
//   w      vector object supplying conjugation status, buffer, increment.
//   x      vector object supplying datatype, conjugation status, buffer,
//          and increment.
//   beta   scalar object; buffer read in the datatype of y.
//   y      vector object supplying datatype, the dimension b_n, buffer,
//          and increment.
//   z      vector object supplying the dimension m, buffer, and increment.
void bli_dotxaxpyf_unb_var2( obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z )
{
	// Datatypes that select the implementation.
	num_t  type_a  = bli_obj_datatype( *a );
	num_t  type_x  = bli_obj_datatype( *x );
	num_t  type_y  = bli_obj_datatype( *y );

	// Conjugation flags.
	conj_t conj_at = bli_obj_conj_status( *at );
	conj_t conj_a  = bli_obj_conj_status( *a );
	conj_t conj_w  = bli_obj_conj_status( *w );
	conj_t conj_x  = bli_obj_conj_status( *x );

	// Problem dimensions.
	dim_t  dim_m   = bli_obj_vector_dim( *z );
	dim_t  dim_bn  = bli_obj_vector_dim( *y );

	// Matrix storage.
	void*  a_ptr   = bli_obj_buffer_at_off( *a );
	inc_t  a_rs    = bli_obj_row_stride( *a );
	inc_t  a_cs    = bli_obj_col_stride( *a );

	// Vector storage.
	void*  w_ptr   = bli_obj_buffer_at_off( *w );
	inc_t  w_inc   = bli_obj_vector_inc( *w );

	void*  x_ptr   = bli_obj_buffer_at_off( *x );
	inc_t  x_inc   = bli_obj_vector_inc( *x );

	void*  y_ptr   = bli_obj_buffer_at_off( *y );
	inc_t  y_inc   = bli_obj_vector_inc( *y );

	void*  z_ptr   = bli_obj_buffer_at_off( *z );
	inc_t  z_inc   = bli_obj_vector_inc( *z );

	// alpha is read in the type union of a and x so that no information
	// is lost unnecessarily during computation.
	num_t  type_alpha = bli_datatype_union( type_a, type_x );
	void*  alpha_ptr  = bli_obj_buffer_for_1x1( type_alpha, *alpha );

	// beta is read in the datatype of y.
	num_t  type_beta  = type_y;
	void*  beta_ptr   = bli_obj_buffer_for_1x1( type_beta, *beta );

	// Select the implementation for this datatype combination.
	FUNCPTR_T impl = ftypes[type_a][type_x][type_y];

	impl( conj_at,
	      conj_a,
	      conj_w,
	      conj_x,
	      dim_m,
	      dim_bn,
	      alpha_ptr,
	      a_ptr, a_rs, a_cs,
	      w_ptr, w_inc,
	      x_ptr, x_inc,
	      beta_ptr,
	      y_ptr, y_inc,
	      z_ptr, z_inc );
}