void bli_norm1m_unb_var1( obj_t* x, obj_t* norm ) { num_t dt_x = bli_obj_datatype( *x ); doff_t diagoffx = bli_obj_diag_offset( *x ); uplo_t diagx = bli_obj_diag( *x ); uplo_t uplox = bli_obj_uplo( *x ); dim_t m = bli_obj_length( *x ); dim_t n = bli_obj_width( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_norm = bli_obj_buffer_at_off( *norm ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_x]; // Invoke the function. f( diagoffx, diagx, uplox, m, n, buf_x, rs_x, cs_x, buf_norm ); }
void bli_unpackm_blk_var2( obj_t* p, obj_t* c, unpackm_t* cntl ) { num_t dt_cp = bli_obj_datatype( *c ); // Normally we take the parameters from the source argument. But here, // the packm/unpackm framework is not yet solidified enough for us to // assume that at this point struc(P) == struc(C), (ie: since // densification may have marked P's structure as dense when the root // is upper or lower). So, we take the struc field from C, not P. struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); // Again, normally the trans argument is on the source matrix. But we // know that the packed matrix is not transposed. If there is to be a // transposition, it is because C was originally transposed when packed. // Thus, we query C for the trans status, not P. Also, we only query // the trans status (not the conjugation status), since we probably // don't want to un-conjugate if the original matrix was conjugated // when packed. trans_t transc = bli_obj_onlytrans_status( *c ); dim_t m_c = bli_obj_length( *c ); dim_t n_c = bli_obj_width( *c ); dim_t m_panel = bli_obj_panel_length( *c ); dim_t n_panel = bli_obj_panel_width( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, m_c, n_c, m_panel, n_panel, buf_p, rs_p, cs_p, pd_p, ps_p, buf_c, rs_c, cs_c ); }
void bli_packm_unb_var1( obj_t* c, obj_t* p, packm_thrinfo_t* thread ) { num_t dt_cp = bli_obj_datatype( *c ); struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); trans_t transc = bli_obj_conjtrans_status( *c ); dim_t m_p = bli_obj_length( *p ); dim_t n_p = bli_obj_width( *p ); dim_t m_max_p = bli_obj_padded_length( *p ); dim_t n_max_p = bli_obj_padded_width( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); void* buf_kappa; FUNCPTR_T f; // This variant assumes that the computational kernel will always apply // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE // for kappa so that the underlying packm implementation does not scale // during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; if( thread_am_ochief( thread ) ) { // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p ); } }
void bli_scalm_unb_var1( obj_t* alpha, obj_t* x, cntx_t* cntx ) { num_t dt_x = bli_obj_datatype( *x ); doff_t diagoffx = bli_obj_diag_offset( *x ); uplo_t diagx = bli_obj_diag( *x ); uplo_t uplox = bli_obj_uplo( *x ); dim_t m = bli_obj_length( *x ); dim_t n = bli_obj_width( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_alpha; obj_t x_local; FUNCPTR_T f; // Alias x to x_local so we can apply alpha if it is non-unit. bli_obj_alias_to( *x, x_local ); // If alpha is non-unit, apply it to the scalar attached to x. if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &x_local ); } // Grab the address of the internal scalar buffer for the scalar // attached to x. buf_alpha_x = bli_obj_internal_scalar_buffer( *x ); // Index into the type combination array to extract the correct // function pointer. // NOTE: We use dt_x for both alpha and x because alpha was obtained // from the attached scalar of x, which is guaranteed to be of the // same datatype as x. f = ftypes[dt_x][dt_x]; // Invoke the function. // NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for alpha // because it would have already been conjugated by the front-end. f( BLIS_NO_CONJUGATE, diagoffx, diagx, uplox, m, n, buf_alpha, buf_x, rs_x, cs_x ); }
void bli_axpym_unb_var1( obj_t* alpha, obj_t* x, obj_t* y, cntx_t* cntx ) { num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); doff_t diagoffx = bli_obj_diag_offset( *x ); diag_t diagx = bli_obj_diag( *x ); uplo_t uplox = bli_obj_uplo( *x ); trans_t transx = bli_obj_conjtrans_status( *x ); dim_t m = bli_obj_length( *y ); dim_t n = bli_obj_width( *y ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t rs_y = bli_obj_row_stride( *y ); inc_t cs_y = bli_obj_col_stride( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; // If alpha is a scalar constant, use dt_x to extract the address of the // corresponding constant value; otherwise, use the datatype encoded // within the alpha object and extract the buffer at the alpha offset. bli_set_scalar_dt_buffer( alpha, dt_x, dt_alpha, buf_alpha ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_alpha][dt_x][dt_y]; // Invoke the function. f( diagoffx, diagx, uplox, transx, m, n, buf_alpha, buf_x, rs_x, cs_x, buf_y, rs_y, cs_y ); }
void bli_trmv_unf_var2( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { num_t dt_a = bli_obj_datatype( *a ); num_t dt_x = bli_obj_datatype( *x ); uplo_t uplo = bli_obj_uplo( *a ); trans_t trans = bli_obj_conjtrans_status( *a ); diag_t diag = bli_obj_diag( *a ); dim_t m = bli_obj_length( *a ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t incx = bli_obj_vector_inc( *x ); num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a][dt_x]; // Invoke the function. f( uplo, trans, diag, m, buf_alpha, buf_a, rs_a, cs_a, buf_x, incx ); }
void bli_addm_unb_var1( obj_t* x, obj_t* y, cntx_t* cntx ) { num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); doff_t diagoffx = bli_obj_diag_offset( *x ); diag_t diagx = bli_obj_diag( *x ); uplo_t uplox = bli_obj_uplo( *x ); trans_t transx = bli_obj_conjtrans_status( *x ); dim_t m = bli_obj_length( *y ); dim_t n = bli_obj_width( *y ); inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t rs_y = bli_obj_row_stride( *y ); inc_t cs_y = bli_obj_col_stride( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_x][dt_y]; // Invoke the function. f( diagoffx, diagx, uplox, transx, m, n, buf_x, rs_x, cs_x, buf_y, rs_y, cs_y ); }
void bli_packm_blk_var1( obj_t* c, obj_t* p, packm_thrinfo_t* t ) { num_t dt_cp = bli_obj_datatype( *c ); struc_t strucc = bli_obj_struc( *c ); doff_t diagoffc = bli_obj_diag_offset( *c ); diag_t diagc = bli_obj_diag( *c ); uplo_t uploc = bli_obj_uplo( *c ); trans_t transc = bli_obj_conjtrans_status( *c ); pack_t schema = bli_obj_pack_schema( *p ); bool_t invdiag = bli_obj_has_inverted_diag( *p ); bool_t revifup = bli_obj_is_pack_rev_if_upper( *p ); bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p ); dim_t m_p = bli_obj_length( *p ); dim_t n_p = bli_obj_width( *p ); dim_t m_max_p = bli_obj_padded_length( *p ); dim_t n_max_p = bli_obj_padded_width( *p ); void* buf_c = bli_obj_buffer_at_off( *c ); inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); void* buf_p = bli_obj_buffer_at_off( *p ); inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); inc_t is_p = bli_obj_imag_stride( *p ); dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); obj_t kappa; /*---initialize pointer to stop gcc complaining 2-9-16 GH --- */ obj_t* kappa_p = {0}; void* buf_kappa; func_t* packm_kers; void* packm_ker; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_ind_packed( schema ) ) { // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if( thread_am_ochief( t ) ) { if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } } kappa_p = thread_obroadcast( t, kappa_p ); // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p ); } else // if ( bli_is_nat_packed( schema ) ) { // This branch if for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); } // Choose the correct func_t object based on the pack_t schema. if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers; else if ( bli_is_3mi_packed( schema ) || bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers; else if ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; else packm_kers = packm_struc_cxk_kers; // Query the datatype-specific function pointer from the func_t object. packm_ker = bli_func_obj_query( dt_cp, packm_kers ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, schema, invdiag, revifup, reviflo, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, packm_ker, t ); }