void bli_scalm_unb_var1( obj_t*  alpha,
                         obj_t*  x,
                         cntx_t* cntx )
{
    num_t     dt_x     = bli_obj_datatype( *x );

    doff_t    diagoffx = bli_obj_diag_offset( *x );
    diag_t    diagx    = bli_obj_diag( *x );
    uplo_t    uplox    = bli_obj_uplo( *x );

    dim_t     m        = bli_obj_length( *x );
    dim_t     n        = bli_obj_width( *x );

    void*     buf_x    = bli_obj_buffer_at_off( *x );
    inc_t     rs_x     = bli_obj_row_stride( *x );
    inc_t     cs_x     = bli_obj_col_stride( *x );

    void*     buf_alpha;

    obj_t     x_local;

    FUNCPTR_T f;

    // Alias x to x_local so we can apply alpha if it is non-unit.
    bli_obj_alias_to( *x, x_local );

    // If alpha is non-unit, apply it to the scalar attached to x.
    if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( alpha, &x_local );
    }

    // Grab the address of the internal scalar buffer for the scalar
    // attached to x_local (into which alpha may have been folded above).
    buf_alpha = bli_obj_internal_scalar_buffer( x_local );

    // Index into the type combination array to extract the correct
    // function pointer.
    // NOTE: We use dt_x for both alpha and x because alpha was obtained
    // from the attached scalar of x, which is guaranteed to be of the
    // same datatype as x.
    f = ftypes[dt_x][dt_x];

    // Invoke the function.
    // NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for alpha
    // because it would have already been conjugated by the front-end.
    f( BLIS_NO_CONJUGATE,
       diagoffx,
       diagx,
       uplox,
       m,
       n,
       buf_alpha,
       buf_x, rs_x, cs_x );
}
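// Illustrative sketch (not part of the original source): one way the
// variant above might be driven directly. This assumes the standard
// BLIS object-API entry points bli_obj_create() and bli_setsc() with
// their usual signatures (and "blis.h" in scope), and assumes the
// variant tolerates a NULL context; in normal use one would call the
// bli_scalm() front-end instead.
void example_scalm_usage( void )
{
    obj_t alpha;
    obj_t x;

    // Create a 4x4 double-precision matrix with default (column) strides.
    bli_obj_create( BLIS_DOUBLE, 4, 4, 0, 0, &x );

    // Create a detached scalar and set it to 2.0, so that x := 2.0 * x.
    bli_obj_scalar_init_detached( BLIS_DOUBLE, &alpha );
    bli_setsc( 2.0, 0.0, &alpha );

    bli_scalm_unb_var1( &alpha, &x, NULL );

    bli_obj_free( &x );
}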
void bli_obj_scalar_reset( obj_t* a )
{
    num_t dt       = bli_obj_datatype( *a );
    void* scalar_a = bli_obj_internal_scalar_buffer( *a );
    void* one      = bli_obj_buffer_for_const( dt, BLIS_ONE );

    if      ( bli_is_float( dt )    ) *(( float*    )scalar_a) = *(( float*    )one);
    else if ( bli_is_double( dt )   ) *(( double*   )scalar_a) = *(( double*   )one);
    else if ( bli_is_scomplex( dt ) ) *(( scomplex* )scalar_a) = *(( scomplex* )one);
    else if ( bli_is_dcomplex( dt ) ) *(( dcomplex* )scalar_a) = *(( dcomplex* )one);

    // Alternate implementation:
    //bli_obj_scalar_attach( BLIS_NO_CONJUGATE, &BLIS_ONE, a );
}
void bli_obj_free( obj_t* obj )
{
    if ( bli_error_checking_is_enabled() )
        bli_obj_free_check( obj );

    // Don't dereference obj if it is NULL.
    if ( obj != NULL )
    {
        // Idiot safety: Don't try to free the buffer field if the object
        // is a detached scalar (ie: if the buffer pointer refers to the
        // address of the internal scalar buffer).
        if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) )
            bli_free_user( bli_obj_buffer( *obj ) );
    }
}
void bli_obj_scalar_init_detached( num_t  dt,
                                   obj_t* beta )
{
    void* p;

    // Initialize beta without a buffer and then attach its internal buffer.
    bli_obj_create_without_buffer( dt, 1, 1, beta );

    // Query the address of the object's internal scalar buffer.
    p = bli_obj_internal_scalar_buffer( *beta );

    // Update the object.
    bli_obj_set_buffer( p, *beta );
    bli_obj_set_strides( 1, 1, *beta );
    bli_obj_set_imag_stride( 1, *beta );
}
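// Illustrative sketch (not part of the original source): the invariant
// established by bli_obj_scalar_init_detached() is that the object's
// buffer field aliases its own internal scalar storage. That is exactly
// the condition bli_obj_free() tests before calling bli_free_user(),
// which is why freeing a detached scalar is harmless. assert() from
// <assert.h> is used here purely for illustration.
#include <assert.h>

void example_detached_scalar_invariant( void )
{
    obj_t beta;

    bli_obj_scalar_init_detached( BLIS_DCOMPLEX, &beta );

    // The buffer points at the internal scalar buffer...
    assert( bli_obj_buffer( beta ) ==
            bli_obj_internal_scalar_buffer( beta ) );

    // ...so bli_obj_free() (correctly) declines to free it.
    bli_obj_free( &beta );
}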
void bli_obj_create_without_buffer( num_t  dt,
                                    dim_t  m,
                                    dim_t  n,
                                    obj_t* obj )
{
    siz_t elem_size;
    void* s;

    if ( bli_error_checking_is_enabled() )
        bli_obj_create_without_buffer_check( dt, m, n, obj );

    // Query the size of one element of the object's pre-set datatype.
    elem_size = bli_datatype_size( dt );

    // Set any default properties that are appropriate.
    bli_obj_set_defaults( *obj );

    // Set the object root to itself, since obj is not presumed to be a view
    // into a larger matrix. This is typically the only time this field is
    // ever set; henceforth, subpartitions and aliases to this object will
    // get copies of this field, and thus always have access to its
    // "greatest-grand" parent (ie: the original parent, or "root", object).
    // However, there ARE a few places where it is convenient to reset the
    // root field explicitly via bli_obj_set_as_root(). (We do not list
    // those places here. Just grep for bli_obj_set_as_root within the
    // top-level 'frame' directory to see them.)
    bli_obj_set_as_root( *obj );

    // Set individual fields.
    bli_obj_set_buffer( NULL, *obj );
    bli_obj_set_datatype( dt, *obj );
    bli_obj_set_elem_size( elem_size, *obj );
    bli_obj_set_target_datatype( dt, *obj );
    bli_obj_set_execution_datatype( dt, *obj );
    bli_obj_set_dims( m, n, *obj );
    bli_obj_set_offs( 0, 0, *obj );
    bli_obj_set_diag_offset( 0, *obj );

    // Set the internal scalar to 1.0.
    s = bli_obj_internal_scalar_buffer( *obj );
    if      ( bli_is_float( dt )    ) { bli_sset1s( *(( float*    )s) ); }
    else if ( bli_is_double( dt )   ) { bli_dset1s( *(( double*   )s) ); }
    else if ( bli_is_scomplex( dt ) ) { bli_cset1s( *(( scomplex* )s) ); }
    else if ( bli_is_dcomplex( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); }
}
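// Illustrative sketch (not part of the original source): the headless
// creation above composes with bli_obj_set_buffer()/bli_obj_set_strides()
// (both used by bli_obj_scalar_init_detached() above) to wrap
// caller-owned storage. The strides here describe a column-stored 3x3
// matrix.
void example_wrap_user_buffer( void )
{
    double buf[ 9 ] = { 0.0 };
    obj_t  a;

    bli_obj_create_without_buffer( BLIS_DOUBLE, 3, 3, &a );

    // Attach the caller-owned buffer: rs = 1, cs = 3 (column storage).
    bli_obj_set_buffer( buf, a );
    bli_obj_set_strides( 1, 3, a );

    // NOTE: buf is owned by the caller and was not allocated by BLIS,
    // so bli_obj_free() must not be called on a.
}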
bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a )
{
    bool_t r_val    = FALSE;
    num_t  dt       = bli_obj_datatype( *a );
    void*  scalar_a = bli_obj_internal_scalar_buffer( *a );

    if      ( bli_is_real( dt ) )
    {
        r_val = FALSE;
    }
    else if ( bli_is_scomplex( dt ) )
    {
        r_val = ( bli_cimag( *(( scomplex* )scalar_a) ) != 0.0F );
    }
    else if ( bli_is_dcomplex( dt ) )
    {
        r_val = ( bli_zimag( *(( dcomplex* )scalar_a) ) != 0.0 );
    }

    return r_val;
}
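// Illustrative sketch (not part of the original source): the query above
// reads the internal scalar buffer, so it composes naturally with a
// detached scalar. This assumes bli_setsc( real, imag, chi ) writes
// through chi's buffer, which for a detached scalar is that same
// internal buffer.
void example_nonzero_imag( void )
{
    obj_t chi;

    bli_obj_scalar_init_detached( BLIS_DCOMPLEX, &chi );

    bli_setsc( 3.0, 0.0, &chi );
    // bli_obj_scalar_has_nonzero_imag( &chi ) now returns FALSE.

    bli_setsc( 3.0, -1.0, &chi );
    // bli_obj_scalar_has_nonzero_imag( &chi ) now returns TRUE.
}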
void bli_trsm_rl_ker_var2( obj_t*          a,
                           obj_t*          b,
                           obj_t*          c,
                           trsm_t*         cntl,
                           trsm_thrinfo_t* thread )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    doff_t    diagoffb = bli_obj_diag_offset( *b );

    pack_t    schema_a = bli_obj_pack_schema( *a );
    pack_t    schema_b = bli_obj_pack_schema( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    dim_t     pd_a     = bli_obj_panel_dim( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    dim_t     pd_b     = bli_obj_panel_dim( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    void*     buf_alpha1;
    void*     buf_alpha2;

    FUNCPTR_T f;

    func_t*   gemmtrsm_ukrs;
    func_t*   gemm_ukrs;
    void*     gemmtrsm_ukr;
    void*     gemm_ukr;

    // Grab the address of the internal scalar buffer for the scalar
    // attached to A. This will be the alpha scalar used in the gemmtrsm
    // subproblems (ie: the scalar that would be applied to the packed
    // copy of A prior to it being updated by the trsm subproblem). This
    // scalar may be unit, if for example it was applied during packing.
    buf_alpha1 = bli_obj_internal_scalar_buffer( *a );

    // Grab the address of the internal scalar buffer for the scalar
    // attached to C. This will be the "beta" scalar used in the gemm-only
    // subproblems that correspond to micro-panels that do not intersect
    // the diagonal. We need this separate scalar because it's possible
    // that the alpha attached to B was reset, if it was applied during
    // packing.
    buf_alpha2 = bli_obj_internal_scalar_buffer( *c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Extract from the control tree node the func_t objects containing
    // the gemmtrsm and gemm micro-kernel function addresses, and then
    // query the function addresses corresponding to the current datatype.
    gemmtrsm_ukrs = cntl_gemmtrsm_u_ukrs( cntl );
    gemm_ukrs     = cntl_gemm_ukrs( cntl );
    gemmtrsm_ukr  = bli_func_obj_query( dt_exec, gemmtrsm_ukrs );
    gemm_ukr      = bli_func_obj_query( dt_exec, gemm_ukrs );

    // Invoke the function.
    f( diagoffb,
       schema_a,
       schema_b,
       m,
       n,
       k,
       buf_alpha1,
       buf_a, cs_a, pd_a, ps_a,
       buf_b, rs_b, pd_b, ps_b,
       buf_alpha2,
       buf_c, rs_c, cs_c,
       gemmtrsm_ukr,
       gemm_ukr,
       thread );
}
void bli_trsm_ru_ker_var2( obj_t*  a,
                           obj_t*  b,
                           obj_t*  c,
                           trsm_t* cntl )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    doff_t    diagoffb = bli_obj_diag_offset( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    inc_t     pd_a     = bli_obj_panel_dim( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    inc_t     pd_b     = bli_obj_panel_dim( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    void*     buf_alpha1;
    void*     buf_alpha2;

    FUNCPTR_T f;

    func_t*   gemmtrsm_ukrs;
    func_t*   gemm_ukrs;
    void*     gemmtrsm_ukr;
    void*     gemm_ukr;

    // Grab the address of the internal scalar buffer for the scalar
    // attached to A. This will be the alpha scalar used in the gemmtrsm
    // subproblems (ie: the scalar that would be applied to the packed
    // copy of A prior to it being updated by the trsm subproblem). This
    // scalar may be unit, if for example it was applied during packing.
    buf_alpha1 = bli_obj_internal_scalar_buffer( *a );

    // Grab the address of the internal scalar buffer for the scalar
    // attached to C. This will be the "beta" scalar used in the gemm-only
    // subproblems that correspond to micro-panels that do not intersect
    // the diagonal. We need this separate scalar because it's possible
    // that the alpha attached to B was reset, if it was applied during
    // packing.
    buf_alpha2 = bli_obj_internal_scalar_buffer( *c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Adjust cs_a and rs_b if A and B were packed for 4m or 3m. This
    // is needed because cs_a and rs_b are used to index into the
    // micro-panels of A and B, respectively, and since the pointer
    // types in the macro-kernel (scomplex or dcomplex) will result
    // in pointer arithmetic that moves twice as far as it should,
    // given the datatypes actually stored (float or double), we must
    // halve the strides to compensate.
    if ( bli_obj_is_panel_packed_4m( *a ) ||
         bli_obj_is_panel_packed_3m( *a ) )
    {
        cs_a /= 2;
        rs_b /= 2;
    }

    // Extract from the control tree node the func_t objects containing
    // the gemmtrsm and gemm micro-kernel function addresses, and then
    // query the function addresses corresponding to the current datatype.
    gemmtrsm_ukrs = cntl_gemmtrsm_l_ukrs( cntl );
    gemm_ukrs     = cntl_gemm_ukrs( cntl );
    gemmtrsm_ukr  = bli_func_obj_query( dt_exec, gemmtrsm_ukrs );
    gemm_ukr      = bli_func_obj_query( dt_exec, gemm_ukrs );

    // Invoke the function.
    f( diagoffb,
       m,
       n,
       k,
       buf_alpha1,
       buf_a, cs_a, pd_a, ps_a,
       buf_b, rs_b, pd_b, ps_b,
       buf_alpha2,
       buf_c, rs_c, cs_c,
       gemmtrsm_ukr,
       gemm_ukr );
}
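// Illustrative sketch (not part of the original source): why the 4m/3m
// stride halving above is necessary. Complex pointer arithmetic advances
// in units of the complex element size, which is twice the size of the
// real values actually stored in a 4m/3m-packed micro-panel.
#include <stdio.h>

void example_stride_halving_rationale( void )
{
    dcomplex* p    = NULL;
    inc_t     cs_a = 8;

    // p + cs_a moves cs_a * sizeof(dcomplex) bytes, i.e. twice
    // cs_a * sizeof(double). Halving cs_a restores the intended
    // displacement when the underlying storage is really real-valued.
    printf( "dcomplex step: %zu bytes, double step: %zu bytes\n",
            cs_a * sizeof( *p ), cs_a * sizeof( double ) );
}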
void bli_herk_l_ker_var2( obj_t*          a,
                          obj_t*          b,
                          obj_t*          c,
                          gemm_t*         cntl,
                          herk_thrinfo_t* thread )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    doff_t    diagoffc = bli_obj_diag_offset( *c );

    pack_t    schema_a = bli_obj_pack_schema( *a );
    pack_t    schema_b = bli_obj_pack_schema( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    inc_t     pd_a     = bli_obj_panel_dim( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    inc_t     pd_b     = bli_obj_panel_dim( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    obj_t     scalar_a;
    obj_t     scalar_b;

    void*     buf_alpha;
    void*     buf_beta;

    FUNCPTR_T f;

    func_t*   gemm_ukrs;
    void*     gemm_ukr;

    // Detach and multiply the scalars attached to A and B.
    bli_obj_scalar_detach( a, &scalar_a );
    bli_obj_scalar_detach( b, &scalar_b );
    bli_mulsc( &scalar_a, &scalar_b );

    // Grab the addresses of the internal scalar buffers for the scalar
    // merged above and the scalar attached to C.
    buf_alpha = bli_obj_internal_scalar_buffer( scalar_b );
    buf_beta  = bli_obj_internal_scalar_buffer( *c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Extract from the control tree node the func_t object containing
    // the gemm micro-kernel function addresses, and then query the
    // function address corresponding to the current datatype.
    gemm_ukrs = cntl_gemm_ukrs( cntl );
    gemm_ukr  = bli_func_obj_query( dt_exec, gemm_ukrs );

    // Invoke the function.
    f( diagoffc,
       schema_a,
       schema_b,
       m,
       n,
       k,
       buf_alpha,
       buf_a, cs_a, pd_a, ps_a,
       buf_b, rs_b, pd_b, ps_b,
       buf_beta,
       buf_c, rs_c, cs_c,
       gemm_ukr,
       thread );
}
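// Illustrative sketch (not part of the original source): the
// scalar-merging idiom used above (and in the trmm/gemm variants below).
// bli_obj_scalar_detach() lifts the scalar attached to each operand into
// a standalone scalar object, and bli_mulsc() accumulates the product
// into its second operand, whose internal buffer then serves as the
// effective alpha.
void example_merge_attached_scalars( obj_t* a, obj_t* b )
{
    obj_t scalar_a;
    obj_t scalar_b;
    void* buf_alpha;

    bli_obj_scalar_detach( a, &scalar_a );
    bli_obj_scalar_detach( b, &scalar_b );

    // scalar_b := scalar_a * scalar_b.
    bli_mulsc( &scalar_a, &scalar_b );

    // The merged value now lives in scalar_b's internal scalar buffer.
    buf_alpha = bli_obj_internal_scalar_buffer( scalar_b );
    ( void )buf_alpha;
}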
void bli_herk_u_ker_var2
     (
       obj_t*     a,
       obj_t*     b,
       obj_t*     c,
       cntx_t*    cntx,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
    num_t     dt_exec  = bli_obj_exec_dt( c );

    doff_t    diagoffc = bli_obj_diag_offset( c );

    pack_t    schema_a = bli_obj_pack_schema( a );
    pack_t    schema_b = bli_obj_pack_schema( b );

    dim_t     m        = bli_obj_length( c );
    dim_t     n        = bli_obj_width( c );
    dim_t     k        = bli_obj_width( a );

    void*     buf_a    = bli_obj_buffer_at_off( a );
    inc_t     cs_a     = bli_obj_col_stride( a );
    inc_t     is_a     = bli_obj_imag_stride( a );
    dim_t     pd_a     = bli_obj_panel_dim( a );
    inc_t     ps_a     = bli_obj_panel_stride( a );

    void*     buf_b    = bli_obj_buffer_at_off( b );
    inc_t     rs_b     = bli_obj_row_stride( b );
    inc_t     is_b     = bli_obj_imag_stride( b );
    dim_t     pd_b     = bli_obj_panel_dim( b );
    inc_t     ps_b     = bli_obj_panel_stride( b );

    void*     buf_c    = bli_obj_buffer_at_off( c );
    inc_t     rs_c     = bli_obj_row_stride( c );
    inc_t     cs_c     = bli_obj_col_stride( c );

    obj_t     scalar_a;
    obj_t     scalar_b;

    void*     buf_alpha;
    void*     buf_beta;

    FUNCPTR_T f;

    // Detach and multiply the scalars attached to A and B.
    bli_obj_scalar_detach( a, &scalar_a );
    bli_obj_scalar_detach( b, &scalar_b );
    bli_mulsc( &scalar_a, &scalar_b );

    // Grab the addresses of the internal scalar buffers for the scalar
    // merged above and the scalar attached to C.
    buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
    buf_beta  = bli_obj_internal_scalar_buffer( c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Invoke the function.
    f( diagoffc,
       schema_a,
       schema_b,
       m,
       n,
       k,
       buf_alpha,
       buf_a, cs_a, is_a, pd_a, ps_a,
       buf_b, rs_b, is_b, pd_b, ps_b,
       buf_beta,
       buf_c, rs_c, cs_c,
       cntx,
       thread );
}
void bli_trsm_rl_ker_var2
     (
       obj_t*     a,
       obj_t*     b,
       obj_t*     c,
       cntx_t*    cntx,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    doff_t    diagoffb = bli_obj_diag_offset( *b );

    pack_t    schema_a = bli_obj_pack_schema( *a );
    pack_t    schema_b = bli_obj_pack_schema( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    dim_t     pd_a     = bli_obj_panel_dim( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    dim_t     pd_b     = bli_obj_panel_dim( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    void*     buf_alpha1;
    void*     buf_alpha2;

    FUNCPTR_T f;

    // Grab the address of the internal scalar buffer for the scalar
    // attached to A (the non-triangular matrix). This will be the alpha
    // scalar used in the gemmtrsm subproblems (ie: the scalar that would
    // be applied to the packed copy of A prior to it being updated by
    // the trsm subproblem). This scalar may be unit, if for example it
    // was applied during packing.
    buf_alpha1 = bli_obj_internal_scalar_buffer( *a );

    // Grab the address of the internal scalar buffer for the scalar
    // attached to C. This will be the "beta" scalar used in the gemm-only
    // subproblems that correspond to micro-panels that do not intersect
    // the diagonal. We need this separate scalar because it's possible
    // that the alpha attached to B was reset, if it was applied during
    // packing.
    buf_alpha2 = bli_obj_internal_scalar_buffer( *c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Invoke the function.
    f( diagoffb,
       schema_a,
       schema_b,
       m,
       n,
       k,
       buf_alpha1,
       buf_a, cs_a, pd_a, ps_a,
       buf_b, rs_b, pd_b, ps_b,
       buf_alpha2,
       buf_c, rs_c, cs_c,
       cntx,
       thread );
}
void bli_trmm_rl_ker_var2( obj_t*  a,
                           obj_t*  b,
                           obj_t*  c,
                           trmm_t* cntl )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    doff_t    diagoffb = bli_obj_diag_offset( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     rs_a     = bli_obj_row_stride( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    inc_t     cs_b     = bli_obj_col_stride( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    obj_t     scalar_a;
    obj_t     scalar_b;

    void*     buf_alpha;
    void*     buf_beta;

    FUNCPTR_T f;

    // Detach and multiply the scalars attached to A and B.
    bli_obj_scalar_detach( a, &scalar_a );
    bli_obj_scalar_detach( b, &scalar_b );
    bli_mulsc( &scalar_a, &scalar_b );

    // Grab the addresses of the internal scalar buffers for the scalar
    // merged above and the scalar attached to C.
    buf_alpha = bli_obj_internal_scalar_buffer( scalar_b );
    buf_beta  = bli_obj_internal_scalar_buffer( *c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Invoke the function.
    f( diagoffb,
       m,
       n,
       k,
       buf_alpha,
       buf_a, rs_a, cs_a, ps_a,
       buf_b, rs_b, cs_b, ps_b,
       buf_beta,
       buf_c, rs_c, cs_c );
}
void bli_trmm_ru_ker_var2( obj_t*          a,
                           obj_t*          b,
                           obj_t*          c,
                           trmm_t*         cntl,
                           trmm_thrinfo_t* thread )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    doff_t    diagoffb = bli_obj_diag_offset( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    inc_t     pd_a     = bli_obj_panel_dim( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    inc_t     pd_b     = bli_obj_panel_dim( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    obj_t     scalar_a;
    obj_t     scalar_b;

    void*     buf_alpha;
    void*     buf_beta;

    FUNCPTR_T f;

    func_t*   gemm_ukrs;
    void*     gemm_ukr;

    // Detach and multiply the scalars attached to A and B.
    bli_obj_scalar_detach( a, &scalar_a );
    bli_obj_scalar_detach( b, &scalar_b );
    bli_mulsc( &scalar_a, &scalar_b );

    // Grab the addresses of the internal scalar buffers for the scalar
    // merged above and the scalar attached to C.
    buf_alpha = bli_obj_internal_scalar_buffer( scalar_b );
    buf_beta  = bli_obj_internal_scalar_buffer( *c );

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Adjust cs_a and rs_b if A and B were packed for 4m or 3m. This
    // is needed because cs_a and rs_b are used to index into the
    // micro-panels of A and B, respectively, and since the pointer
    // types in the macro-kernel (scomplex or dcomplex) will result
    // in pointer arithmetic that moves twice as far as it should,
    // given the datatypes actually stored (float or double), we must
    // halve the strides to compensate.
    if ( bli_obj_is_panel_packed_4m( *a ) ||
         bli_obj_is_panel_packed_3m( *a ) )
    {
        cs_a /= 2;
        rs_b /= 2;
    }

    // Extract from the control tree node the func_t object containing
    // the gemm micro-kernel function addresses, and then query the
    // function address corresponding to the current datatype.
    gemm_ukrs = cntl_gemm_ukrs( cntl );
    gemm_ukr  = bli_func_obj_query( dt_exec, gemm_ukrs );

    // Invoke the function.
    f( diagoffb,
       m,
       n,
       k,
       buf_alpha,
       buf_a, cs_a, pd_a, ps_a,
       buf_b, rs_b, pd_b, ps_b,
       buf_beta,
       buf_c, rs_c, cs_c,
       gemm_ukr,
       thread );
}
void bli_gemm_ker_var2
     (
       obj_t*     a,
       obj_t*     b,
       obj_t*     c,
       cntx_t*    cntx,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
    num_t     dt_exec  = bli_obj_execution_datatype( *c );

    pack_t    schema_a = bli_obj_pack_schema( *a );
    pack_t    schema_b = bli_obj_pack_schema( *b );

    dim_t     m        = bli_obj_length( *c );
    dim_t     n        = bli_obj_width( *c );
    dim_t     k        = bli_obj_width( *a );

    void*     buf_a    = bli_obj_buffer_at_off( *a );
    inc_t     cs_a     = bli_obj_col_stride( *a );
    inc_t     is_a     = bli_obj_imag_stride( *a );
    dim_t     pd_a     = bli_obj_panel_dim( *a );
    inc_t     ps_a     = bli_obj_panel_stride( *a );

    void*     buf_b    = bli_obj_buffer_at_off( *b );
    inc_t     rs_b     = bli_obj_row_stride( *b );
    inc_t     is_b     = bli_obj_imag_stride( *b );
    dim_t     pd_b     = bli_obj_panel_dim( *b );
    inc_t     ps_b     = bli_obj_panel_stride( *b );

    void*     buf_c    = bli_obj_buffer_at_off( *c );
    inc_t     rs_c     = bli_obj_row_stride( *c );
    inc_t     cs_c     = bli_obj_col_stride( *c );

    obj_t     scalar_a;
    obj_t     scalar_b;

    void*     buf_alpha;
    void*     buf_beta;

    FUNCPTR_T f;

    // Detach and multiply the scalars attached to A and B.
    bli_obj_scalar_detach( a, &scalar_a );
    bli_obj_scalar_detach( b, &scalar_b );
    bli_mulsc( &scalar_a, &scalar_b );

    // Grab the addresses of the internal scalar buffers for the scalar
    // merged above and the scalar attached to C.
    buf_alpha = bli_obj_internal_scalar_buffer( scalar_b );
    buf_beta  = bli_obj_internal_scalar_buffer( *c );

    // If 1m is being employed on a column- or row-stored matrix with a
    // real-valued beta, we can use the real domain macro-kernel, which
    // eliminates a little overhead associated with the 1m virtual
    // micro-kernel.
#if 1
    if ( bli_is_1m_packed( schema_a ) )
    {
        bli_l3_ind_recast_1m_params( dt_exec,
                                     schema_a,
                                     c,
                                     m, n, k,
                                     pd_a, ps_a,
                                     pd_b, ps_b,
                                     rs_c, cs_c );
    }
#endif

    // Index into the type combination array to extract the correct
    // function pointer.
    f = ftypes[dt_exec];

    // Invoke the function.
    f( schema_a,
       schema_b,
       m,
       n,
       k,
       buf_alpha,
       buf_a, cs_a, is_a, pd_a, ps_a,
       buf_b, rs_b, is_b, pd_b, ps_b,
       buf_beta,
       buf_c, rs_c, cs_c,
       cntx,
       thread );
}
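// Illustrative sketch (not part of the original source): the arithmetic
// identity behind the 1m recast above. A complex product can be computed
// entirely in the real domain by expanding each complex scalar a into the
// 2x2 real block [ a_r -a_i ; a_i a_r ], which is what allows a
// real-domain macro-kernel to stand in for the 1m virtual micro-kernel.
void example_1m_identity( void )
{
    double a_r = 1.0, a_i = 2.0;   // a = 1 + 2i
    double b_r = 3.0, b_i = -1.0;  // b = 3 - i

    // Rows of the 2x2 expansion of a applied to [ b_r ; b_i ]:
    double c_r = a_r * b_r - a_i * b_i;  // = 5.0
    double c_i = a_i * b_r + a_r * b_i;  // = 5.0

    ( void )c_r; ( void )c_i;            // c = a*b = 5 + 5i
}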