Example #1
0
void bli_trsm_rl_ker_var2( obj_t*  a,
                           obj_t*  b,
                           obj_t*  c,
                           trsm_t* cntl,
                           trsm_thrinfo_t* thread )
{
	num_t     dt_exec   = bli_obj_execution_datatype( *c );

	doff_t    diagoffb  = bli_obj_diag_offset( *b );

	pack_t    schema_a  = bli_obj_pack_schema( *a );
	pack_t    schema_b  = bli_obj_pack_schema( *b );

	dim_t     m         = bli_obj_length( *c );
	dim_t     n         = bli_obj_width( *c );
	dim_t     k         = bli_obj_width( *a );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );
	dim_t     pd_a      = bli_obj_panel_dim( *a );
	inc_t     ps_a      = bli_obj_panel_stride( *a );

	void*     buf_b     = bli_obj_buffer_at_off( *b );
	inc_t     rs_b      = bli_obj_row_stride( *b );
	dim_t     pd_b      = bli_obj_panel_dim( *b );
	inc_t     ps_b      = bli_obj_panel_stride( *b );

	void*     buf_c     = bli_obj_buffer_at_off( *c );
	inc_t     rs_c      = bli_obj_row_stride( *c );
	inc_t     cs_c      = bli_obj_col_stride( *c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	func_t*   gemmtrsm_ukrs;
	func_t*   gemm_ukrs;
	void*     gemmtrsm_ukr;
	void*     gemm_ukr;


	// Grab the address of the internal scalar buffer for the scalar
	// attached to A. This will be the alpha scalar used in the gemmtrsm
	// subproblems (ie: the scalar that would be applied to the packed
	// copy of A prior to it being updated by the trsm subproblem). This
	// scalar may be unit, if for example it was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( *a );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	buf_alpha2 = bli_obj_internal_scalar_buffer( *c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Extract from the control tree node the func_t objects containing
	// the gemmtrsm and gemm micro-kernel function addresses, and then
	// query the function addresses corresponding to the current datatype.
	gemmtrsm_ukrs = cntl_gemmtrsm_u_ukrs( cntl );
	gemm_ukrs     = cntl_gemm_ukrs( cntl );
	gemmtrsm_ukr  = bli_func_obj_query( dt_exec, gemmtrsm_ukrs );
	gemm_ukr      = bli_func_obj_query( dt_exec, gemm_ukrs );

	// Invoke the function.
	f( diagoffb,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   gemmtrsm_ukr,
	   gemm_ukr,
	   thread );
}
Example #2
0
void bli_trsm_lu_ker_var2( obj_t*  a,
                           obj_t*  b,
                           obj_t*  c,
                           trsm_t* cntl )
{
	num_t     dt_exec   = bli_obj_execution_datatype( *c );

	doff_t    diagoffa  = bli_obj_diag_offset( *a );

	dim_t     m         = bli_obj_length( *c );
	dim_t     n         = bli_obj_width( *c );
	dim_t     k         = bli_obj_width( *a );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );
	inc_t     pd_a      = bli_obj_panel_dim( *a );
	inc_t     ps_a      = bli_obj_panel_stride( *a );

	void*     buf_b     = bli_obj_buffer_at_off( *b );
	inc_t     rs_b      = bli_obj_row_stride( *b );
	inc_t     pd_b      = bli_obj_panel_dim( *b );
	inc_t     ps_b      = bli_obj_panel_stride( *b );

	void*     buf_c     = bli_obj_buffer_at_off( *c );
	inc_t     rs_c      = bli_obj_row_stride( *c );
	inc_t     cs_c      = bli_obj_col_stride( *c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	func_t*   gemmtrsm_ukrs;
	func_t*   gemm_ukrs;
	void*     gemmtrsm_ukr;
	void*     gemm_ukr;


	// Grab the address of the internal scalar buffer for the scalar
	// attached to B. This will be the alpha scalar used in the gemmtrsm
	// subproblems (ie: the scalar that would be applied to the packed
	// copy of B prior to it being updated by the trsm subproblem). This
	// scalar may be unit, if for example it was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( *b );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	buf_alpha2 = bli_obj_internal_scalar_buffer( *c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Adjust cs_a and rs_b if A and B were packed for 4m or 3m. This
	// is needed because cs_a and rs_b are used to index into the
	// micro-panels of A and B, respectively, and since the pointer
	// types in the macro-kernel (scomplex or dcomplex) will result
	// in pointer arithmetic that moves twice as far as it should,
	// given the datatypes actually stored (float or double), we must
	// halve the strides to compensate.
	if ( bli_obj_is_panel_packed_4m( *a ) ||
	     bli_obj_is_panel_packed_3m( *a ) ) { cs_a /= 2; rs_b /= 2; }

	// Extract from the control tree node the func_t objects containing
	// the gemmtrsm and gemm micro-kernel function addresses, and then
	// query the function addresses corresponding to the current datatype.
	gemmtrsm_ukrs = cntl_gemmtrsm_u_ukrs( cntl );
	gemm_ukrs     = cntl_gemm_ukrs( cntl );
	gemmtrsm_ukr  = bli_func_obj_query( dt_exec, gemmtrsm_ukrs );
	gemm_ukr      = bli_func_obj_query( dt_exec, gemm_ukrs );

	// Invoke the function.
	f( diagoffa,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   gemmtrsm_ukr,
	   gemm_ukr );
}