コード例 #1
0
/*
 * Object-level wrapper for norm1m (unblocked variant 1).
 *
 * Pulls the raw properties out of x and norm and forwards them to the typed
 * implementation stored in ftypes for the datatype of x.
 *
 *   x    - object whose diagonal offset, diag/uplo properties, dimensions,
 *          buffer and strides are queried.
 *   norm - object whose buffer (at its offset) is handed to the typed
 *          routine; presumably it receives the result -- confirm against
 *          the typed implementation.
 */
void bli_norm1m_unb_var1( obj_t* x,
                          obj_t* norm )
{
    // Datatype of x; used below to pick the typed implementation.
    num_t     x_dt      = bli_obj_datatype( *x );

    // Structural properties of x.
    doff_t    x_diagoff = bli_obj_diag_offset( *x );
    // NOTE(review): declared uplo_t although it holds the result of
    // bli_obj_diag(); diag_t may be the intended type -- confirm.
    uplo_t    x_diag    = bli_obj_diag( *x );
    uplo_t    x_uplo    = bli_obj_uplo( *x );

    // Dimensions of x.
    dim_t     x_len     = bli_obj_length( *x );
    dim_t     x_wid     = bli_obj_width( *x );

    // Buffer and strides of x.
    void*     x_buf     = bli_obj_buffer_at_off( *x );
    inc_t     x_rs      = bli_obj_row_stride( *x );
    inc_t     x_cs      = bli_obj_col_stride( *x );

    // Buffer of norm.
    void*     norm_buf  = bli_obj_buffer_at_off( *norm );

    // Select the typed implementation for x's datatype.
    FUNCPTR_T typed_fn  = ftypes[x_dt];

    // Dispatch.
    typed_fn( x_diagoff,
              x_diag,
              x_uplo,
              x_len,
              x_wid,
              x_buf, x_rs, x_cs,
              norm_buf );
}
コード例 #2
0
ファイル: bli_unpackm_blk_var2.c プロジェクト: elemental/blis
/*
 * Object-level wrapper for unpackm (blocked variant 2).
 *
 * Gathers structure, dimension and buffer information from the packed
 * object p and the destination object c, then calls the typed
 * implementation stored in ftypes for the datatype of c.
 *
 *   p    - packed object; supplies buffer, strides, panel dimension and
 *          panel stride.
 *   c    - object supplying the datatype, structure, diagonal/uplo/trans
 *          properties, dimensions, panel dimensions, buffer and strides.
 *   cntl - not referenced in this function.
 */
void bli_unpackm_blk_var2( obj_t*     p,
                           obj_t*     c,
                           unpackm_t* cntl )
{
	// The datatype used for dispatch is read from c.
	num_t     dt        = bli_obj_datatype( *c );

	// Structural parameters would normally come from the source operand.
	// The packm/unpackm framework is not settled enough to assume that
	// struc(P) == struc(C) here (densification may have marked P as dense
	// when the root is upper or lower), so these are read from C instead.
	struc_t   c_struc   = bli_obj_struc( *c );
	doff_t    c_diagoff = bli_obj_diag_offset( *c );
	diag_t    c_diag    = bli_obj_diag( *c );
	uplo_t    c_uplo    = bli_obj_uplo( *c );

	// The trans argument would also normally belong to the source operand,
	// but the packed matrix is known not to be transposed: any transposition
	// stems from C having been transposed when it was packed. Hence C is
	// queried. Only the transposition bit is read (not conjugation), since
	// un-conjugating a matrix that was conjugated during packing is
	// probably not wanted.
	trans_t   c_trans   = bli_obj_onlytrans_status( *c );

	// Dimensions and panel dimensions, all taken from c.
	dim_t     c_len     = bli_obj_length( *c );
	dim_t     c_wid     = bli_obj_width( *c );
	dim_t     panel_len = bli_obj_panel_length( *c );
	dim_t     panel_wid = bli_obj_panel_width( *c );

	// Buffer, strides and panel layout of p.
	void*     p_buf     = bli_obj_buffer_at_off( *p );
	inc_t     p_rs      = bli_obj_row_stride( *p );
	inc_t     p_cs      = bli_obj_col_stride( *p );
	dim_t     p_pd      = bli_obj_panel_dim( *p );
	inc_t     p_ps      = bli_obj_panel_stride( *p );

	// Buffer and strides of c.
	void*     c_buf     = bli_obj_buffer_at_off( *c );
	inc_t     c_rs      = bli_obj_row_stride( *c );
	inc_t     c_cs      = bli_obj_col_stride( *c );

	// Select the typed implementation for this datatype.
	FUNCPTR_T typed_fn  = ftypes[dt];

	// Dispatch.
	typed_fn( c_struc,
	          c_diagoff,
	          c_diag,
	          c_uplo,
	          c_trans,
	          c_len,
	          c_wid,
	          panel_len,
	          panel_wid,
	          p_buf, p_rs, p_cs,
	                 p_pd, p_ps,
	          c_buf, c_rs, c_cs );
}
コード例 #3
0
/*
 * Object-level wrapper for packm (unblocked variant 1).
 *
 * Extracts the properties of the source object c and the pack destination
 * p, and calls the typed implementation stored in ftypes for the datatype
 * of c. Only the thread for which thread_am_ochief() is true makes the
 * call; every other thread returns without invoking it.
 *
 *   c      - object supplying datatype, structure, diag/uplo/conjtrans
 *            properties, buffer and strides.
 *   p      - object supplying dimensions, padded dimensions, buffer and
 *            strides.
 *   thread - thread info consulted by thread_am_ochief().
 */
void bli_packm_unb_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* thread )
{
	// Datatype used for both the kappa constant and the dispatch.
	num_t     dt        = bli_obj_datatype( *c );

	// Structural properties of c.
	struc_t   c_struc   = bli_obj_struc( *c );
	doff_t    c_diagoff = bli_obj_diag_offset( *c );
	diag_t    c_diag    = bli_obj_diag( *c );
	uplo_t    c_uplo    = bli_obj_uplo( *c );
	trans_t   c_trans   = bli_obj_conjtrans_status( *c );

	// Dimensions and padded dimensions of p.
	dim_t     p_len     = bli_obj_length( *p );
	dim_t     p_wid     = bli_obj_width( *p );
	dim_t     p_len_max = bli_obj_padded_length( *p );
	dim_t     p_wid_max = bli_obj_padded_width( *p );

	// Buffer and strides of c.
	void*     c_buf     = bli_obj_buffer_at_off( *c );
	inc_t     c_rs      = bli_obj_row_stride( *c );
	inc_t     c_cs      = bli_obj_col_stride( *c );

	// Buffer and strides of p.
	void*     p_buf     = bli_obj_buffer_at_off( *p );
	inc_t     p_rs      = bli_obj_row_stride( *p );
	inc_t     p_cs      = bli_obj_col_stride( *p );

	// The computational kernel is assumed to always apply the alpha scalar
	// of the higher-level operation, so kappa is BLIS_ONE and the packm
	// implementation does no scaling while packing.
	void*     kappa_buf = bli_obj_buffer_for_const( dt, BLIS_ONE );

	// Select the typed implementation for this datatype.
	FUNCPTR_T typed_fn  = ftypes[dt];

	// Threads other than the chief do not call the typed routine.
	if ( !thread_am_ochief( thread ) )
		return;

	// Dispatch.
	typed_fn( c_struc,
	          c_diagoff,
	          c_diag,
	          c_uplo,
	          c_trans,
	          p_len,
	          p_wid,
	          p_len_max,
	          p_wid_max,
	          kappa_buf,
	          c_buf, c_rs, c_cs,
	          p_buf, p_rs, p_cs );
}
コード例 #4
0
ファイル: bli_scalm_unb_var1.c プロジェクト: ShawnLess/blis
/*
 * Object-level wrapper for scalm (unblocked variant 1).
 *
 * Folds alpha into the scalar attached to a local alias of x, then passes
 * the address of that combined scalar, together with the raw properties
 * of x, to the typed implementation stored in ftypes.
 *
 *   alpha - scalar object; applied to the alias's attached scalar unless
 *           it compares equal to BLIS_ONE.
 *   x     - object supplying datatype, diagonal offset, diag/uplo
 *           properties, dimensions, buffer and strides.
 *   cntx  - not referenced in this function.
 */
void bli_scalm_unb_var1( obj_t*  alpha,
                         obj_t*  x,
                         cntx_t* cntx )
{
	num_t     dt_x      = bli_obj_datatype( *x );

	doff_t    diagoffx  = bli_obj_diag_offset( *x );
	// NOTE(review): declared uplo_t although it holds the result of
	// bli_obj_diag(); diag_t may be the intended type -- confirm.
	uplo_t    diagx     = bli_obj_diag( *x );
	uplo_t    uplox     = bli_obj_uplo( *x );

	dim_t     m         = bli_obj_length( *x );
	dim_t     n         = bli_obj_width( *x );

	void*     buf_x     = bli_obj_buffer_at_off( *x );
	inc_t     rs_x      = bli_obj_row_stride( *x );
	inc_t     cs_x      = bli_obj_col_stride( *x );

	void*     buf_alpha;

	obj_t     x_local;

	FUNCPTR_T f;

	// Alias x to x_local so we can apply alpha if it is non-unit.
	bli_obj_alias_to( *x, x_local );

	// If alpha is non-unit, apply it to the scalar attached to x_local.
	if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
	{
		bli_obj_scalar_apply_scalar( alpha, &x_local );
	}

	// Grab the address of the internal scalar buffer of the local alias.
	// It must be x_local (not *x): alpha was applied to the alias above,
	// so reading the scalar from *x would ignore alpha.
	buf_alpha = bli_obj_internal_scalar_buffer( x_local );

	// Index into the type combination array to extract the correct
	// function pointer.
	// NOTE: We use dt_x for both alpha and x because alpha was obtained
	// from the attached scalar of x, which is guaranteed to be of the
	// same datatype as x.
	f = ftypes[dt_x][dt_x];

	// Invoke the function.
	// NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for alpha
	// because it would have already been conjugated by the front-end.
	f( BLIS_NO_CONJUGATE,
	   diagoffx,
	   diagx,
	   uplox,
	   m,
	   n,
	   buf_alpha,
	   buf_x, rs_x, cs_x );
}
コード例 #5
0
ファイル: bli_axpym_unb_var1.c プロジェクト: ShawnLess/blis
/*
 * Object-level wrapper for axpym (unblocked variant 1).
 *
 * Extracts the raw properties of alpha, x and y and calls the typed
 * implementation stored in ftypes for the (alpha, x, y) datatype
 * combination.
 *
 *   alpha - scalar object; its datatype and buffer are resolved by
 *           bli_set_scalar_dt_buffer().
 *   x     - object supplying datatype, diagonal offset, diag/uplo/conjtrans
 *           properties, buffer and strides.
 *   y     - object supplying datatype, dimensions, buffer and strides.
 *   cntx  - not referenced in this function.
 */
void bli_axpym_unb_var1( obj_t*  alpha,
                         obj_t*  x,
                         obj_t*  y,
                         cntx_t* cntx )
{
	// Datatypes of the two matrix operands.
	num_t     x_dt      = bli_obj_datatype( *x );
	num_t     y_dt      = bli_obj_datatype( *y );

	// Structural properties of x.
	doff_t    x_diagoff = bli_obj_diag_offset( *x );
	diag_t    x_diag    = bli_obj_diag( *x );
	uplo_t    x_uplo    = bli_obj_uplo( *x );
	trans_t   x_trans   = bli_obj_conjtrans_status( *x );

	// The dimensions are read from y, not x.
	dim_t     y_len     = bli_obj_length( *y );
	dim_t     y_wid     = bli_obj_width( *y );

	// Strides and buffer of x.
	inc_t     x_rs      = bli_obj_row_stride( *x );
	inc_t     x_cs      = bli_obj_col_stride( *x );
	void*     x_buf     = bli_obj_buffer_at_off( *x );

	// Strides and buffer of y.
	inc_t     y_rs      = bli_obj_row_stride( *y );
	inc_t     y_cs      = bli_obj_col_stride( *y );
	void*     y_buf     = bli_obj_buffer_at_off( *y );

	num_t     alpha_dt;
	void*     alpha_buf;

	FUNCPTR_T typed_fn;

	// When alpha is one of the scalar constants, x's datatype is used to
	// locate the matching constant value; otherwise alpha's own datatype
	// is used and its buffer is taken at alpha's offset.
	bli_set_scalar_dt_buffer( alpha, x_dt, alpha_dt, alpha_buf );

	// Select the typed implementation for this datatype combination.
	typed_fn = ftypes[alpha_dt][x_dt][y_dt];

	// Dispatch.
	typed_fn( x_diagoff,
	          x_diag,
	          x_uplo,
	          x_trans,
	          y_len,
	          y_wid,
	          alpha_buf,
	          x_buf, x_rs, x_cs,
	          y_buf, y_rs, y_cs );
}
コード例 #6
0
ファイル: bli_trmv_unf_var2.c プロジェクト: ShawnLess/blis
/*
 * Object-level wrapper for trmv (unblocked-fused variant 2).
 *
 * Extracts the raw properties of alpha, a and x and calls the typed
 * implementation stored in ftypes for the (a, x) datatype combination.
 *
 *   alpha - scalar object; its buffer is obtained for the union datatype
 *           of a and x.
 *   a     - object supplying datatype, uplo/conjtrans/diag properties,
 *           length, buffer and strides.
 *   x     - object supplying datatype, buffer and vector increment.
 *   cntx  - not referenced in this function.
 *   cntl  - not referenced in this function.
 */
void bli_trmv_unf_var2( obj_t*  alpha,
                        obj_t*  a,
                        obj_t*  x,
                        cntx_t* cntx,
                        trmv_t* cntl )
{
	// Datatypes of the two operands.
	num_t     a_dt      = bli_obj_datatype( *a );
	num_t     x_dt      = bli_obj_datatype( *x );

	// Structural properties of a.
	uplo_t    a_uplo    = bli_obj_uplo( *a );
	trans_t   a_trans   = bli_obj_conjtrans_status( *a );
	diag_t    a_diag    = bli_obj_diag( *a );

	// Only the length of a is queried for the problem size.
	dim_t     a_len     = bli_obj_length( *a );

	// Buffer and strides of a.
	void*     a_buf     = bli_obj_buffer_at_off( *a );
	inc_t     a_rs      = bli_obj_row_stride( *a );
	inc_t     a_cs      = bli_obj_col_stride( *a );

	// Buffer and increment of x.
	void*     x_buf     = bli_obj_buffer_at_off( *x );
	inc_t     x_inc     = bli_obj_vector_inc( *x );

	// alpha's datatype MUST be the type union of a and x, so that no
	// information is lost unnecessarily during the computation.
	num_t     alpha_dt  = bli_datatype_union( a_dt, x_dt );
	void*     alpha_buf = bli_obj_buffer_for_1x1( alpha_dt, *alpha );

	// Select the typed implementation for this datatype combination.
	FUNCPTR_T typed_fn  = ftypes[a_dt][x_dt];

	// Dispatch.
	typed_fn( a_uplo,
	          a_trans,
	          a_diag,
	          a_len,
	          alpha_buf,
	          a_buf, a_rs, a_cs,
	          x_buf, x_inc );
}
コード例 #7
0
ファイル: bli_addm_unb_var1.c プロジェクト: ShawnLess/blis
/*
 * Object-level wrapper for addm (unblocked variant 1).
 *
 * Extracts the raw properties of x and y and calls the typed
 * implementation stored in ftypes for the (x, y) datatype combination.
 *
 *   x    - object supplying datatype, diagonal offset, diag/uplo/conjtrans
 *          properties, buffer and strides.
 *   y    - object supplying datatype, dimensions, buffer and strides.
 *   cntx - not referenced in this function.
 */
void bli_addm_unb_var1( obj_t*  x,
                        obj_t*  y,
                        cntx_t* cntx )
{
	// Datatypes of the two operands.
	num_t     x_dt      = bli_obj_datatype( *x );
	num_t     y_dt      = bli_obj_datatype( *y );

	// Structural properties of x.
	doff_t    x_diagoff = bli_obj_diag_offset( *x );
	diag_t    x_diag    = bli_obj_diag( *x );
	uplo_t    x_uplo    = bli_obj_uplo( *x );
	trans_t   x_trans   = bli_obj_conjtrans_status( *x );

	// The dimensions are read from y, not x.
	dim_t     y_len     = bli_obj_length( *y );
	dim_t     y_wid     = bli_obj_width( *y );

	// Strides and buffer of x.
	inc_t     x_rs      = bli_obj_row_stride( *x );
	inc_t     x_cs      = bli_obj_col_stride( *x );
	void*     x_buf     = bli_obj_buffer_at_off( *x );

	// Strides and buffer of y.
	inc_t     y_rs      = bli_obj_row_stride( *y );
	inc_t     y_cs      = bli_obj_col_stride( *y );
	void*     y_buf     = bli_obj_buffer_at_off( *y );

	// Select the typed implementation for this datatype combination.
	FUNCPTR_T typed_fn  = ftypes[x_dt][y_dt];

	// Dispatch.
	typed_fn( x_diagoff,
	          x_diag,
	          x_uplo,
	          x_trans,
	          y_len,
	          y_wid,
	          x_buf, x_rs, x_cs,
	          y_buf, y_rs, y_cs );
}
コード例 #8
0
/*
 * Object-level wrapper for packm (blocked variant 1).
 *
 * Extracts the properties of the source object c and the pack destination
 * p, chooses the kappa scalar to be applied during packing, selects the
 * packm kernel set matching p's pack schema, and calls the typed
 * implementation stored in ftypes for the datatype of c.
 *
 *   c - object supplying datatype, structure, diag/uplo/conjtrans
 *       properties, buffer and strides.
 *   p - object supplying pack schema, inverted-diagonal and
 *       reverse-if-upper/lower flags, dimensions, padded dimensions,
 *       buffer, strides and panel layout. In the induced-method branch
 *       its attached scalar may be detached and reset by the chief thread.
 *   t - thread info; used for the chief-thread check, for broadcasting
 *       kappa_p, and passed through to the typed routine.
 */
void bli_packm_blk_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* t )
{
	num_t     dt_cp      = bli_obj_datatype( *c );

	struc_t   strucc     = bli_obj_struc( *c );
	doff_t    diagoffc   = bli_obj_diag_offset( *c );
	diag_t    diagc      = bli_obj_diag( *c );
	uplo_t    uploc      = bli_obj_uplo( *c );
	trans_t   transc     = bli_obj_conjtrans_status( *c );
	pack_t    schema     = bli_obj_pack_schema( *p );
	bool_t    invdiag    = bli_obj_has_inverted_diag( *p );
	bool_t    revifup    = bli_obj_is_pack_rev_if_upper( *p );
	bool_t    reviflo    = bli_obj_is_pack_rev_if_lower( *p );

	dim_t     m_p        = bli_obj_length( *p );
	dim_t     n_p        = bli_obj_width( *p );
	dim_t     m_max_p    = bli_obj_padded_length( *p );
	dim_t     n_max_p    = bli_obj_padded_width( *p );

	void*     buf_c      = bli_obj_buffer_at_off( *c );
	inc_t     rs_c       = bli_obj_row_stride( *c );
	inc_t     cs_c       = bli_obj_col_stride( *c );

	void*     buf_p      = bli_obj_buffer_at_off( *p );
	inc_t     rs_p       = bli_obj_row_stride( *p );
	inc_t     cs_p       = bli_obj_col_stride( *p );
	inc_t     is_p       = bli_obj_imag_stride( *p );
	dim_t     pd_p       = bli_obj_panel_dim( *p );
	inc_t     ps_p       = bli_obj_panel_stride( *p );

	obj_t     kappa;
	// Zero-initialized to keep gcc from warning about a possibly
	// uninitialized use: only the chief thread assigns kappa_p before
	// it is handed to thread_obroadcast() below.
	obj_t*    kappa_p = {0};
	void*     buf_kappa;

	func_t*   packm_kers;
	void*     packm_ker;

	FUNCPTR_T f;

	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_ind_packed( schema ) )
	{
		// The value for kappa we use will depend on whether the scalar
		// attached to p has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if( thread_am_ochief( t ) )
		{
			if ( bli_obj_scalar_has_nonzero_imag( p ) )
			{
				// Detach the scalar.
				bli_obj_scalar_detach( p, &kappa );

				// Reset the attached scalar (to 1.0).
				bli_obj_scalar_reset( p );

				// Use the detached scalar as kappa.
				kappa_p = &kappa;
			}
			else
			{
				// If the internal scalar of p has only a real component, then
				// we will apply it later (in the micro-kernel), and so we will
				// use BLIS_ONE to indicate no scaling during packing.
				kappa_p = &BLIS_ONE;
			}
		}
		// Every thread takes its kappa_p from the broadcast result.
		kappa_p = thread_obroadcast( t, kappa_p );

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
	}
	else // if ( bli_is_nat_packed( schema ) )
	{
		// This branch is for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
	}


	// Choose the correct func_t object based on the pack_t schema.
	if      ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers;
	else if ( bli_is_3mi_packed( schema ) ||
	          bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers;
	else if ( bli_is_ro_packed( schema ) ||
	          bli_is_io_packed( schema ) ||
	         bli_is_rpi_packed( schema ) )  packm_kers = packm_struc_cxk_rih_kers;
	else                                    packm_kers = packm_struc_cxk_kers;

	// Query the datatype-specific function pointer from the func_t object.
	packm_ker = bli_func_obj_query( dt_cp, packm_kers );


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

	// Invoke the function.
	f( strucc,
	   diagoffc,
	   diagc,
	   uploc,
	   transc,
	   schema,
	   invdiag,
	   revifup,
	   reviflo,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   packm_ker,
	   t );
}