Beispiel #1
0
void bli_trmv_unf_var1( obj_t*  alpha,
                        obj_t*  a,
                        obj_t*  x,
                        cntx_t* cntx,
                        trmv_t* cntl )
{
	num_t     dt_a      = bli_obj_datatype( *a );
	num_t     dt_x      = bli_obj_datatype( *x );

	uplo_t    uplo      = bli_obj_uplo( *a );
	trans_t   trans     = bli_obj_conjtrans_status( *a );
	diag_t    diag      = bli_obj_diag( *a );

	dim_t     m         = bli_obj_length( *a );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     rs_a      = bli_obj_row_stride( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );

	void*     buf_x     = bli_obj_buffer_at_off( *x );
	inc_t     incx      = bli_obj_vector_inc( *x );

	num_t     dt_alpha;
	void*     buf_alpha;

	FUNCPTR_T f;

	// The datatype of alpha MUST be the type union of a and x. This is to
	// prevent any unnecessary loss of information during computation.
	dt_alpha  = bli_datatype_union( dt_a, dt_x );
	buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_a][dt_x];

	// Invoke the function.
	f( uplo,
	   trans,
	   diag,
	   m,
	   buf_alpha,
	   buf_a, rs_a, cs_a,
	   buf_x, incx );
}
Beispiel #2
0
void bli_hemv_unb_var1( conj_t  conjh,
                        obj_t*  alpha,
                        obj_t*  a,
                        obj_t*  x,
                        obj_t*  beta,
                        obj_t*  y,
                        cntx_t* cntx,
                        hemv_t* cntl )
{
	num_t     dt_a      = bli_obj_datatype( *a );
	num_t     dt_x      = bli_obj_datatype( *x );
	num_t     dt_y      = bli_obj_datatype( *y );

	uplo_t    uplo      = bli_obj_uplo( *a );
	conj_t    conja     = bli_obj_conj_status( *a );
	conj_t    conjx     = bli_obj_conj_status( *x );

	dim_t     m         = bli_obj_length( *a );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     rs_a      = bli_obj_row_stride( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );

	void*     buf_x     = bli_obj_buffer_at_off( *x );
	inc_t     incx      = bli_obj_vector_inc( *x );

	void*     buf_y     = bli_obj_buffer_at_off( *y );
	inc_t     incy      = bli_obj_vector_inc( *y );

	num_t     dt_alpha;
	void*     buf_alpha;

	num_t     dt_beta;
	void*     buf_beta;

	FUNCPTR_T f;

	// The datatype of alpha MUST be the type union of a and x. This is to
	// prevent any unnecessary loss of information during computation.
	dt_alpha  = bli_datatype_union( dt_a, dt_x );
	buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );

	// The datatype of beta MUST be the same as the datatype of y.
	dt_beta   = dt_y;
	buf_beta  = bli_obj_buffer_for_1x1( dt_beta, *beta );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_a];

	// Invoke the function.
	f( uplo,
	   conja,
	   conjx,
	   conjh,
	   m,
	   buf_alpha,
	   buf_a, rs_a, cs_a,
	   buf_x, incx,
	   buf_beta,
	   buf_y, incy );
}
void bli_packm_blk_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* t )
{
	num_t     dt_cp      = bli_obj_datatype( *c );

	struc_t   strucc     = bli_obj_struc( *c );
	doff_t    diagoffc   = bli_obj_diag_offset( *c );
	diag_t    diagc      = bli_obj_diag( *c );
	uplo_t    uploc      = bli_obj_uplo( *c );
	trans_t   transc     = bli_obj_conjtrans_status( *c );
	pack_t    schema     = bli_obj_pack_schema( *p );
	bool_t    invdiag    = bli_obj_has_inverted_diag( *p );
	bool_t    revifup    = bli_obj_is_pack_rev_if_upper( *p );
	bool_t    reviflo    = bli_obj_is_pack_rev_if_lower( *p );

	dim_t     m_p        = bli_obj_length( *p );
	dim_t     n_p        = bli_obj_width( *p );
	dim_t     m_max_p    = bli_obj_padded_length( *p );
	dim_t     n_max_p    = bli_obj_padded_width( *p );

	void*     buf_c      = bli_obj_buffer_at_off( *c );
	inc_t     rs_c       = bli_obj_row_stride( *c );
	inc_t     cs_c       = bli_obj_col_stride( *c );

	void*     buf_p      = bli_obj_buffer_at_off( *p );
	inc_t     rs_p       = bli_obj_row_stride( *p );
	inc_t     cs_p       = bli_obj_col_stride( *p );
	inc_t     is_p       = bli_obj_imag_stride( *p );
	dim_t     pd_p       = bli_obj_panel_dim( *p );
	inc_t     ps_p       = bli_obj_panel_stride( *p );

	obj_t     kappa;
	/*---initialize pointer to stop gcc complaining  2-9-16 GH --- */
	obj_t*    kappa_p = {0};
	void*     buf_kappa;

	func_t*   packm_kers;
	void*     packm_ker;

	FUNCPTR_T f;

	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_ind_packed( schema ) )
	{
		// The value for kappa we use will depend on whether the scalar
		// attached to A has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if( thread_am_ochief( t ) )
		{
			if ( bli_obj_scalar_has_nonzero_imag( p ) )
			{
				// Detach the scalar.
				bli_obj_scalar_detach( p, &kappa );

				// Reset the attached scalar (to 1.0).
				bli_obj_scalar_reset( p );

				kappa_p = κ
			}
			else
			{
				// If the internal scalar of A has only a real component, then
				// we will apply it later (in the micro-kernel), and so we will
				// use BLIS_ONE to indicate no scaling during packing.
				kappa_p = &BLIS_ONE;
			}
		}
		kappa_p = thread_obroadcast( t, kappa_p );

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
	}
	else // if ( bli_is_nat_packed( schema ) )
	{
		// This branch if for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
	}


	// Choose the correct func_t object based on the pack_t schema.
	if      ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers;
	else if ( bli_is_3mi_packed( schema ) ||
	          bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers;
	else if ( bli_is_ro_packed( schema ) ||
	          bli_is_io_packed( schema ) ||
	         bli_is_rpi_packed( schema ) )  packm_kers = packm_struc_cxk_rih_kers;
	else                                    packm_kers = packm_struc_cxk_kers;

	// Query the datatype-specific function pointer from the func_t object.
	packm_ker = bli_func_obj_query( dt_cp, packm_kers );


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

	// Invoke the function.
	f( strucc,
	   diagoffc,
	   diagc,
	   uploc,
	   transc,
	   schema,
	   invdiag,
	   revifup,
	   reviflo,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   packm_ker,
	   t );
}
Beispiel #4
0
void libblis_test_setv_check( obj_t*  beta,
                              obj_t*  x,
                              double* resid )
{
	num_t dt_x     = bli_obj_datatype( *x );
	dim_t m_x      = bli_obj_vector_dim( *x );
	inc_t inc_x    = bli_obj_vector_inc( *x );
	void* buf_x    = bli_obj_buffer_at_off( *x );
	void* buf_beta = bli_obj_buffer_for_1x1( dt_x, *beta );
	dim_t i;

	*resid = 0.0;

	//
	// The easiest way to check that setv was successful is to confirm
	// that each element of x is equal to beta.
	//

	if      ( bli_obj_is_float( *x ) )
	{
		float*    chi1      = buf_x;
		float*    beta_cast = buf_beta;

		for ( i = 0; i < m_x; ++i )
		{
			if ( !bli_seq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			
			chi1 += inc_x;
		}
	}
	else if ( bli_obj_is_double( *x ) )
	{
		double*   chi1      = buf_x;
		double*   beta_cast = buf_beta;

		for ( i = 0; i < m_x; ++i )
		{
			if ( !bli_deq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			
			chi1 += inc_x;
		}
	}
	else if ( bli_obj_is_scomplex( *x ) )
	{
		scomplex* chi1      = buf_x;
		scomplex* beta_cast = buf_beta;

		for ( i = 0; i < m_x; ++i )
		{
			if ( !bli_ceq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			
			chi1 += inc_x;
		}
	}
	else // if ( bli_obj_is_dcomplex( *x ) )
	{
		dcomplex* chi1      = buf_x;
		dcomplex* beta_cast = buf_beta;

		for ( i = 0; i < m_x; ++i )
		{
			if ( !bli_zeq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			
			chi1 += inc_x;
		}
	}
}
Beispiel #5
0
void bli_packm_blk_var1_md
     (
       obj_t*   c,
       obj_t*   p,
       cntx_t*  cntx,
       cntl_t*  cntl,
       thrinfo_t* t
     )
{
	num_t     dt_c       = bli_obj_dt( c );
	num_t     dt_p       = bli_obj_dt( p );

	trans_t   transc     = bli_obj_conjtrans_status( c );
	pack_t    schema     = bli_obj_pack_schema( p );

	dim_t     m_p        = bli_obj_length( p );
	dim_t     n_p        = bli_obj_width( p );
	dim_t     m_max_p    = bli_obj_padded_length( p );
	dim_t     n_max_p    = bli_obj_padded_width( p );

	void*     buf_c      = bli_obj_buffer_at_off( c );
	inc_t     rs_c       = bli_obj_row_stride( c );
	inc_t     cs_c       = bli_obj_col_stride( c );

	void*     buf_p      = bli_obj_buffer_at_off( p );
	inc_t     rs_p       = bli_obj_row_stride( p );
	inc_t     cs_p       = bli_obj_col_stride( p );
	inc_t     is_p       = bli_obj_imag_stride( p );
	dim_t     pd_p       = bli_obj_panel_dim( p );
	inc_t     ps_p       = bli_obj_panel_stride( p );

	obj_t     kappa;
	void*     buf_kappa;

	FUNCPTR_T f;


	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_nat_packed( schema ) )
	{
		// This branch is for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE );
	}
	else // if ( bli_is_ind_packed( schema ) )
	{
		obj_t* kappa_p;

		// The value for kappa we use will depend on whether the scalar
		// attached to A has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if ( bli_obj_scalar_has_nonzero_imag( p ) )
		{
			// Detach the scalar.
			bli_obj_scalar_detach( p, &kappa );

			// Reset the attached scalar (to 1.0).
			bli_obj_scalar_reset( p );

			kappa_p = &kappa;
		}
		else
		{
			// If the internal scalar of A has only a real component, then
			// we will apply it later (in the micro-kernel), and so we will
			// use BLIS_ONE to indicate no scaling during packing.
			kappa_p = &BLIS_ONE;
		}

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p );
	}


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_c][dt_p];

	// Invoke the function.
	f(
	   transc,
	   schema,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   cntx,
	   t );
}
Beispiel #6
0
#define GENFRONT( opname ) \
\
void PASTEMAC0(opname)( \
                        obj_t*  chi, \
                        double* zeta_r, \
                        double* zeta_i  \
                      ) \
{ \
	num_t     dt_chi    = bli_obj_datatype( *chi ); \
	num_t     dt_def    = BLIS_DCOMPLEX; \
	num_t     dt_use; \
\
	/* If chi is a constant object, default to using the dcomplex
	   value to maximize precision, and since we don't know if the
	   caller needs just the real or the real and imaginary parts. */ \
	void*     buf_chi   = bli_obj_buffer_for_1x1( dt_def, *chi ); \
\
	FUNCPTR_T f; \
\
	if ( bli_error_checking_is_enabled() ) \
	    PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
\
	/* The _check() routine prevents integer types, so we know that chi
	   is either a constant or an actual floating-point type. */ \
	if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \
	else                             dt_use = dt_chi; \
\
	/* Index into the type combination array to extract the correct
	   function pointer. */ \
	f = ftypes[dt_use]; \
\
Beispiel #7
0
void bli_gemmsup_ref_var1n
     (
       trans_t trans,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       stor3_t eff_id,
       cntx_t* cntx,
       rntm_t* rntm
     )
{
#if 0
	obj_t at, bt;

	bli_obj_alias_to( a, &at );
	bli_obj_alias_to( b, &bt );

	// Induce transpositions on A and/or B if either object is marked for
	// transposition. We can induce "fast" transpositions since they objects
	// are guaranteed to not have structure or be packed.
	if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); }
	if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); }

	const num_t    dt_exec   = bli_obj_dt( c );

	const conj_t   conja     = bli_obj_conj_status( a );
	const conj_t   conjb     = bli_obj_conj_status( b );

	const dim_t    m         = bli_obj_length( c );
	const dim_t    n         = bli_obj_width( c );

	const dim_t    k         = bli_obj_width( &at );

	void* restrict buf_a     = bli_obj_buffer_at_off( &at );
	const inc_t    rs_a      = bli_obj_row_stride( &at );
	const inc_t    cs_a      = bli_obj_col_stride( &at );

	void* restrict buf_b     = bli_obj_buffer_at_off( &bt );
	const inc_t    rs_b      = bli_obj_row_stride( &bt );
	const inc_t    cs_b      = bli_obj_col_stride( &bt );

	void* restrict buf_c     = bli_obj_buffer_at_off( c );
	const inc_t    rs_c      = bli_obj_row_stride( c );
	const inc_t    cs_c      = bli_obj_col_stride( c );

	void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha );
	void* restrict buf_beta  = bli_obj_buffer_for_1x1( dt_exec, beta );

#else

	const num_t    dt_exec   = bli_obj_dt( c );

	const conj_t   conja     = bli_obj_conj_status( a );
	const conj_t   conjb     = bli_obj_conj_status( b );

	const dim_t    m         = bli_obj_length( c );
	const dim_t    n         = bli_obj_width( c );
	      dim_t    k;

	void* restrict buf_a = bli_obj_buffer_at_off( a );
	      inc_t    rs_a;
	      inc_t    cs_a;

	void* restrict buf_b = bli_obj_buffer_at_off( b );
	      inc_t    rs_b;
	      inc_t    cs_b;

	if ( bli_obj_has_notrans( a ) )
	{
		k     = bli_obj_width( a );

		rs_a  = bli_obj_row_stride( a );
		cs_a  = bli_obj_col_stride( a );
	}
	else // if ( bli_obj_has_trans( a ) )
	{
		// Assign the variables with an implicit transposition.
		k     = bli_obj_length( a );

		rs_a  = bli_obj_col_stride( a );
		cs_a  = bli_obj_row_stride( a );
	}

	if ( bli_obj_has_notrans( b ) )
	{
		rs_b  = bli_obj_row_stride( b );
		cs_b  = bli_obj_col_stride( b );
	}
	else // if ( bli_obj_has_trans( b ) )
	{
		// Assign the variables with an implicit transposition.
		rs_b  = bli_obj_col_stride( b );
		cs_b  = bli_obj_row_stride( b );
	}

	void* restrict buf_c     = bli_obj_buffer_at_off( c );
	const inc_t    rs_c      = bli_obj_row_stride( c );
	const inc_t    cs_c      = bli_obj_col_stride( c );

	void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha );
	void* restrict buf_beta  = bli_obj_buffer_for_1x1( dt_exec, beta );

#endif

	// Index into the type combination array to extract the correct
	// function pointer.
	FUNCPTR_T f = ftypes_var1n[dt_exec];

	if ( bli_is_notrans( trans ) )
	{
		// Invoke the function.
		f
		(
		  conja,
		  conjb,
		  m,
		  n,
		  k,
		  buf_alpha,
		  buf_a, rs_a, cs_a,
		  buf_b, rs_b, cs_b,
		  buf_beta,
		  buf_c, rs_c, cs_c,
		  eff_id,
		  cntx,
		  rntm
		);
	}
	else
	{
		// Invoke the function (transposing the operation).
		f
		(
		  conjb,             // swap the conj values.
		  conja,
		  n,                 // swap the m and n dimensions.
		  m,
		  k,
		  buf_alpha,
		  buf_b, cs_b, rs_b, // swap the positions of A and B.
		  buf_a, cs_a, rs_a, // swap the strides of A and B.
		  buf_beta,
		  buf_c, cs_c, rs_c, // swap the strides of C.
		  bli_stor3_trans( eff_id ), // transpose the stor3_t id.
		  cntx,
		  rntm
		);
	}
}
Beispiel #8
0
	void*     buf_alphax; \
	void*     buf_alphay; \
\
	obj_t     alphax_local; \
	obj_t     alphay_local; \
\
	if ( bli_error_checking_is_enabled() ) \
	    PASTEMAC(opname,_check)( alphax, alphay, x, y, z ); \
\
	/* Create local copy-casts of scalars (and apply internal conjugation
	   as needed). */ \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      alphax, &alphax_local ); \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      alphay, &alphay_local ); \
	buf_alphax = bli_obj_buffer_for_1x1( dt, &alphax_local ); \
	buf_alphay = bli_obj_buffer_for_1x1( dt, &alphay_local ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	   conjx, \
	   conjy, \
	   n, \
	   buf_alphax, \
	   buf_alphay, \
	   buf_x, inc_x, \
Beispiel #9
0
//
// Define object-based interface.
//
void bli_dotxf( obj_t*  alpha,
                obj_t*  a,
                obj_t*  x,
                obj_t*  beta,
                obj_t*  y )
{
	num_t     dt        = bli_obj_datatype( *x );

	conj_t    conja     = bli_obj_conj_status( *a );
	conj_t    conjx     = bli_obj_conj_status( *x );

	dim_t     m         = bli_obj_vector_dim( *y );
	dim_t     b_n       = bli_obj_vector_dim( *x );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     rs_a      = bli_obj_row_stride( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );

	void*     buf_x     = bli_obj_buffer_at_off( *x );
	inc_t     inc_x     = bli_obj_vector_inc( *x );

	void*     buf_y     = bli_obj_buffer_at_off( *y );
	inc_t     inc_y     = bli_obj_vector_inc( *y );

	obj_t     alpha_local;
	void*     buf_alpha;

	obj_t     beta_local;
	void*     buf_beta;

	FUNCPTR_T f         = ftypes[dt];

	if ( bli_error_checking_is_enabled() )
	    bli_dotxf_check( alpha, a, x, beta, y );

	// Create local copy-casts of the scalars (and apply internal conjugation
	// if needed).
	bli_obj_scalar_init_detached_copy_of( dt,
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_local );
	bli_obj_scalar_init_detached_copy_of( dt,
	                                      BLIS_NO_CONJUGATE,
	                                      beta,
	                                      &beta_local );

	// Extract the scalar buffers.
	buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local );
	buf_beta  = bli_obj_buffer_for_1x1( dt, beta_local );

	// Support cases where matrix A requires a transposition.
	if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); }

	// Invoke the void pointer-based function.
	f( conja,
	   conjx,
	   m,
	   b_n,
	   buf_alpha,
	   buf_a, rs_a, cs_a,
	   buf_x, inc_x,
	   buf_beta,
	   buf_y, inc_y );
}
Beispiel #10
0
void libblis_test_setm_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         x,
       double*        resid
     )
{
	num_t dt_x     = bli_obj_datatype( *x );
	dim_t m_x      = bli_obj_length( *x );
	dim_t n_x      = bli_obj_width( *x );
	inc_t rs_x     = bli_obj_row_stride( *x );
	inc_t cs_x     = bli_obj_col_stride( *x );
	void* buf_x    = bli_obj_buffer_at_off( *x );
	void* buf_beta = bli_obj_buffer_for_1x1( dt_x, *beta );
	dim_t i, j;

	*resid = 0.0;

	//
	// The easiest way to check that setm was successful is to confirm
	// that each element of x is equal to beta.
	//

	if      ( bli_obj_is_float( *x ) )
	{
		float*    beta_cast  = buf_beta;
		float*    buf_x_cast = buf_x;
		float*    chi1;

		for ( j = 0; j < n_x; ++j )
		{
			for ( i = 0; i < m_x; ++i )
			{
				chi1 = buf_x_cast + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_seq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			}
		}
	}
	else if ( bli_obj_is_double( *x ) )
	{
		double*   beta_cast  = buf_beta;
		double*   buf_x_cast = buf_x;
		double*   chi1;

		for ( j = 0; j < n_x; ++j )
		{
			for ( i = 0; i < m_x; ++i )
			{
				chi1 = buf_x_cast + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_deq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			}
		}
	}
	else if ( bli_obj_is_scomplex( *x ) )
	{
		scomplex* beta_cast  = buf_beta;
		scomplex* buf_x_cast = buf_x;
		scomplex* chi1;

		for ( j = 0; j < n_x; ++j )
		{
			for ( i = 0; i < m_x; ++i )
			{
				chi1 = buf_x_cast + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_ceq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			}
		}
	}
	else // if ( bli_obj_is_dcomplex( *x ) )
	{
		dcomplex* beta_cast  = buf_beta;
		dcomplex* buf_x_cast = buf_x;
		dcomplex* chi1;

		for ( j = 0; j < n_x; ++j )
		{
			for ( i = 0; i < m_x; ++i )
			{
				chi1 = buf_x_cast + (i  )*rs_x + (j  )*cs_x;

				if ( !bli_zeq( *chi1, *beta_cast ) ) { *resid = 1.0; return; }
			}
		}
	}
}
Beispiel #11
0
void bli_gemmtrsm_ukernel( obj_t*  alpha,
                           obj_t*  a1x,
                           obj_t*  a11,
                           obj_t*  bx1,
                           obj_t*  b11,
                           obj_t*  c11 )
{
    dim_t     k         = bli_obj_width( *a1x );

    num_t     dt        = bli_obj_datatype( *c11 );

    void*     buf_a1x   = bli_obj_buffer_at_off( *a1x );

    void*     buf_a11   = bli_obj_buffer_at_off( *a11 );

    void*     buf_bx1   = bli_obj_buffer_at_off( *bx1 );

    void*     buf_b11   = bli_obj_buffer_at_off( *b11 );

    void*     buf_c11   = bli_obj_buffer_at_off( *c11 );
    inc_t     rs_c      = bli_obj_row_stride( *c11 );
    inc_t     cs_c      = bli_obj_col_stride( *c11 );

    void*     buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha );

    auxinfo_t data;

    FUNCPTR_T f;

    void*     gemmtrsm_ukr;


    // Fill the auxinfo_t struct in case the micro-kernel uses it.
    if ( bli_obj_is_lower( *a11 ) )
    {
        bli_auxinfo_set_next_a( buf_a1x, data );
    }
    else
    {
        bli_auxinfo_set_next_a( buf_a11, data );
    }
    bli_auxinfo_set_next_b( buf_bx1, data );

    // Query the function address from the micro-kernel func_t object.
    if ( bli_obj_is_lower( *a11 ) )
        gemmtrsm_ukr = bli_func_obj_query( dt, gemmtrsm_l_ukrs );
    else
        gemmtrsm_ukr = bli_func_obj_query( dt, gemmtrsm_u_ukrs );

    // Index into the type combination array to extract the correct
    // function pointer.
    if ( bli_obj_is_lower( *a11 ) ) f = ftypes_l[dt];
    else                            f = ftypes_u[dt];

    // Invoke the function.
    f( k,
       buf_alpha,
       buf_a1x,
       buf_a11,
       buf_bx1,
       buf_b11,
       buf_c11, rs_c, cs_c,
       &data,
       gemmtrsm_ukr );
}
void bli_dotxaxpyf_unb_var2( obj_t*  alpha,
                             obj_t*  at,
                             obj_t*  a,
                             obj_t*  w,
                             obj_t*  x,
                             obj_t*  beta,
                             obj_t*  y,
                             obj_t*  z )
{
	num_t     dt_a      = bli_obj_datatype( *a );
	num_t     dt_x      = bli_obj_datatype( *x );
	num_t     dt_y      = bli_obj_datatype( *y );

	conj_t    conjat    = bli_obj_conj_status( *at );
	conj_t    conja     = bli_obj_conj_status( *a );
	conj_t    conjw     = bli_obj_conj_status( *w );
	conj_t    conjx     = bli_obj_conj_status( *x );

	dim_t     m         = bli_obj_vector_dim( *z );
	dim_t     b_n       = bli_obj_vector_dim( *y );

	void*     buf_a     = bli_obj_buffer_at_off( *a );
	inc_t     rs_a      = bli_obj_row_stride( *a );
	inc_t     cs_a      = bli_obj_col_stride( *a );

	inc_t     inc_w     = bli_obj_vector_inc( *w );
	void*     buf_w     = bli_obj_buffer_at_off( *w );

	inc_t     inc_x     = bli_obj_vector_inc( *x );
	void*     buf_x     = bli_obj_buffer_at_off( *x );

	inc_t     inc_y     = bli_obj_vector_inc( *y );
	void*     buf_y     = bli_obj_buffer_at_off( *y );

	inc_t     inc_z     = bli_obj_vector_inc( *z );
	void*     buf_z     = bli_obj_buffer_at_off( *z );

	num_t     dt_alpha;
	void*     buf_alpha;

	num_t     dt_beta;
	void*     buf_beta;

	FUNCPTR_T f;

	// The datatype of alpha MUST be the type union of a and x. This is to
	// prevent any unnecessary loss of information during computation.
	dt_alpha  = bli_datatype_union( dt_a, dt_x );
	buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );

	// The datatype of beta MUST be the same as the datatype of y.
	dt_beta   = dt_y;
	buf_beta  = bli_obj_buffer_for_1x1( dt_beta, *beta );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_a][dt_x][dt_y];

	// Invoke the function.
	f( conjat,
	   conja,
	   conjw,
	   conjx,
	   m,
	   b_n,
	   buf_alpha,
	   buf_a, rs_a, cs_a,
	   buf_w, inc_w,
	   buf_x, inc_x,
	   buf_beta,
	   buf_y, inc_y,
	   buf_z, inc_z );
}