示例#1
0
void bli_obj_create_const( double value, obj_t* obj )
{
	gint_t*   temp_i;
	float*    temp_s;
	double*   temp_d;
	scomplex* temp_c;
	dcomplex* temp_z;

	if ( bli_error_checking_is_enabled() )
		bli_obj_create_const_check( value, obj );

	bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, obj );

	temp_s = bli_obj_buffer_for_const( BLIS_FLOAT,    *obj );
	temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE,   *obj );
	temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *obj );
	temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *obj );
	temp_i = bli_obj_buffer_for_const( BLIS_INT,      *obj );

	// Use the bli_??sets() macros to set the temp variables in order to
	// properly support BLIS_ENABLE_C99_COMPLEX.
	bli_dssets( value, 0.0, *temp_s );
	bli_ddsets( value, 0.0, *temp_d );
	bli_dcsets( value, 0.0, *temp_c );
	bli_dzsets( value, 0.0, *temp_z );

	*temp_i = ( gint_t ) value;
}
示例#2
0
void bli_obj_create_const_copy_of( obj_t* a, obj_t* b )
{
	gint_t*   temp_i;
	float*    temp_s;
	double*   temp_d;
	scomplex* temp_c;
	dcomplex* temp_z;
	void*     buf_a;
	dcomplex  value;

	if ( bli_error_checking_is_enabled() )
		bli_obj_create_const_copy_of_check( a, b );

	bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, b );

	temp_s = bli_obj_buffer_for_const( BLIS_FLOAT,    *b );
	temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE,   *b );
	temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b );
	temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b );
	temp_i = bli_obj_buffer_for_const( BLIS_INT,      *b );

	buf_a = bli_obj_buffer_at_off( *a );

	bli_zzsets( 0.0, 0.0, value ); 

	if ( bli_obj_is_float( *a ) )
	{
		bli_szcopys( *(( float*    )buf_a), value );
	}
	else if ( bli_obj_is_double( *a ) )
	{
		bli_dzcopys( *(( double*   )buf_a), value );
	}
	else if ( bli_obj_is_scomplex( *a ) )
	{
		bli_czcopys( *(( scomplex* )buf_a), value );
	}
	else if ( bli_obj_is_dcomplex( *a ) )
	{
		bli_zzcopys( *(( dcomplex* )buf_a), value );
	}
	else
	{
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	bli_zscopys( value, *temp_s );
	bli_zdcopys( value, *temp_d );
	bli_zccopys( value, *temp_c );
	bli_zzcopys( value, *temp_z );

	*temp_i = ( gint_t ) bli_zreal( value );
}
示例#3
0
void bli_fprintm( FILE* file, char* s1, obj_t* x, char* format, char* s2 )
{
	num_t     dt_x      = bli_obj_datatype( *x );

	dim_t     m         = bli_obj_length( *x );
	dim_t     n         = bli_obj_width( *x );

	inc_t     rs_x      = bli_obj_row_stride( *x );
	inc_t     cs_x      = bli_obj_col_stride( *x );
	void*     buf_x     = bli_obj_buffer_at_off( *x );

	FUNCPTR_T f;

	if ( bli_error_checking_is_enabled() )
		bli_fprintm_check( file, s1, x, format, s2 );

	// Handle constants up front.
	if ( dt_x == BLIS_CONSTANT )
	{
		float*    sp = bli_obj_buffer_for_const( BLIS_FLOAT,    *x );
		double*   dp = bli_obj_buffer_for_const( BLIS_DOUBLE,   *x );
		scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *x );
		dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *x );
		gint_t*   ip = bli_obj_buffer_for_const( BLIS_INT,      *x );

		fprintf( file, "%s\n", s1 );
		fprintf( file, " float:     %9.2e\n",         bli_sreal( *sp ) );
		fprintf( file, " double:    %9.2e\n",         bli_dreal( *dp ) );
		fprintf( file, " scomplex:  %9.2e + %9.2e\n", bli_creal( *cp ), bli_cimag( *cp ) );
		fprintf( file, " dcomplex:  %9.2e + %9.2e\n", bli_zreal( *zp ), bli_zimag( *zp ) );
		fprintf( file, " int:       %ld\n",           *ip );
		fprintf( file, "\n" );
		return;
	}

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_x];

	// Invoke the function.
	f( file,
	   s1,
	   m,
	   n,
	   buf_x, rs_x, cs_x,
	   format,
	   s2 );
}
示例#4
0
void bli_packm_unb_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* thread )
{
	num_t     dt_cp     = bli_obj_datatype( *c );

	struc_t   strucc    = bli_obj_struc( *c );
	doff_t    diagoffc  = bli_obj_diag_offset( *c );
	diag_t    diagc     = bli_obj_diag( *c );
	uplo_t    uploc     = bli_obj_uplo( *c );
	trans_t   transc    = bli_obj_conjtrans_status( *c );

	dim_t     m_p       = bli_obj_length( *p );
	dim_t     n_p       = bli_obj_width( *p );
	dim_t     m_max_p   = bli_obj_padded_length( *p );
	dim_t     n_max_p   = bli_obj_padded_width( *p );

	void*     buf_c     = bli_obj_buffer_at_off( *c );
	inc_t     rs_c      = bli_obj_row_stride( *c );
	inc_t     cs_c      = bli_obj_col_stride( *c );

	void*     buf_p     = bli_obj_buffer_at_off( *p );
	inc_t     rs_p      = bli_obj_row_stride( *p );
	inc_t     cs_p      = bli_obj_col_stride( *p );

	void*     buf_kappa;

	FUNCPTR_T f;


	// This variant assumes that the computational kernel will always apply
	// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
	// for kappa so that the underlying packm implementation does not scale
	// during packing.
	buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

    if( thread_am_ochief( thread ) ) {
        // Invoke the function.
        f( strucc,
           diagoffc,
           diagc,
           uploc,
           transc,
           m_p,
           n_p,
           m_max_p,
           n_max_p,
           buf_kappa,
           buf_c, rs_c, cs_c,
           buf_p, rs_p, cs_p );
    }
}
示例#5
0
void bli_obj_scalar_reset( obj_t* a )
{
	num_t dt       = bli_obj_datatype( *a );
	void* scalar_a = bli_obj_internal_scalar_buffer( *a );
	void* one      = bli_obj_buffer_for_const( dt, BLIS_ONE );

	if      ( bli_is_float( dt )    ) *(( float*    )scalar_a) = *(( float*    )one);
	else if ( bli_is_double( dt )   ) *(( double*   )scalar_a) = *(( double*   )one);
	else if ( bli_is_scomplex( dt ) ) *(( scomplex* )scalar_a) = *(( scomplex* )one);
	else if ( bli_is_dcomplex( dt ) ) *(( dcomplex* )scalar_a) = *(( dcomplex* )one);

	// Alternate implementation:
	//bli_obj_scalar_attach( BLIS_NO_CONJUGATE, &BLIS_ONE, a );
}
void bli_packm_blk_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* t )
{
	num_t     dt_cp      = bli_obj_datatype( *c );

	struc_t   strucc     = bli_obj_struc( *c );
	doff_t    diagoffc   = bli_obj_diag_offset( *c );
	diag_t    diagc      = bli_obj_diag( *c );
	uplo_t    uploc      = bli_obj_uplo( *c );
	trans_t   transc     = bli_obj_conjtrans_status( *c );
	pack_t    schema     = bli_obj_pack_schema( *p );
	bool_t    invdiag    = bli_obj_has_inverted_diag( *p );
	bool_t    revifup    = bli_obj_is_pack_rev_if_upper( *p );
	bool_t    reviflo    = bli_obj_is_pack_rev_if_lower( *p );

	dim_t     m_p        = bli_obj_length( *p );
	dim_t     n_p        = bli_obj_width( *p );
	dim_t     m_max_p    = bli_obj_padded_length( *p );
	dim_t     n_max_p    = bli_obj_padded_width( *p );

	void*     buf_c      = bli_obj_buffer_at_off( *c );
	inc_t     rs_c       = bli_obj_row_stride( *c );
	inc_t     cs_c       = bli_obj_col_stride( *c );

	void*     buf_p      = bli_obj_buffer_at_off( *p );
	inc_t     rs_p       = bli_obj_row_stride( *p );
	inc_t     cs_p       = bli_obj_col_stride( *p );
	inc_t     is_p       = bli_obj_imag_stride( *p );
	dim_t     pd_p       = bli_obj_panel_dim( *p );
	inc_t     ps_p       = bli_obj_panel_stride( *p );

	obj_t     kappa;
	/*---initialize pointer to stop gcc complaining  2-9-16 GH --- */
	obj_t*    kappa_p = {0};
	void*     buf_kappa;

	func_t*   packm_kers;
	void*     packm_ker;

	FUNCPTR_T f;

	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_ind_packed( schema ) )
	{
		// The value for kappa we use will depend on whether the scalar
		// attached to A has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if( thread_am_ochief( t ) )
		{
			if ( bli_obj_scalar_has_nonzero_imag( p ) )
			{
				// Detach the scalar.
				bli_obj_scalar_detach( p, &kappa );

				// Reset the attached scalar (to 1.0).
				bli_obj_scalar_reset( p );

				kappa_p = κ
			}
			else
			{
				// If the internal scalar of A has only a real component, then
				// we will apply it later (in the micro-kernel), and so we will
				// use BLIS_ONE to indicate no scaling during packing.
				kappa_p = &BLIS_ONE;
			}
		}
		kappa_p = thread_obroadcast( t, kappa_p );

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
	}
	else // if ( bli_is_nat_packed( schema ) )
	{
		// This branch if for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
	}


	// Choose the correct func_t object based on the pack_t schema.
	if      ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers;
	else if ( bli_is_3mi_packed( schema ) ||
	          bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers;
	else if ( bli_is_ro_packed( schema ) ||
	          bli_is_io_packed( schema ) ||
	         bli_is_rpi_packed( schema ) )  packm_kers = packm_struc_cxk_rih_kers;
	else                                    packm_kers = packm_struc_cxk_kers;

	// Query the datatype-specific function pointer from the func_t object.
	packm_ker = bli_func_obj_query( dt_cp, packm_kers );


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

	// Invoke the function.
	f( strucc,
	   diagoffc,
	   diagc,
	   uploc,
	   transc,
	   schema,
	   invdiag,
	   revifup,
	   reviflo,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   packm_ker,
	   t );
}
示例#7
0
void bli_packm_blk_var1_md
     (
       obj_t*   c,
       obj_t*   p,
       cntx_t*  cntx,
       cntl_t*  cntl,
       thrinfo_t* t
     )
{
	num_t     dt_c       = bli_obj_dt( c );
	num_t     dt_p       = bli_obj_dt( p );

	trans_t   transc     = bli_obj_conjtrans_status( c );
	pack_t    schema     = bli_obj_pack_schema( p );

	dim_t     m_p        = bli_obj_length( p );
	dim_t     n_p        = bli_obj_width( p );
	dim_t     m_max_p    = bli_obj_padded_length( p );
	dim_t     n_max_p    = bli_obj_padded_width( p );

	void*     buf_c      = bli_obj_buffer_at_off( c );
	inc_t     rs_c       = bli_obj_row_stride( c );
	inc_t     cs_c       = bli_obj_col_stride( c );

	void*     buf_p      = bli_obj_buffer_at_off( p );
	inc_t     rs_p       = bli_obj_row_stride( p );
	inc_t     cs_p       = bli_obj_col_stride( p );
	inc_t     is_p       = bli_obj_imag_stride( p );
	dim_t     pd_p       = bli_obj_panel_dim( p );
	inc_t     ps_p       = bli_obj_panel_stride( p );

	obj_t     kappa;
	void*     buf_kappa;

	FUNCPTR_T f;


	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_nat_packed( schema ) )
	{
		// This branch is for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE );
	}
	else // if ( bli_is_ind_packed( schema ) )
	{
		obj_t* kappa_p;

		// The value for kappa we use will depend on whether the scalar
		// attached to A has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if ( bli_obj_scalar_has_nonzero_imag( p ) )
		{
			// Detach the scalar.
			bli_obj_scalar_detach( p, &kappa );

			// Reset the attached scalar (to 1.0).
			bli_obj_scalar_reset( p );

			kappa_p = κ
		}
		else
		{
			// If the internal scalar of A has only a real component, then
			// we will apply it later (in the micro-kernel), and so we will
			// use BLIS_ONE to indicate no scaling during packing.
			kappa_p = &BLIS_ONE;
		}

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p );
	}


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_c][dt_p];

	// Invoke the function.
	f(
	   transc,
	   schema,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   cntx,
	   t );
}