Exemplo n.º 1
0
void bli_packm_unb_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* thread )
{
	num_t     dt_cp     = bli_obj_datatype( *c );

	struc_t   strucc    = bli_obj_struc( *c );
	doff_t    diagoffc  = bli_obj_diag_offset( *c );
	diag_t    diagc     = bli_obj_diag( *c );
	uplo_t    uploc     = bli_obj_uplo( *c );
	trans_t   transc    = bli_obj_conjtrans_status( *c );

	dim_t     m_p       = bli_obj_length( *p );
	dim_t     n_p       = bli_obj_width( *p );
	dim_t     m_max_p   = bli_obj_padded_length( *p );
	dim_t     n_max_p   = bli_obj_padded_width( *p );

	void*     buf_c     = bli_obj_buffer_at_off( *c );
	inc_t     rs_c      = bli_obj_row_stride( *c );
	inc_t     cs_c      = bli_obj_col_stride( *c );

	void*     buf_p     = bli_obj_buffer_at_off( *p );
	inc_t     rs_p      = bli_obj_row_stride( *p );
	inc_t     cs_p      = bli_obj_col_stride( *p );

	void*     buf_kappa;

	FUNCPTR_T f;


	// This variant assumes that the computational kernel will always apply
	// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
	// for kappa so that the underlying packm implementation does not scale
	// during packing.
	buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

    if( thread_am_ochief( thread ) ) {
        // Invoke the function.
        f( strucc,
           diagoffc,
           diagc,
           uploc,
           transc,
           m_p,
           n_p,
           m_max_p,
           n_max_p,
           buf_kappa,
           buf_c, rs_c, cs_c,
           buf_p, rs_p, cs_p );
    }
}
Exemplo n.º 2
0
void bli_obj_print( char* label, obj_t* obj )
{
	FILE*  file     = stdout;

	if ( bli_error_checking_is_enabled() )
		bli_obj_print_check( label, obj );

	fprintf( file, "\n" );
	fprintf( file, "%s\n", label );
	fprintf( file, "\n" );

	fprintf( file, " m x n           %lu x %lu\n", ( unsigned long int )bli_obj_length( *obj ),
	                                               ( unsigned long int )bli_obj_width( *obj ) );
	fprintf( file, "\n" );

	fprintf( file, " offm, offn      %lu, %lu\n", ( unsigned long int )bli_obj_row_off( *obj ),
	                                              ( unsigned long int )bli_obj_col_off( *obj ) );
	fprintf( file, " diagoff         %ld\n", ( signed long int )bli_obj_diag_offset( *obj ) );
	fprintf( file, "\n" );

	fprintf( file, " buf             %p\n",  ( void* )bli_obj_buffer( *obj ) );
	fprintf( file, " elem size       %lu\n", ( unsigned long int )bli_obj_elem_size( *obj ) );
	fprintf( file, " rs, cs          %ld, %ld\n", ( signed long int )bli_obj_row_stride( *obj ),
	                                              ( signed long int )bli_obj_col_stride( *obj ) );
	fprintf( file, " is              %ld\n", ( signed long int )bli_obj_imag_stride( *obj ) );
	fprintf( file, " m_padded        %lu\n", ( unsigned long int )bli_obj_padded_length( *obj ) );
	fprintf( file, " n_padded        %lu\n", ( unsigned long int )bli_obj_padded_width( *obj ) );
	fprintf( file, " ps              %lu\n", ( unsigned long int )bli_obj_panel_stride( *obj ) );
	fprintf( file, "\n" );

	fprintf( file, " info            %lX\n", ( unsigned long int )(*obj).info );
	fprintf( file, " - is complex    %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) );
	fprintf( file, " - is d. prec    %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) );
	fprintf( file, " - datatype      %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) );
	fprintf( file, " - target dt     %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) );
	fprintf( file, " - exec dt       %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) );
	fprintf( file, " - has trans     %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) );
	fprintf( file, " - has conj      %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) );
	fprintf( file, " - unit diag?    %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) );
	fprintf( file, " - struc type    %lu\n", ( unsigned long int )bli_obj_struc( *obj ) >> BLIS_STRUC_SHIFT );
	fprintf( file, " - uplo type     %lu\n", ( unsigned long int )bli_obj_uplo( *obj ) >> BLIS_UPLO_SHIFT );
	fprintf( file, "   - is upper    %lu\n", ( unsigned long int )bli_obj_is_upper( *obj ) );
	fprintf( file, "   - is lower    %lu\n", ( unsigned long int )bli_obj_is_lower( *obj ) );
	fprintf( file, "   - is dense    %lu\n", ( unsigned long int )bli_obj_is_dense( *obj ) );
	fprintf( file, " - pack schema   %lu\n", ( unsigned long int )bli_obj_pack_schema( *obj ) >> BLIS_PACK_SCHEMA_SHIFT );
	fprintf( file, " - packinv diag? %lu\n", ( unsigned long int )bli_obj_has_inverted_diag( *obj ) );
	fprintf( file, " - pack ordifup  %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_upper( *obj ) );
	fprintf( file, " - pack ordiflo  %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_lower( *obj ) );
	fprintf( file, " - packbuf type  %lu\n", ( unsigned long int )bli_obj_pack_buffer_type( *obj ) >> BLIS_PACK_BUFFER_SHIFT );
	fprintf( file, "\n" );
}
Exemplo n.º 3
0
siz_t bli_packv_init_pack
     (
       pack_t  schema,
       bszid_t bmult_id,
       obj_t*  a,
       obj_t*  p,
       cntx_t* cntx
     )
{
	num_t     dt     = bli_obj_dt( a );
	dim_t     dim_a  = bli_obj_vector_dim( a );
	dim_t     bmult  = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );

	membrk_t* membrk = bli_cntx_membrk( cntx );

#if 0
	mem_t*    mem_p;
#endif
	dim_t     m_p_pad;
	siz_t     size_p;
	inc_t     rs_p, cs_p;
	void*     buf;


	// We begin by copying the basic fields of c.
	bli_obj_alias_to( a, p );

	// Update the dimensions.
	bli_obj_set_dims( dim_a, 1, p );

	// Reset the view offsets to (0,0).
	bli_obj_set_offs( 0, 0, p );

	// Set the pack schema in the p object to the value in the control tree
	// node.
	bli_obj_set_pack_schema( schema, p );

	// Compute the dimensions padded by the dimension multiples.
	m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult );

	// Compute the size of the packed buffer.
	size_p = m_p_pad * 1 * bli_obj_elem_size( p );

#if 0
	// Extract the address of the mem_t object within p that will track
	// properties of the packed buffer.
	mem_p = bli_obj_pack_mem( *p );

	if ( bli_mem_is_unalloc( mem_p ) )
	{
		// If the mem_t object of p has not yet been allocated, then acquire
		// a memory block suitable for a vector.
		bli_membrk_acquire_v( membrk,
		                      size_p,
		                      mem_p );
	}
	else
	{
 		// If the mem_t object has already been allocated, then release and
		// re-acquire the memory so there is sufficient space.
		if ( bli_mem_size( mem_p ) < size_p )
		{
			bli_membrk_release( mem_p );

			bli_membrk_acquire_v( membrk,
			                      size_p,
			                      mem_p );
		}
	}

	// Grab the buffer address from the mem_t object and copy it to the
	// main object buffer field. (Sometimes this buffer address will be
	// copied when the value is already up-to-date, because it persists
	// in the main object buffer field across loop iterations.)
	buf = bli_mem_buffer( mem_p );
	bli_obj_set_buffer( buf, p );
#endif

	// Save the padded (packed) dimensions into the packed object.
	bli_obj_set_padded_dims( m_p_pad, 1, p );

	// Set the row and column strides of p based on the pack schema.
	if ( schema == BLIS_PACKED_VECTOR )
	{
		// Set the strides to reflect a column-stored vector. Note that the
		// column stride may never be used, and is only useful to determine
		// how much space beyond the vector would need to be zero-padded, if
		// zero-padding was needed.
		rs_p = 1;
		cs_p = bli_obj_padded_length( p );

		bli_obj_set_strides( rs_p, cs_p, p );
	}

	return size_p;
}
void bli_packm_blk_var1( obj_t*   c,
                         obj_t*   p,
                         packm_thrinfo_t* t )
{
	num_t     dt_cp      = bli_obj_datatype( *c );

	struc_t   strucc     = bli_obj_struc( *c );
	doff_t    diagoffc   = bli_obj_diag_offset( *c );
	diag_t    diagc      = bli_obj_diag( *c );
	uplo_t    uploc      = bli_obj_uplo( *c );
	trans_t   transc     = bli_obj_conjtrans_status( *c );
	pack_t    schema     = bli_obj_pack_schema( *p );
	bool_t    invdiag    = bli_obj_has_inverted_diag( *p );
	bool_t    revifup    = bli_obj_is_pack_rev_if_upper( *p );
	bool_t    reviflo    = bli_obj_is_pack_rev_if_lower( *p );

	dim_t     m_p        = bli_obj_length( *p );
	dim_t     n_p        = bli_obj_width( *p );
	dim_t     m_max_p    = bli_obj_padded_length( *p );
	dim_t     n_max_p    = bli_obj_padded_width( *p );

	void*     buf_c      = bli_obj_buffer_at_off( *c );
	inc_t     rs_c       = bli_obj_row_stride( *c );
	inc_t     cs_c       = bli_obj_col_stride( *c );

	void*     buf_p      = bli_obj_buffer_at_off( *p );
	inc_t     rs_p       = bli_obj_row_stride( *p );
	inc_t     cs_p       = bli_obj_col_stride( *p );
	inc_t     is_p       = bli_obj_imag_stride( *p );
	dim_t     pd_p       = bli_obj_panel_dim( *p );
	inc_t     ps_p       = bli_obj_panel_stride( *p );

	obj_t     kappa;
	/*---initialize pointer to stop gcc complaining  2-9-16 GH --- */
	obj_t*    kappa_p = {0};
	void*     buf_kappa;

	func_t*   packm_kers;
	void*     packm_ker;

	FUNCPTR_T f;

	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_ind_packed( schema ) )
	{
		// The value for kappa we use will depend on whether the scalar
		// attached to A has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if( thread_am_ochief( t ) )
		{
			if ( bli_obj_scalar_has_nonzero_imag( p ) )
			{
				// Detach the scalar.
				bli_obj_scalar_detach( p, &kappa );

				// Reset the attached scalar (to 1.0).
				bli_obj_scalar_reset( p );

				kappa_p = &kappa;
			}
			else
			{
				// If the internal scalar of A has only a real component, then
				// we will apply it later (in the micro-kernel), and so we will
				// use BLIS_ONE to indicate no scaling during packing.
				kappa_p = &BLIS_ONE;
			}
		}
		kappa_p = thread_obroadcast( t, kappa_p );

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
	}
	else // if ( bli_is_nat_packed( schema ) )
	{
		// This branch if for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
	}


	// Choose the correct func_t object based on the pack_t schema.
	if      ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers;
	else if ( bli_is_3mi_packed( schema ) ||
	          bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers;
	else if ( bli_is_ro_packed( schema ) ||
	          bli_is_io_packed( schema ) ||
	         bli_is_rpi_packed( schema ) )  packm_kers = packm_struc_cxk_rih_kers;
	else                                    packm_kers = packm_struc_cxk_kers;

	// Query the datatype-specific function pointer from the func_t object.
	packm_ker = bli_func_obj_query( dt_cp, packm_kers );


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_cp];

	// Invoke the function.
	f( strucc,
	   diagoffc,
	   diagc,
	   uploc,
	   transc,
	   schema,
	   invdiag,
	   revifup,
	   reviflo,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   packm_ker,
	   t );
}
Exemplo n.º 5
0
void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
                                  dim_t     i,
                                  dim_t     b,
                                  obj_t*    obj,
                                  obj_t*    sub_obj )
{
	dim_t m, n;

	// For now, we only support acquiring the middle subpartition.
	if ( requested_part != BLIS_SUBPART1 )
	{
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	// Partitioning top-to-bottom through packed column panels (which are
	// row-stored) is not yet supported.
	if ( bli_obj_is_col_packed( *obj ) )
	{
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	// Query the dimensions of the parent object.
	m = bli_obj_length( *obj );
	n = bli_obj_width( *obj );

	// Foolproofing: do not let b exceed what's left of the m dimension at
	// row offset i.
	if ( b > m - i ) b = m - i;

	// Begin by copying the info, elem size, buffer, row stride, and column
	// stride fields of the parent object. Note that this omits copying view
	// information because the new partition will have its own dimensions
	// and offsets.
	bli_obj_init_subpart_from( *obj, *sub_obj );

	// Modify offsets and dimensions of requested partition.
	bli_obj_set_dims( b, n, *sub_obj );

	// Tweak the padded length of the subpartition to trick the underlying
	// implementation into only zero-padding for the narrow submatrix of
	// interest. Usually, the value we want is b (for non-edge cases), but
	// at the edges, we want the remainder of the mem_t region in the m
	// dimension. Edge cases are defined as occurring when i + b is exactly
	// equal to the inherited sub-object's length (which happens since the
	// determine_blocksize function would have returned a smaller value of
	// b for the edge iteration). In these cases, we arrive at the new
	// packed length by simply subtracting off i.
	{
		dim_t  m_pack_max = bli_obj_padded_length( *sub_obj );
		dim_t  m_pack_cur;

		if ( i + b == m ) m_pack_cur = m_pack_max - i;
		else              m_pack_cur = b;

		bli_obj_set_padded_length( m_pack_cur, *sub_obj );
	}

	// Translate the desired offsets to a panel offset and adjust the
	// buffer pointer of the subpartition object.
	{
		char* buf_p        = bli_obj_buffer( *sub_obj );
		siz_t elem_size    = bli_obj_elem_size( *sub_obj );
		dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj );

		buf_p = buf_p + elem_size * off_to_panel;

		bli_obj_set_buffer( ( void* )buf_p, *sub_obj );
	}
}
Exemplo n.º 6
0
void bli_packm_blk_var1_md
     (
       obj_t*   c,
       obj_t*   p,
       cntx_t*  cntx,
       cntl_t*  cntl,
       thrinfo_t* t
     )
{
	num_t     dt_c       = bli_obj_dt( c );
	num_t     dt_p       = bli_obj_dt( p );

	trans_t   transc     = bli_obj_conjtrans_status( c );
	pack_t    schema     = bli_obj_pack_schema( p );

	dim_t     m_p        = bli_obj_length( p );
	dim_t     n_p        = bli_obj_width( p );
	dim_t     m_max_p    = bli_obj_padded_length( p );
	dim_t     n_max_p    = bli_obj_padded_width( p );

	void*     buf_c      = bli_obj_buffer_at_off( c );
	inc_t     rs_c       = bli_obj_row_stride( c );
	inc_t     cs_c       = bli_obj_col_stride( c );

	void*     buf_p      = bli_obj_buffer_at_off( p );
	inc_t     rs_p       = bli_obj_row_stride( p );
	inc_t     cs_p       = bli_obj_col_stride( p );
	inc_t     is_p       = bli_obj_imag_stride( p );
	dim_t     pd_p       = bli_obj_panel_dim( p );
	inc_t     ps_p       = bli_obj_panel_stride( p );

	obj_t     kappa;
	void*     buf_kappa;

	FUNCPTR_T f;


	// Treatment of kappa (ie: packing during scaling) depends on
	// whether we are executing an induced method.
	if ( bli_is_nat_packed( schema ) )
	{
		// This branch is for native execution, where we assume that
		// the micro-kernel will always apply the alpha scalar of the
		// higher-level operation. Thus, we use BLIS_ONE for kappa so
		// that the underlying packm implementation does not perform
		// any scaling during packing.
		buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE );
	}
	else // if ( bli_is_ind_packed( schema ) )
	{
		obj_t* kappa_p;

		// The value for kappa we use will depend on whether the scalar
		// attached to A has a nonzero imaginary component. If it does,
		// then we will apply the scalar during packing to facilitate
		// implementing induced complex domain algorithms in terms of
		// real domain micro-kernels. (In the aforementioned situation,
		// applying a real scalar is easy, but applying a complex one is
		// harder, so we avoid the need altogether with the code below.)
		if ( bli_obj_scalar_has_nonzero_imag( p ) )
		{
			// Detach the scalar.
			bli_obj_scalar_detach( p, &kappa );

			// Reset the attached scalar (to 1.0).
			bli_obj_scalar_reset( p );

			kappa_p = &kappa;
		}
		else
		{
			// If the internal scalar of A has only a real component, then
			// we will apply it later (in the micro-kernel), and so we will
			// use BLIS_ONE to indicate no scaling during packing.
			kappa_p = &BLIS_ONE;
		}

		// Acquire the buffer to the kappa chosen above.
		buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p );
	}


	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_c][dt_p];

	// Invoke the function.
	f(
	   transc,
	   schema,
	   m_p,
	   n_p,
	   m_max_p,
	   n_max_p,
	   buf_kappa,
	   buf_c, rs_c, cs_c,
	   buf_p, rs_p, cs_p,
	          is_p,
	          pd_p, ps_p,
	   cntx,
	   t );
}