Beispiel #1
0
// Object-based front-end for the trmv operation.
//
// Optionally validates the operands, builds a local copy-cast of alpha,
// picks a control tree based on the storage and transposition of a and the
// increment of x, and then hands everything to bli_trmv_int().
//
//   alpha  scalar; copy-cast into a local object before use.
//   a      matrix operand; its pack schema may be overwritten here.
//   x      vector operand; its pack schema may be overwritten here.
void bli_trmv( obj_t*  alpha,
               obj_t*  a,
               obj_t*  x )
{
	trmv_t* cntl;
	obj_t   alpha_cast;
	num_t   dt_a;
	num_t   dt_x;
	num_t   dt_scalar;
	bool_t  a_contig;
	bool_t  x_contig;
	bool_t  nrow_tcol_case;

	// Validate the operands, but only when error checking is switched on.
	if ( bli_error_checking_is_enabled() )
	{
		bli_trmv_check( alpha, a, x );
	}

	// Target datatypes of the two operands.
	dt_a = bli_obj_target_datatype( *a );
	dt_x = bli_obj_target_datatype( *x );

	// a counts as contiguous when it is row- or column-stored; x counts as
	// contiguous when its vector increment is one.
	a_contig = ( bli_obj_is_row_stored( *a ) ||
	             bli_obj_is_col_stored( *a ) );
	x_contig = ( bli_obj_vector_inc( *x ) == 1 );

	// Copy-cast alpha to the union of the two target datatypes so that no
	// information is lost unnecessarily during the computation.
	dt_scalar = bli_datatype_union( dt_a, dt_x );
	bli_obj_init_scalar_copy_of( dt_scalar,
	                             BLIS_NO_CONJUGATE,
	                             alpha,
	                             &alpha_cast );

	// Two trees cover the four transposition/storage combinations in each
	// regime: "no transpose + row" and "transpose + column" share one tree,
	// and the other two combinations share the other. That is, the
	// nrow_tcol tree is used exactly when the "no transpose" answer and the
	// "row" answer agree.
	if ( a_contig && x_contig )
	{
		// Everything is contiguous: use the trees that call the unblocked
		// implementation directly, keyed on row/column storage.
		nrow_tcol_case = ( !( bli_obj_has_notrans( *a ) ) ==
		                   !( bli_obj_is_row_stored( *a ) ) );

		cntl = ( nrow_tcol_case ? trmv_cntl_bs_ke_nrow_tcol
		                        : trmv_cntl_bs_ke_ncol_trow );
	}
	else
	{
		// Flag whichever operand is contiguous as already packed so the
		// blocked algorithm does not pack it again.
		if ( a_contig ) { bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); }
		if ( x_contig ) { bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); }

		// Same selection rule as above, but keyed on storage tilt and
		// choosing among the trees that perform blocking.
		nrow_tcol_case = ( !( bli_obj_has_notrans( *a ) ) ==
		                   !( bli_obj_is_row_tilted( *a ) ) );

		cntl = ( nrow_tcol_case ? trmv_cntl_ge_nrow_tcol
		                        : trmv_cntl_ge_ncol_trow );
	}

	// Run the internal back-end with the local alpha and the chosen tree.
	bli_trmv_int( &alpha_cast,
	              a,
	              x,
	              cntl );
}
Beispiel #2
0
// Object-based front-end for the reference gemmsup variant "var1n".
//
// Extracts dimensions, buffers, strides, and conjugation flags from the
// object operands, looks up the datatype-specific implementation in
// ftypes_var1n using the datatype of c, and invokes it.
//
//   trans   when bli_is_notrans( trans ) is false, the operation is issued
//           in transposed form: A and B trade places (along with their
//           conj values), m and n are swapped, the row/column strides of
//           every operand are swapped, and eff_id is passed through
//           bli_stor3_trans().
//   alpha   scalar; its buffer is fetched for datatype dt_exec.
//   a, b    matrix operands. An operand marked for transposition is not
//           moved in memory; its row and column strides are exchanged
//           instead (and, for a, k is read from its length rather than
//           its width).
//   beta    scalar; its buffer is fetched for datatype dt_exec.
//   c       matrix whose datatype selects the implementation and whose
//           length and width supply m and n.
//   eff_id  storage id forwarded to the implementation.
//   cntx    forwarded unchanged to the implementation.
//   rntm    forwarded unchanged to the implementation.
void bli_gemmsup_ref_var1n
     (
       trans_t trans,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       stor3_t eff_id,
       cntx_t* cntx,
       rntm_t* rntm
     )
{
	// The execution datatype is taken from c.
	const num_t    dt_exec   = bli_obj_dt( c );

	const conj_t   conja     = bli_obj_conj_status( a );
	const conj_t   conjb     = bli_obj_conj_status( b );

	const dim_t    m         = bli_obj_length( c );
	const dim_t    n         = bli_obj_width( c );
	      dim_t    k;

	void* restrict buf_a     = bli_obj_buffer_at_off( a );
	      inc_t    rs_a;
	      inc_t    cs_a;

	void* restrict buf_b     = bli_obj_buffer_at_off( b );
	      inc_t    rs_b;
	      inc_t    cs_b;

	// Read k and the strides of A, applying any transposition implicitly.
	if ( bli_obj_has_notrans( a ) )
	{
		k     = bli_obj_width( a );

		rs_a  = bli_obj_row_stride( a );
		cs_a  = bli_obj_col_stride( a );
	}
	else // if ( bli_obj_has_trans( a ) )
	{
		// A is marked for transposition: take k from its length and
		// exchange the two strides.
		k     = bli_obj_length( a );

		rs_a  = bli_obj_col_stride( a );
		cs_a  = bli_obj_row_stride( a );
	}

	// Read the strides of B, applying any transposition implicitly.
	if ( bli_obj_has_notrans( b ) )
	{
		rs_b  = bli_obj_row_stride( b );
		cs_b  = bli_obj_col_stride( b );
	}
	else // if ( bli_obj_has_trans( b ) )
	{
		// B is marked for transposition: exchange the two strides.
		rs_b  = bli_obj_col_stride( b );
		cs_b  = bli_obj_row_stride( b );
	}

	void* restrict buf_c     = bli_obj_buffer_at_off( c );
	const inc_t    rs_c      = bli_obj_row_stride( c );
	const inc_t    cs_c      = bli_obj_col_stride( c );

	void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha );
	void* restrict buf_beta  = bli_obj_buffer_for_1x1( dt_exec, beta );

	// Index into the type combination array to extract the correct
	// function pointer.
	FUNCPTR_T f = ftypes_var1n[dt_exec];

	if ( bli_is_notrans( trans ) )
	{
		// Invoke the function.
		f
		(
		  conja,
		  conjb,
		  m,
		  n,
		  k,
		  buf_alpha,
		  buf_a, rs_a, cs_a,
		  buf_b, rs_b, cs_b,
		  buf_beta,
		  buf_c, rs_c, cs_c,
		  eff_id,
		  cntx,
		  rntm
		);
	}
	else
	{
		// Invoke the function (transposing the operation).
		f
		(
		  conjb,             // swap the conj values.
		  conja,
		  n,                 // swap the m and n dimensions.
		  m,
		  k,
		  buf_alpha,
		  buf_b, cs_b, rs_b, // swap the positions of A and B.
		  buf_a, cs_a, rs_a, // swap the strides of A and B.
		  buf_beta,
		  buf_c, cs_c, rs_c, // swap the strides of C.
		  bli_stor3_trans( eff_id ), // transpose the stor3_t id.
		  cntx,
		  rntm
		);
	}
}