Exemplo n.º 1
0
void bli_her2_int( conj_t  conjh,
                   obj_t*  alpha,
                   obj_t*  alpha_conj,
                   obj_t*  x,
                   obj_t*  y,
                   obj_t*  c,
                   cntx_t* cntx,
                   her2_t* cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     alpha_local;
	obj_t     alpha_conj_local;
	obj_t     x_local;
	obj_t     y_local;
	obj_t     c_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
	{
		if ( bli_is_conj( conjh ) ) bli_her2_check( alpha, x, y, c );
		else                        bli_syr2_check( alpha, x, y, c );
	}

	// If C, x, or y has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( c ) ) return;
	if ( bli_obj_has_zero_dim( x ) ) return;
	if ( bli_obj_has_zero_dim( y ) ) return;

	// Alias the operands in case we need to apply conjugations.
	bli_obj_alias_to( x, &x_local );
	bli_obj_alias_to( y, &y_local );
	bli_obj_alias_to( c, &c_local );

	// If matrix C is marked for conjugation, we interpret this as a request
	// to apply a conjugation to the other operands.
	if ( bli_obj_has_conj( &c_local ) )
	{
		bli_obj_toggle_conj( &c_local );

		bli_obj_toggle_conj( &x_local );
		bli_obj_toggle_conj( &y_local );

		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
		                                      BLIS_CONJUGATE,
		                                      alpha,
		                                      &alpha_local );
		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ),
		                                      BLIS_CONJUGATE,
		                                      alpha_conj,
		                                      &alpha_conj_local );
	}
	else
	{
		bli_obj_alias_to( *alpha, alpha_local );
		bli_obj_alias_to( *alpha_conj, alpha_conj_local );
	}


	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( conjh,
	   &alpha_local,
	   &alpha_conj_local,
	   &x_local,
	   &y_local,
	   &c_local,
	   cntx,
	   cntl );
}
Exemplo n.º 2
0
void bli_trsv_int( obj_t*  alpha,
                   obj_t*  a,
                   obj_t*  x,
                   cntx_t* cntx,
                   trsv_t* cntl )
{
	varnum_t  n;
	impl_t    i;
	bool_t    uplo;
	FUNCPTR_T f;
	obj_t     a_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_trsv_check( alpha, a, x );

	// If A or x has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( a ) ) return;
	if ( bli_obj_has_zero_dim( x ) ) return;

	// Alias A in case we need to induce a transformation (ie: transposition).
	bli_obj_alias_to( a, &a_local );

	// NOTE: to support cases where B is complex and A is real, we will
	// need to have the default side case be BLIS_RIGHT and then express
	// the left case in terms of it, rather than the other way around.

	// Determine uplo (for indexing to the correct function pointer).
	if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
	else                                uplo = 1;

	// We do not explicitly implement the cases where A is transposed.
	// However, we can still handle them. Specifically, if A is marked as
	// needing a transposition, we simply toggle the uplo value to cause the
	// correct algorithm to be induced. When that algorithm partitions into
	// A, it will grab the correct subpartitions, which will inherit A's
	// transposition bit and thus downstream subproblems will do the right
	// thing. Alternatively, we could accomplish the same end goal by
	// inducing a transposition, via bli_obj_induce_trans(), in the code
	// block below. That macro function swaps dimensions, strides, and
	// offsets. As an example, given a lower triangular, column-major matrix
	// that needs a transpose, we would induce that transposition by recasting
	// the object as an upper triangular, row-major matrix (with no transpose
	// needed). Note that how we choose to handle transposition here does NOT
	// affect the optimal choice of kernel (ie: a column-major column panel
	// matrix with transpose times a vector would use the same kernel as a
	// row-major row panel matrix with no transpose times a vector).
	if ( bli_obj_has_trans( &a_local ) )
	{
		//bli_obj_induce_trans( &a_local );
		//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
		if ( uplo == 1 ) uplo = 0;
		else             uplo = 1;
	}

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[uplo][n][i];

	// Invoke the variant.
	f( alpha,
	   &a_local,
	   x,
	   cntx,
	   cntl );
}
Exemplo n.º 3
0
void bli_unpackv_int( obj_t*     p,
                      obj_t*     a,
                      cntx_t*    cntx,
                      unpackv_t* cntl )
{
	// The unpackv operation consists of an optional casting post-process.
	// (This post-process is analogous to the cast pre-process in packv.)
	// Here are the following possible ways unpackv can execute:
	//  1. unpack and cast: Unpack to a temporary vector c and then cast
	//     c to a.
	//  2. unpack only: Unpack directly to vector a since typecasting is
	//     not needed.
	//  3. cast only: Not yet supported / not used.
	//  4. no-op: The control tree directs us to skip the unpack operation
	//     entirely. No action is taken.

	obj_t     c;

	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_unpackv_check( p, a, cntx );

	// Sanity check; A should never have a zero dimension. If we must support
	// it, then we should fold it into the next alias-and-early-exit block.
	if ( bli_obj_has_zero_dim( *a ) ) bli_abort();

	// First check if we are to skip this operation because the control tree
	// is NULL, and if so, simply return.
	if ( bli_cntl_is_noop( cntl ) )
	{
		return;
	}

	// If p was aliased to a during the pack stage (because it was already
	// in an acceptable packed/contiguous format), then no unpack is actually
	// necessary, so we return.
	if ( bli_obj_is_alias_of( *p, *a ) )
	{
		return;
	}

	// Now, if we are not skipping the unpack operation, then the only
	// question left is whether we are to typecast vector a after unpacking.
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
		bli_abort();
/*
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
	{
		// Initialize an object c for the intermediate typecast vector.
		bli_unpackv_init_cast( p,
		                       a,
		                       &c );
	}
	else
*/
	{
		// If no cast is needed, then aliasing object c to the original
		// vector serves as a minor optimization. This causes the unpackv
		// implementation to unpack directly into vector a.
		bli_obj_alias_to( *a, c );
	}

	// Now we are ready to proceed with the unpacking.

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( p,
	   &c,
	   cntx,
	   cntl );

	// Now, if necessary, we cast the contents of c to vector a. If casting
	// was not necessary, then we are done because the call to the unpackv
	// implementation would have unpacked directly to vector a.
/*
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
	{
		// Copy/typecast vector c to vector a.
		// NOTE: Here, we use copynzv instead of copym because, in the cases
		// where we are unpacking/typecasting a real vector c to a complex
		// vector a, we want to touch only the real components of a, rather
		// than also set the imaginary components to zero. This comes about
		// because of the fact that, if we are unpacking real-to-complex,
		// then it is because all of the computation occurred in the real
		// domain, and so we would want to leave whatever imaginary values
		// there are in vector a untouched. Notice that for unpackings that
		// entail complex-to-complex data movements, the copynzv operation
		// behaves exactly as copym, so no use cases are lost (at least none
		// that I can think of).
		bli_copynzv( &c,
		             a );

		// NOTE: The above code/comment is outdated. What should happen is
		// as follows:
		// - If dt(a) is complex and dt(p) is real, then create an alias of
		//   a and then tweak it so that it looks like a real domain object.
		//   This will involve:
		//   - projecting the datatype to real domain
		//   - scaling both the row and column strides by 2
		//   ALL OF THIS should be done in the front-end, NOT here, as
		//   unpackv() won't even be needed in that case.
	}
*/
}
Exemplo n.º 4
0
void bli_hemv_int( conj_t  conjh,
                   obj_t*  alpha,
                   obj_t*  a,
                   obj_t*  x,
                   obj_t*  beta,
                   obj_t*  y,
                   cntx_t* cntx,
                   hemv_t* cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     a_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
	{
		if ( bli_is_conj( conjh ) ) bli_hemv_check( alpha, a, x, beta, y );
		else                        bli_symv_check( alpha, a, x, beta, y );
	}

	// If y has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( *y ) ) return;

	// If x has a zero dimension, scale y by beta and return early.
	if ( bli_obj_has_zero_dim( *x ) )
	{
		bli_scalm( beta, y );
		return;
	}

	// Alias A in case we need to induce the upper triangular case.
	bli_obj_alias_to( *a, a_local );

/*
	// Our blocked algorithms only [explicitly] implement the lower triangular
	// case, so if matrix A is stored as upper triangular, we must toggle the
	// transposition (and conjugation) bits so that the diagonal partitioning
	// routines grab the correct partitions corresponding to the upper
	// triangular case. But we only need to do this for blocked algorithms,
	// since unblocked algorithms are responsible for handling the upper case
	// explicitly (and they should not be inspecting the transposition bit anyway).
	if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( *a ) )
	{
		bli_obj_toggle_conj( a_local );
		bli_obj_toggle_trans( a_local );
	}
*/

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( conjh,
	   alpha,
	   &a_local,
	   x,
	   beta,
	   y,
	   cntx,
	   cntl );
}
Exemplo n.º 5
0
void bli_ger_int( conj_t  conjx,
                  conj_t  conjy,
                  obj_t*  alpha,
                  obj_t*  x,
                  obj_t*  y,
                  obj_t*  a,
                  cntx_t* cntx,
                  ger_t*  cntl )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	obj_t     alpha_local;
	obj_t     x_local;
	obj_t     y_local;
	obj_t     a_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_ger_check( alpha, x, y, a );

	// If A has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( a ) ) return;

	// If x or y has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( x ) ||
	     bli_obj_has_zero_dim( y ) ) return;

	// Alias the objects, applying conjx and conjy to x and y, respectively.
	bli_obj_alias_with_conj( conjx, x, &x_local );
	bli_obj_alias_with_conj( conjy, y, &y_local );
	bli_obj_alias_to( a, &a_local );

	// If matrix A is marked for conjugation, we interpret this as a request
	// to apply a conjugation to the other operands.
	if ( bli_obj_has_conj( &a_local ) )
	{
		bli_obj_toggle_conj( &a_local );

		bli_obj_toggle_conj( &x_local );
		bli_obj_toggle_conj( &y_local );

		bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
		                                      BLIS_CONJUGATE,
		                                      alpha,
		                                      &alpha_local );
	}
	else
	{
		bli_obj_alias_to( *alpha, alpha_local );
	}

	// If we are about the call a leaf-level implementation, and matrix A
	// still needs a transposition, then we must induce one by swapping the
	// strides and dimensions.
	if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
	{
		bli_obj_induce_trans( &a_local );
		bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
	}

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( &alpha_local,
	   &x_local,
	   &y_local,
	   &a_local,
	   cntx,
	   cntl );
}
Exemplo n.º 6
0
void bli_gemm_int( obj_t*  alpha,
                   obj_t*  a,
                   obj_t*  b,
                   obj_t*  beta,
                   obj_t*  c,
                   cntx_t* cntx,
                   gemm_t* cntl,
                   thrinfo_t* thread )
{
	obj_t     a_local;
	obj_t     b_local;
	obj_t     c_local;
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;
	ind_t     im;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_gemm_basic_check( alpha, a, b, beta, c, cntx );

	// If C has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( *c ) ) return;

	// If A or B has a zero dimension, scale C by beta and return early.
	if ( bli_obj_has_zero_dim( *a ) ||
	     bli_obj_has_zero_dim( *b ) )
	{
        if( bli_thread_am_ochief( thread ) )
		    bli_scalm( beta, c );
        bli_thread_obarrier( thread );
		return;
	}

	// If A or B is marked as being filled with zeros, scale C by beta and
	// return early.
	if ( bli_obj_is_zeros( *a ) ||
	     bli_obj_is_zeros( *b ) )
	{
        if( bli_thread_am_ochief( thread ) )
		    bli_scalm( beta, c );
        bli_thread_obarrier( thread );
		return;
	}

	// Alias A and B in case we need to update attached scalars.
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );

	// Alias C in case we need to induce a transposition.
	bli_obj_alias_to( *c, c_local );

	// If we are about to call a leaf-level implementation, and matrix C
	// still needs a transposition, then we must induce one by swapping the
	// strides and dimensions. Note that this transposition would normally
	// be handled explicitly in the packing of C, but if C is not being
	// packed, this is our last chance to handle the transposition.
	if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) )
	{
        //if( bli_thread_am_ochief( thread ) ) {
            bli_obj_induce_trans( c_local );
            bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
       // }
	}

	// If alpha is non-unit, typecast and apply it to the scalar attached
	// to B.
	if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
	{
        bli_obj_scalar_apply_scalar( alpha, &b_local );
	}

	// If beta is non-unit, typecast and apply it to the scalar attached
	// to C.
	if ( !bli_obj_equals( beta, &BLIS_ONE ) )
	{
        bli_obj_scalar_apply_scalar( beta, &c_local );
	}

	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Somewhat hackish support for 3m3, 3m2, and 4m1b method implementations.
	im = bli_cntx_get_ind_method( cntx );

	if ( im != BLIS_NAT )
	{
		if      ( im == BLIS_3M3  && f == bli_gemm_blk_var1f ) f = bli_gemm_blk_var4f;
		else if ( im == BLIS_3M2  && f == bli_gemm_ker_var2  ) f = bli_gemm_ker_var4;
		else if ( im == BLIS_4M1B && f == bli_gemm_ker_var2  ) f = bli_gemm_ker_var3;
	}

	// Invoke the variant.
	f( &a_local,
	   &b_local,
	   &c_local,
	   cntx,
	   cntl,
       thread );
}
Exemplo n.º 7
0
void bli_packm_int( obj_t*   a,
                    obj_t*   p,
                    cntx_t*  cntx,
                    packm_t* cntl,
                    thrinfo_t* thread )
{
	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_packm_int_check( a, p, cntx );

	// Sanity check; A should never have a zero dimension. If we must support
	// it, then we should fold it into the next alias-and-early-exit block.
	//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();

	// First check if we are to skip this operation because the control tree
	// is NULL. We return without taking any action because a was already
	// aliased to p in packm_init().
	if ( bli_cntl_is_noop( cntl ) )
	{
		return;
	}

	// Let us now check to see if the object has already been packed. First
	// we check if it has been packed to an unspecified (row or column)
	// format, in which case we can return, since by now aliasing has already
	// taken place in packm_init().
	// NOTE: The reason we don't need to even look at the control tree in
	// this case is as follows: an object's pack status is only set to
	// BLIS_PACKED_UNSPEC for situations when the actual format used is
	// not important, as long as its packed into contiguous rows or
	// contiguous columns. A good example of this is packing for matrix
	// operands in the level-2 operations.
	if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
	{
		return;
	}

	// At this point, we can be assured that cntl is not NULL. Now we check
	// if the object has already been packed to the desired schema (as en-
	// coded in the control tree). If so, we can return, as above.
	// NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED
	// and thus packing will be called for (but in some cases packing has
	// already taken place, or does not need to take place, and so that will
	// be indicated by the pack status). Also, not all combinations of
	// current pack status and desired pack schema are valid.
	if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
	{
		return;
	}

	// If the object is marked as being filled with zeros, then we can skip
	// the packm operation entirely.
	if ( bli_obj_is_zeros( *a ) )
	{
		return;
	}


	// Extract the variant number and implementation type.
	n = bli_cntl_var_num( cntl );
	i = bli_cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant with kappa_use.
	f( a,
	   p,
	   cntx,
       thread );

    // Barrier so that packing is done before computation
    bli_thread_obarrier( thread );
}