Пример #1
0
dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p )
{
	dim_t panel_off;

	if      ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROWS )
	{
		// For the "packed rows" schema, a single row is effectively one
		// row panel, and so we use the row offset as the panel offset.
		// Then we multiply this offset by the effective panel stride
		// (ie: the row stride) to arrive at the desired offset.
		panel_off = offmn * bli_obj_row_stride( *p );
	}
	else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COLUMNS )
	{
		// For the "packed columns" schema, a single column is effectively one
		// column panel, and so we use the column offset as the panel offset.
		// Then we multiply this offset by the effective panel stride
		// (ie: the column stride) to arrive at the desired offset.
		panel_off = offmn * bli_obj_col_stride( *p );
	}
	else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROW_PANELS )
	{
		// For the "packed row panels" schema, the column stride is equal to
		// the panel dimension (length). So we can divide it into offmn
		// (interpreted as a row offset) to arrive at a panel offset. Then
		// we multiply this offset by the panel stride to arrive at the total
		// offset to the panel (in units of elements).
		panel_off = offmn / bli_obj_col_stride( *p );
		panel_off = panel_off * bli_obj_panel_stride( *p );

		// Sanity check.
		if ( offmn % bli_obj_col_stride( *p ) > 0 ) bli_abort();
	}
	else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COL_PANELS )
	{
		// For the "packed column panels" schema, the row stride is equal to
		// the panel dimension (width). So we can divide it into offmn
		// (interpreted as a column offset) to arrive at a panel offset. Then
		// we multiply this offset by the panel stride to arrive at the total
		// offset to the panel (in units of elements).
		panel_off = offmn / bli_obj_row_stride( *p );
		panel_off = panel_off * bli_obj_panel_stride( *p );

		// Sanity check.
		if ( offmn % bli_obj_row_stride( *p ) > 0 ) bli_abort();
	}
	else
	{
		panel_off = 0;
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	return panel_off;
}
Пример #2
0
num_t bli_ind_map_cdt_to_index( num_t dt )
{
	// A non-complex datatype should never be passed in.
	if ( !bli_is_complex( dt ) ) bli_abort();

	// Map the complex datatype to a zero-based index.
	if         ( bli_is_scomplex( dt ) )    return 0;
	else /* if ( bli_is_dcomplex( dt ) ) */ return 1;
}
Пример #3
0
err_t bli_check_error_code_helper( gint_t code, char* file, guint_t line )
{
	if ( code == BLIS_SUCCESS ) return code;

	if ( BLIS_ERROR_CODE_MAX < code && code < BLIS_ERROR_CODE_MIN )
	{
		bli_print_msg( bli_error_string_for_code( code ),
		               file, line );
		bli_abort();
	}
	else
	{
		bli_print_msg( bli_error_string_for_code( BLIS_UNDEFINED_ERROR_CODE ),
		               file, line );
		bli_abort();
	}

	return code;
}
Пример #4
0
void* bli_malloc_align
     (
       malloc_ft f,
       size_t    size,
       size_t    align_size
     )
{
	const size_t ptr_size     = sizeof( void* );
	size_t       align_offset = 0;
	void*        p_orig;
	int8_t*      p_byte;
	void**       p_addr;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_malloc_align_check( f, size, align_size );

	// Return early if zero bytes were requested.
	if ( size == 0 ) return NULL;

	// Add the alignment size and the size of a pointer to the number
	// of bytes to allocate.
	size += align_size + ptr_size;

	// Call the allocation function.
	p_orig = f( size );

	// If NULL was returned, something is probably very wrong.
	if ( p_orig == NULL ) bli_abort();

	// Advance the pointer by one pointer element.
	p_byte = p_orig;
	p_byte += ptr_size;

	// Compute the offset to the desired alignment.
	if ( bli_is_unaligned_to( ( siz_t )p_byte, ( siz_t )align_size ) )
	{
		align_offset = align_size -
		               bli_offset_past_alignment( ( siz_t )p_byte,
		                                          ( siz_t )align_size );
	}

	// Advance the pointer using the difference between the alignment
	// size and the alignment offset.
	p_byte += align_offset;

	// Compute the address of the pointer element just before the start
	// of the aligned address, and store the original address there.
	p_addr = ( void** )(p_byte - ptr_size);
	*p_addr = p_orig;

	// Return the aligned pointer.
	return p_byte;
}
Пример #5
0
double bli_clock_helper()
{
	LARGE_INTEGER clock_freq = {0};
	LARGE_INTEGER clock_val;
	BOOL          r_val;

	r_val = QueryPerformanceFrequency( &clock_freq );

	if ( r_val == 0 )
	{
		bli_print_msg( "QueryPerformanceFrequency() failed", __FILE__, __LINE__ );
		bli_abort();
	}

	r_val = QueryPerformanceCounter( &clock_val );

	if ( r_val == 0 )
	{
		bli_print_msg( "QueryPerformanceCounter() failed", __FILE__, __LINE__ );
		bli_abort();
	}

	return ( ( double) clock_val.QuadPart / ( double) clock_freq.QuadPart );
}
Пример #6
0
/* Subroutine */ int PASTEF770(xerbla)(bla_character *srname, bla_integer *info, ftnlen srname_len)
{
/*  -- LAPACK auxiliary routine (preliminary version) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     February 29, 1992 */

/*     .. Scalar Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  XERBLA  is an error handler for the LAPACK routines. */
/*  It is called by an LAPACK routine if an input parameter has an */
/*  invalid value.  A message is printed and execution stops. */

/*  Installers may consider modifying the STOP statement in order to */
/*  call system-specific exception-handling facilities. */

/*  Arguments */
/*  ========= */

/*  SRNAME  (input) CHARACTER*6 */
/*          The name of the routine which called XERBLA. */

/*  INFO    (input) INTEGER */
/*          The position of the invalid parameter in the parameter list */
/*          of the calling routine. */
    int i;

    for ( i = 0; i < srname_len; ++i )
        srname[i] = toupper( srname[i] );

    printf("** On entry to %6s, parameter number %2i had an illegal value\n",
        srname, (int)*info);

    bli_abort();

/*     End of XERBLA */

    return 0;
} /* xerbla */
Пример #7
0
func_t* bli_gemm_cntl_ukrs( gemm_t* cntl )
{
	dim_t max_depth = 10;
	dim_t i;

	for ( i = 0; ; ++i )
	{
		// If the gemm sub-tree is NULL, we are at the leaf.
		if ( cntl_sub_gemm( cntl ) == NULL ) break;

		// If the above branch was not taken, we can assume the gemm
		// sub-tree is valid. Here, we step down into that sub-tree.
		cntl = cntl_sub_gemm( cntl );

		// Safeguard against infinite loops due to bad control tree
		// configuration.
		if ( i == max_depth ) bli_abort();
	}

	return cntl_gemm_ukrs( cntl );
}
Пример #8
0
void bli_abort_msg( char* message )
{
    fprintf( stderr, "BLIS: %s\n", message );
    fprintf( stderr, "BLIS: Aborting.\n" );
    bli_abort();
}
Пример #9
0
void bli_unpackv_int( obj_t*     p,
                      obj_t*     a,
                      cntx_t*    cntx,
                      unpackv_t* cntl )
{
	// The unpackv operation consists of an optional casting post-process.
	// (This post-process is analogous to the cast pre-process in packv.)
	// Here are the following possible ways unpackv can execute:
	//  1. unpack and cast: Unpack to a temporary vector c and then cast
	//     c to a.
	//  2. unpack only: Unpack directly to vector a since typecasting is
	//     not needed.
	//  3. cast only: Not yet supported / not used.
	//  4. no-op: The control tree directs us to skip the unpack operation
	//     entirely. No action is taken.

	obj_t     c;

	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_unpackv_check( p, a, cntx );

	// Sanity check; A should never have a zero dimension. If we must support
	// it, then we should fold it into the next alias-and-early-exit block.
	if ( bli_obj_has_zero_dim( *a ) ) bli_abort();

	// First check if we are to skip this operation because the control tree
	// is NULL, and if so, simply return.
	if ( cntl_is_noop( cntl ) )
	{
		return;
	}

	// If p was aliased to a during the pack stage (because it was already
	// in an acceptable packed/contiguous format), then no unpack is actually
	// necessary, so we return.
	if ( bli_obj_is_alias_of( *p, *a ) )
	{
		return;
	}

	// Now, if we are not skipping the unpack operation, then the only
	// question left is whether we are to typecast vector a after unpacking.
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
		bli_abort();
/*
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
	{
		// Initialize an object c for the intermediate typecast vector.
		bli_unpackv_init_cast( p,
		                       a,
		                       &c );
	}
	else
*/
	{
		// If no cast is needed, then aliasing object c to the original
		// vector serves as a minor optimization. This causes the unpackv
		// implementation to unpack directly into vector a.
		bli_obj_alias_to( *a, c );
	}

	// Now we are ready to proceed with the unpacking.

	// Extract the variant number and implementation type.
	n = cntl_var_num( cntl );
	i = cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	f = vars[n][i];

	// Invoke the variant.
	f( p,
	   &c,
	   cntx,
	   cntl );

	// Now, if necessary, we cast the contents of c to vector a. If casting
	// was not necessary, then we are done because the call to the unpackv
	// implementation would have unpacked directly to vector a.
/*
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
	{
		// Copy/typecast vector c to vector a.
		// NOTE: Here, we use copynzv instead of copym because, in the cases
		// where we are unpacking/typecasting a real vector c to a complex
		// vector a, we want to touch only the real components of a, rather
		// than also set the imaginary components to zero. This comes about
		// because of the fact that, if we are unpacking real-to-complex,
		// then it is because all of the computation occurred in the real
		// domain, and so we would want to leave whatever imaginary values
		// there are in vector a untouched. Notice that for unpackings that
		// entail complex-to-complex data movements, the copynzv operation
		// behaves exactly as copym, so no use cases are lost (at least none
		// that I can think of).
		bli_copynzv( &c,
		             a );

		// NOTE: The above code/comment is outdated. What should happen is
		// as follows:
		// - If dt(a) is complex and dt(p) is real, then create an alias of
		//   a and then tweak it so that it looks like a real domain object.
		//   This will involve:
		//   - projecting the datatype to real domain
		//   - scaling both the row and column strides by 2
		//   ALL OF THIS should be done in the front-end, NOT here, as
		//   unpackv() won't even be needed in that case.
	}
*/
}
Пример #10
0
void bli_packv_init
     (
       obj_t*   a,
       obj_t*   p,
       cntx_t*  cntx,
       packv_t* cntl
     )
{
	// The purpose of packm_init() is to initialize an object P so that

	// a source object A can be packed into P via one of the packv
	// implementations. This initialization includes acquiring a suitable
	// block of memory from the memory allocator, if such a block of memory
	// has not already been allocated previously.

	pack_t   pack_schema;
	bszid_t  bmult_id;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_packv_check( a, p, cntx );

	// First check if we are to skip this operation because the control tree
	// is NULL, and if so, simply alias the object to its packed counterpart.
	if ( bli_cntl_is_noop( cntl ) )
	{
		bli_obj_alias_to( a, p );
		return;
	}

	// At this point, we can be assured that cntl is not NULL. Let us now
	// check to see if the object has already been packed to the desired
	// schema (as encoded in the control tree). If so, we can alias and
	// return, as above.
	// Note that in most cases, bli_obj_pack_schema() will return
	// BLIS_NOT_PACKED and thus packing will be called for (but in some
	// cases packing has already taken place). Also, not all combinations
	// of current pack status and desired pack schema are valid.
	if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
	{
		bli_obj_alias_to( a, p );
		return;
	}

	// Now, if we are not skipping the pack operation, then the only question
	// left is whether we are to typecast vector a before packing.
	if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) )
		bli_abort();

	// Extract various fields from the control tree and pass them in
	// explicitly into _init_pack(). This allows external code generators
	// the option of bypassing usage of control trees altogether.
	pack_schema = cntl_pack_schema( cntl );
	bmult_id    = cntl_bmid( cntl );

	// Initialize object p for the final packed vector.
	bli_packv_init_pack
	(
	  pack_schema,
	  bmult_id,
	  &a,
	  p,
	  cntx
	);

	// Now p is ready to be packed.
}
Пример #11
0
void bli_gemm_int
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	obj_t     a_local;
	obj_t     b_local;
	obj_t     c_local;
	gemm_voft f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_gemm_basic_check( alpha, a, b, beta, c, cntx );

	// If C has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( *c ) ) return;

	// If A or B has a zero dimension, scale C by beta and return early.
	if ( bli_obj_has_zero_dim( *a ) ||
	     bli_obj_has_zero_dim( *b ) )
	{
        if ( bli_thread_am_ochief( thread ) )
		    bli_scalm( beta, c );
        bli_thread_obarrier( thread );
		return;
	}

	// If A or B is marked as being filled with zeros, scale C by beta and
	// return early.
	if ( bli_obj_is_zeros( *a ) ||
	     bli_obj_is_zeros( *b ) )
	{
		// This should never execute.
		bli_abort();

        if ( bli_thread_am_ochief( thread ) )
		    bli_scalm( beta, c );
        bli_thread_obarrier( thread );
		return;
	}

	// Alias A, B, and C in case we need to update attached scalars.
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );
	bli_obj_alias_to( *c, c_local );

	// If alpha is non-unit, typecast and apply it to the scalar attached
	// to B.
	if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
	{
        bli_obj_scalar_apply_scalar( alpha, &b_local );
	}

	// If beta is non-unit, typecast and apply it to the scalar attached
	// to C.
	if ( !bli_obj_equals( beta, &BLIS_ONE ) )
	{
        bli_obj_scalar_apply_scalar( beta, &c_local );
	}

	// Create the next node in the thrinfo_t structure.
	bli_thrinfo_grow( cntx, cntl, thread );

	// Extract the function pointer from the current control tree node.
	f = bli_cntl_var_func( cntl );

	// Somewhat hackish support for 3m3, 3m2, and 4m1b method implementations.
	{
		ind_t im = bli_cntx_get_ind_method( cntx );

		if ( im != BLIS_NAT )
		{
			if      ( im == BLIS_3M3  && f == bli_gemm_packa    ) f = bli_gemm3m3_packa;
			else if ( im == BLIS_3M2  && f == bli_gemm_ker_var2 ) f = bli_gemm3m2_ker_var2;
			else if ( im == BLIS_4M1B && f == bli_gemm_ker_var2 ) f = bli_gemm4mb_ker_var2;
		}
	}

	// Invoke the variant.
	f
	(
	  &a_local,
	  &b_local,
	  &c_local,
	  cntx,
	  cntl,
      thread
	);
}