// Write a labeled, human-readable dump of an object's fields to stdout.
//
// label: text printed on its own line ahead of the field listing.
// obj:   object whose dimensions, offsets, buffer address, strides, padded
//        dimensions, and info bitfield are reported.
//
// When error checking is enabled, bli_obj_print_check() is called on the
// arguments before anything is written.
void bli_obj_print( char* label, obj_t* obj )
{
	FILE* out = stdout;

	if ( bli_error_checking_is_enabled() )
	{
		bli_obj_print_check( label, obj );
	}

	// Heading: the label, framed by blank lines.
	fprintf( out, "\n" );
	fprintf( out, "%s\n", label );
	fprintf( out, "\n" );

	// Dimensions.
	fprintf( out, " m x n %lu x %lu\n",
	         ( unsigned long )bli_obj_length( *obj ),
	         ( unsigned long )bli_obj_width( *obj ) );
	fprintf( out, "\n" );

	// Offsets.
	fprintf( out, " offm, offn %lu, %lu\n",
	         ( unsigned long )bli_obj_row_off( *obj ),
	         ( unsigned long )bli_obj_col_off( *obj ) );
	fprintf( out, " diagoff %ld\n",
	         ( long )bli_obj_diag_offset( *obj ) );
	fprintf( out, "\n" );

	// Buffer address, element size, strides, and padded dimensions.
	fprintf( out, " buf %p\n",
	         ( void* )bli_obj_buffer( *obj ) );
	fprintf( out, " elem size %lu\n",
	         ( unsigned long )bli_obj_elem_size( *obj ) );
	fprintf( out, " rs, cs %ld, %ld\n",
	         ( long )bli_obj_row_stride( *obj ),
	         ( long )bli_obj_col_stride( *obj ) );
	fprintf( out, " is %ld\n",
	         ( long )bli_obj_imag_stride( *obj ) );
	fprintf( out, " m_padded %lu\n",
	         ( unsigned long )bli_obj_padded_length( *obj ) );
	fprintf( out, " n_padded %lu\n",
	         ( unsigned long )bli_obj_padded_width( *obj ) );
	fprintf( out, " ps %lu\n",
	         ( unsigned long )bli_obj_panel_stride( *obj ) );
	fprintf( out, "\n" );

	// Raw info field (in hexadecimal), followed by the individual
	// properties queried from it.
	fprintf( out, " info %lX\n",
	         ( unsigned long )(*obj).info );
	fprintf( out, " - is complex %lu\n",
	         ( unsigned long )bli_obj_is_complex( *obj ) );
	fprintf( out, " - is d. prec %lu\n",
	         ( unsigned long )bli_obj_is_double_precision( *obj ) );
	fprintf( out, " - datatype %lu\n",
	         ( unsigned long )bli_obj_datatype( *obj ) );
	fprintf( out, " - target dt %lu\n",
	         ( unsigned long )bli_obj_target_datatype( *obj ) );
	fprintf( out, " - exec dt %lu\n",
	         ( unsigned long )bli_obj_execution_datatype( *obj ) );
	fprintf( out, " - has trans %lu\n",
	         ( unsigned long )bli_obj_has_trans( *obj ) );
	fprintf( out, " - has conj %lu\n",
	         ( unsigned long )bli_obj_has_conj( *obj ) );
	fprintf( out, " - unit diag? %lu\n",
	         ( unsigned long )bli_obj_has_unit_diag( *obj ) );

	// The multi-bit fields are shifted down after the cast so that they
	// print as small integers rather than as masked info values.
	fprintf( out, " - struc type %lu\n",
	         ( unsigned long )bli_obj_struc( *obj ) >> BLIS_STRUC_SHIFT );
	fprintf( out, " - uplo type %lu\n",
	         ( unsigned long )bli_obj_uplo( *obj ) >> BLIS_UPLO_SHIFT );
	fprintf( out, " - is upper %lu\n",
	         ( unsigned long )bli_obj_is_upper( *obj ) );
	fprintf( out, " - is lower %lu\n",
	         ( unsigned long )bli_obj_is_lower( *obj ) );
	fprintf( out, " - is dense %lu\n",
	         ( unsigned long )bli_obj_is_dense( *obj ) );
	fprintf( out, " - pack schema %lu\n",
	         ( unsigned long )bli_obj_pack_schema( *obj ) >> BLIS_PACK_SCHEMA_SHIFT );
	fprintf( out, " - packinv diag? %lu\n",
	         ( unsigned long )bli_obj_has_inverted_diag( *obj ) );
	fprintf( out, " - pack ordifup %lu\n",
	         ( unsigned long )bli_obj_is_pack_rev_if_upper( *obj ) );
	fprintf( out, " - pack ordiflo %lu\n",
	         ( unsigned long )bli_obj_is_pack_rev_if_lower( *obj ) );
	fprintf( out, " - packbuf type %lu\n",
	         ( unsigned long )bli_obj_pack_buffer_type( *obj ) >> BLIS_PACK_BUFFER_SHIFT );
	fprintf( out, "\n" );
}
siz_t bli_packv_init_pack ( pack_t schema, bszid_t bmult_id, obj_t* a, obj_t* p, cntx_t* cntx ) { num_t dt = bli_obj_dt( a ); dim_t dim_a = bli_obj_vector_dim( a ); dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx ); membrk_t* membrk = bli_cntx_membrk( cntx ); #if 0 mem_t* mem_p; #endif dim_t m_p_pad; siz_t size_p; inc_t rs_p, cs_p; void* buf; // We begin by copying the basic fields of c. bli_obj_alias_to( a, p ); // Update the dimensions. bli_obj_set_dims( dim_a, 1, p ); // Reset the view offsets to (0,0). bli_obj_set_offs( 0, 0, p ); // Set the pack schema in the p object to the value in the control tree // node. bli_obj_set_pack_schema( schema, p ); // Compute the dimensions padded by the dimension multiples. m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult ); // Compute the size of the packed buffer. size_p = m_p_pad * 1 * bli_obj_elem_size( p ); #if 0 // Extract the address of the mem_t object within p that will track // properties of the packed buffer. mem_p = bli_obj_pack_mem( *p ); if ( bli_mem_is_unalloc( mem_p ) ) { // If the mem_t object of p has not yet been allocated, then acquire // a memory block suitable for a vector. bli_membrk_acquire_v( membrk, size_p, mem_p ); } else { // If the mem_t object has already been allocated, then release and // re-acquire the memory so there is sufficient space. if ( bli_mem_size( mem_p ) < size_p ) { bli_membrk_release( mem_p ); bli_membrk_acquire_v( membrk, size_p, mem_p ); } } // Grab the buffer address from the mem_t object and copy it to the // main object buffer field. (Sometimes this buffer address will be // copied when the value is already up-to-date, because it persists // in the main object buffer field across loop iterations.) buf = bli_mem_buffer( mem_p ); bli_obj_set_buffer( buf, p ); #endif // Save the padded (packed) dimensions into the packed object. bli_obj_set_padded_dims( m_p_pad, 1, p ); // Set the row and column strides of p based on the pack schema. 
if ( schema == BLIS_PACKED_VECTOR ) { // Set the strides to reflect a column-stored vector. Note that the // column stride may never be used, and is only useful to determine // how much space beyond the vector would need to be zero-padded, if // zero-padding was needed. rs_p = 1; cs_p = bli_obj_padded_length( p ); bli_obj_set_strides( rs_p, cs_p, p ); } return size_p; }
// Acquire a top-to-bottom subpartition of a packed matrix object.
//
// requested_part: must be BLIS_SUBPART1; any other value is reported via
//                 bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ).
// i:              row offset of the subpartition within obj.
// b:              requested number of rows; clamped so that it does not
//                 exceed the rows remaining at offset i.
// obj:            parent object. Column-packed parents are reported via
//                 bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ).
// sub_obj:        receives the subpartition: initialized from obj, then
//                 given dimensions b x n, an adjusted padded length, and a
//                 buffer address advanced to the panel containing row i.
void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
                                  dim_t     i,
                                  dim_t     b,
                                  obj_t*    obj,
                                  obj_t*    sub_obj )
{
	dim_t m_parent;
	dim_t n_parent;
	dim_t rows_left;
	dim_t padded_max;
	dim_t padded_new;
	char* base;
	char* shifted;
	siz_t elem_bytes;
	dim_t panel_off;

	// Only the middle subpartition may be requested for now.
	if ( requested_part != BLIS_SUBPART1 )
	{
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	// Top-to-bottom partitioning through packed column panels (which are
	// row-stored) is not yet supported.
	if ( bli_obj_is_col_packed( *obj ) )
	{
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	// Dimensions of the parent object.
	m_parent = bli_obj_length( *obj );
	n_parent = bli_obj_width( *obj );

	// Never let b reach past the rows that remain at row offset i.
	rows_left = m_parent - i;
	if ( rows_left < b )
	{
		b = rows_left;
	}

	// Seed the subpartition from the parent. View information is not
	// carried over here because the subpartition gets its own dimensions.
	bli_obj_init_subpart_from( *obj, *sub_obj );

	// Give the subpartition its dimensions.
	bli_obj_set_dims( b, n_parent, *sub_obj );

	// Choose the padded length so that zero-padding is confined to the
	// submatrix of interest. Interior blocks use b. The edge block,
	// recognized by i + b landing exactly on the parent's length, instead
	// takes whatever remains of the inherited padded length past row i.
	padded_max = bli_obj_padded_length( *sub_obj );
	padded_new = ( i + b == m_parent ) ? padded_max - i : b;
	bli_obj_set_padded_length( padded_new, *sub_obj );

	// Convert the row offset into a panel offset (in elements) and advance
	// the subpartition's buffer address by the corresponding byte count.
	base       = bli_obj_buffer( *sub_obj );
	elem_bytes = bli_obj_elem_size( *sub_obj );
	panel_off  = bli_packm_offset_to_panel_for( i, sub_obj );
	shifted    = base + elem_bytes * panel_off;

	bli_obj_set_buffer( ( void* )shifted, *sub_obj );
}
// Allocate a data buffer for obj and record the buffer and strides in it.
//
// rs, cs, is: requested row, column, and imaginary strides. They are passed
//             by address to bli_adjust_strides() before use, and the values
//             that come back are the ones stored in obj.
// obj:        object whose length, width, and element size determine the
//             allocation; its buffer and stride fields are overwritten.
//
// The buffer is obtained from bli_malloc_user(); its return value is stored
// in obj without being inspected here.
void bli_obj_alloc_buffer( inc_t  rs,
                           inc_t  cs,
                           inc_t  is,
                           obj_t* obj )
{
	// Dimensions and element size of the object being allocated.
	dim_t m          = bli_obj_length( *obj );
	dim_t n          = bli_obj_width( *obj );
	siz_t elem_bytes = bli_obj_elem_size( *obj );

	// Element count; remains zero for an empty object.
	dim_t span = 0;
	siz_t n_bytes;
	void* mem;

	// Strides are adjusted first, ahead of any error checking.
	bli_adjust_strides( m, n, elem_bytes, &rs, &cs, &is );

	if ( bli_error_checking_is_enabled() )
	{
		bli_obj_alloc_buffer_check( rs, cs, is, obj );
	}

	// For a non-empty object, the element count is the distance from the
	// lowest-addressed element (usually {0, 0}) to the highest-addressed
	// element (usually {m-1, n-1}), plus one for that last element.
	if ( m != 0 && n != 0 )
	{
		span = (m-1) * bli_abs( rs ) + (n-1) * bli_abs( cs ) + 1;
	}

	// Complex objects get an extra |is|/2 elements to cover an imaginary
	// stride that is larger than normal.
	if ( bli_obj_is_complex( *obj ) )
	{
		span += bli_abs( is ) / 2;
	}

	// Total bytes, including any padding implied by the adjusted strides.
	n_bytes = ( siz_t )span * elem_bytes;

	mem = bli_malloc_user( n_bytes );

	// Record the buffer and the (possibly adjusted) strides in the object.
	bli_obj_set_buffer( mem, *obj );
	bli_obj_set_strides( rs, cs, *obj );
	bli_obj_set_imag_stride( is, *obj );
}