// Select the kc blocksize for a trsm partitioning step at offset i within
// a dimension of length dim. The default and maximum blocksizes are read
// from bsize for the execution datatype of obj, rounded up to a multiple
// of MR, and handed to bli_determine_blocksize_b_sub().
//
// NOTE: The caller is assumed to be an algorithm that moves "backward"
// (ie: bottom to top, right to left, bottom-right to top-left).
dim_t bli_trsm_determine_kc_b( dim_t i, dim_t dim, obj_t* obj, blksz_t* bsize )
{
    // The execution datatype of obj selects which entries of the blksz_t
    // object are consulted.
    num_t dt = bli_obj_execution_datatype( *obj );

    // Default (algorithmic) and maximum kc blocksizes for that datatype.
    dim_t kc_def = bli_blksz_for_type( dt, bsize );
    dim_t kc_max = bli_blksz_max_for_type( dt, bsize );

    // Round both values up to the nearest multiple of MR. MR is always
    // used here (rather than sometimes using NR) because, even when the
    // triangle is on the right, packing of that matrix uses MR, since
    // only left-side trsm micro-kernels are supported.
    dim_t mr = bli_info_get_default_mr( dt );

    kc_def = bli_align_dim_to_mult( kc_def, mr );
    kc_max = bli_align_dim_to_mult( kc_max, mr );

    return bli_determine_blocksize_b_sub( i, dim, kc_def, kc_max );
}
// Select the kc blocksize for a gemm-like partitioning step at offset i
// within a dimension of length dim. The default and maximum blocksizes are
// read from bsize for the execution datatype of a. They are rounded up to
// a multiple of MR when the root of a is Hermitian or symmetric, or to a
// multiple of NR when the root of b is; otherwise they are used as-is.
//
// NOTE: The caller is assumed to be an algorithm that moves "backward"
// (ie: bottom to top, right to left, bottom-right to top-left).
dim_t bli_gemm_determine_kc_b( dim_t i, dim_t dim, obj_t* a, obj_t* b, blksz_t* bsize )
{
    // The execution datatype of a selects which entries of the blksz_t
    // object are consulted.
    num_t dt = bli_obj_execution_datatype( *a );

    // Default (algorithmic) and maximum kc blocksizes for that datatype.
    dim_t kc_def = bli_blksz_get_def( dt, bsize );
    dim_t kc_max = bli_blksz_get_max( dt, bsize );

    // Decide which register blocksize, if any, the kc values must be a
    // multiple of. A is checked first; B is only consulted when A is
    // neither Hermitian nor symmetric.
    int   needs_align = 1;
    dim_t mult        = 0;

    if ( bli_obj_root_is_herm_or_symm( *a ) )
    {
        mult = bli_blksz_get_mr( dt, bsize );
    }
    else if ( bli_obj_root_is_herm_or_symm( *b ) )
    {
        mult = bli_blksz_get_nr( dt, bsize );
    }
    else
    {
        // Neither operand is structured: leave the blocksizes unchanged.
        needs_align = 0;
    }

    if ( needs_align )
    {
        kc_def = bli_align_dim_to_mult( kc_def, mult );
        kc_max = bli_align_dim_to_mult( kc_max, mult );
    }

    return bli_determine_blocksize_b_sub( i, dim, kc_def, kc_max );
}
siz_t bli_packv_init_pack ( pack_t schema, bszid_t bmult_id, obj_t* a, obj_t* p, cntx_t* cntx ) { num_t dt = bli_obj_dt( a ); dim_t dim_a = bli_obj_vector_dim( a ); dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx ); membrk_t* membrk = bli_cntx_membrk( cntx ); #if 0 mem_t* mem_p; #endif dim_t m_p_pad; siz_t size_p; inc_t rs_p, cs_p; void* buf; // We begin by copying the basic fields of c. bli_obj_alias_to( a, p ); // Update the dimensions. bli_obj_set_dims( dim_a, 1, p ); // Reset the view offsets to (0,0). bli_obj_set_offs( 0, 0, p ); // Set the pack schema in the p object to the value in the control tree // node. bli_obj_set_pack_schema( schema, p ); // Compute the dimensions padded by the dimension multiples. m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult ); // Compute the size of the packed buffer. size_p = m_p_pad * 1 * bli_obj_elem_size( p ); #if 0 // Extract the address of the mem_t object within p that will track // properties of the packed buffer. mem_p = bli_obj_pack_mem( *p ); if ( bli_mem_is_unalloc( mem_p ) ) { // If the mem_t object of p has not yet been allocated, then acquire // a memory block suitable for a vector. bli_membrk_acquire_v( membrk, size_p, mem_p ); } else { // If the mem_t object has already been allocated, then release and // re-acquire the memory so there is sufficient space. if ( bli_mem_size( mem_p ) < size_p ) { bli_membrk_release( mem_p ); bli_membrk_acquire_v( membrk, size_p, mem_p ); } } // Grab the buffer address from the mem_t object and copy it to the // main object buffer field. (Sometimes this buffer address will be // copied when the value is already up-to-date, because it persists // in the main object buffer field across loop iterations.) buf = bli_mem_buffer( mem_p ); bli_obj_set_buffer( buf, p ); #endif // Save the padded (packed) dimensions into the packed object. bli_obj_set_padded_dims( m_p_pad, 1, p ); // Set the row and column strides of p based on the pack schema. 
if ( schema == BLIS_PACKED_VECTOR ) { // Set the strides to reflect a column-stored vector. Note that the // column stride may never be used, and is only useful to determine // how much space beyond the vector would need to be zero-padded, if // zero-padding was needed. rs_p = 1; cs_p = bli_obj_padded_length( p ); bli_obj_set_strides( rs_p, cs_p, p ); } return size_p; }