// Rounds the default and maximum blocksizes stored in blksz (for datatype
// dt_bs) down to multiples of the default value stored in bmult (for
// datatype dt_bm), writing the results back into blksz.
//
//   dt_bm  - datatype used to look up the blocksize multiple in bmult.
//   bmult  - object holding the blocksize multiple; only its "def" entry
//            is read.
//   dt_bs  - datatype whose entries in blksz are read and updated.
//   blksz  - blocksize object that is modified in place.
//
// If the multiple is zero, blksz is left untouched.
void bli_blksz_reduce_dt_to( num_t dt_bm, blksz_t* bmult,
                             num_t dt_bs, blksz_t* blksz )
{
	dim_t def_val = bli_blksz_get_def( dt_bs, blksz );
	dim_t max_val = bli_blksz_get_max( dt_bs, blksz );

	// Only the "def" entry of bmult is consulted. Its "max" entry would
	// correspond to the packing dimension, which is irrelevant when the
	// object is used as a blocksize multiple.
	const dim_t mult = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to reduce to (and guards the
	// remainder operations below against division by zero).
	if ( mult == 0 )
	{
		return;
	}

	// Drop the remainder so that each value lands on the nearest multiple
	// of mult that does not exceed it.
	def_val -= def_val % mult;
	max_val -= max_val % mult;

	// A value that was smaller than the multiple has been rounded to zero;
	// bump it back up so it is at least one multiple.
	if ( def_val == 0 )
	{
		def_val = mult;
	}
	if ( max_val == 0 )
	{
		max_val = mult;
	}

	// Commit the adjusted values to the blocksize object.
	bli_blksz_set_def( def_val, dt_bs, blksz );
	bli_blksz_set_max( max_val, dt_bs, blksz );
}
// Determines the kc blocksize to use for trsm at iteration offset i of a
// dimension of length dim. The caller is assumed to be an algorithm that
// moves "backward" (bottom to top, right to left, or bottom-right to
// top-left).
//
//   i      - current offset along the partitioned dimension.
//   dim    - total length of the partitioned dimension.
//   obj    - object whose execution datatype selects the blocksize entries.
//   bsize  - blocksize object queried for the default, maximum, and MR
//            values.
//
// Returns the value computed by bli_determine_blocksize_b_sub() from the
// MR-aligned default and maximum blocksizes.
dim_t bli_trsm_determine_kc_b( dim_t i, dim_t dim, obj_t* obj, blksz_t* bsize )
{
	// The execution datatype selects which entries of bsize are read.
	const num_t exec_dt = bli_obj_execution_datatype( *obj );

	dim_t kc_def = bli_blksz_get_def( exec_dt, bsize );
	dim_t kc_max = bli_blksz_get_max( exec_dt, bsize );

	// Both kc values are aligned up to a multiple of MR. MR is used
	// unconditionally (never NR): only left-side trsm micro-kernels are
	// supported, so the triangular matrix is packed with MR even when it
	// appears on the right.
	const dim_t mr_mult = bli_blksz_get_mr( exec_dt, bsize );

	kc_def = bli_align_dim_to_mult( kc_def, mr_mult );
	kc_max = bli_align_dim_to_mult( kc_max, mr_mult );

	return bli_determine_blocksize_b_sub( i, dim, kc_def, kc_max );
}
// Determines the kc blocksize to use for gemm at iteration offset i of a
// dimension of length dim. The caller is assumed to be an algorithm that
// moves "backward" (bottom to top, right to left, or bottom-right to
// top-left).
//
//   i      - current offset along the partitioned dimension.
//   dim    - total length of the partitioned dimension.
//   a      - left-hand operand; its execution datatype selects the
//            blocksize entries, and its root structure is checked first.
//   b      - right-hand operand; its root structure is checked only if
//            the root of a is neither Hermitian nor symmetric.
//   bsize  - blocksize object queried for the default, maximum, MR, and
//            NR values.
//
// Returns the value computed by bli_determine_blocksize_b_sub() from the
// (possibly aligned) default and maximum blocksizes.
dim_t bli_gemm_determine_kc_b( dim_t i, dim_t dim, obj_t* a, obj_t* b, blksz_t* bsize )
{
	// The execution datatype of a selects which entries of bsize are read.
	const num_t exec_dt = bli_obj_execution_datatype( *a );

	dim_t kc_def = bli_blksz_get_def( exec_dt, bsize );
	dim_t kc_max = bli_blksz_get_max( exec_dt, bsize );

	// Choose the register blocksize to align to: MR when the root of a is
	// Hermitian or symmetric, otherwise NR when the root of b is. When
	// neither applies, no alignment is performed and the blocksizes are
	// used as queried.
	dim_t align_mult = 0;
	int   do_align   = 1;

	if ( bli_obj_root_is_herm_or_symm( *a ) )
	{
		align_mult = bli_blksz_get_mr( exec_dt, bsize );
	}
	else if ( bli_obj_root_is_herm_or_symm( *b ) )
	{
		align_mult = bli_blksz_get_nr( exec_dt, bsize );
	}
	else
	{
		do_align = 0;
	}

	// Nudge both kc values up to the nearest multiple of the chosen
	// register blocksize.
	if ( do_align )
	{
		kc_def = bli_align_dim_to_mult( kc_def, align_mult );
		kc_max = bli_align_dim_to_mult( kc_max, align_mult );
	}

	return bli_determine_blocksize_b_sub( i, dim, kc_def, kc_max );
}
// Determines the blocksize to use at iteration offset i of a dimension of
// length dim, using the blocksize identified by bszid in the given context.
//
//   i      - current offset along the partitioned dimension.
//   dim    - total length of the partitioned dimension.
//   obj    - object whose execution datatype selects the blocksize entries.
//   bszid  - identifier of the blocksize to look up in cntx.
//   cntx   - context from which the blocksize object is retrieved.
//
// Returns the value computed by bli_determine_blocksize_b_sub() from the
// default and maximum blocksizes found in the context.
dim_t bli_determine_blocksize_b( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx )
{
	// The execution datatype selects which entries of the blocksize object
	// are read.
	const num_t exec_dt = bli_obj_execution_datatype( *obj );

	// Look up the blocksize object for the requested id in the context.
	blksz_t* blksz = bli_cntx_get_blksz( bszid, cntx );

	const dim_t def_val = bli_blksz_get_def( exec_dt, blksz );
	const dim_t max_val = bli_blksz_get_max( exec_dt, blksz );

	return bli_determine_blocksize_b_sub( i, dim, def_val, max_val );
}