// Computes the kc blocksize to use for a trsm operation.
//
// The default and maximum blocksizes are looked up in bsize for the
// execution datatype of obj, each is rounded up to a multiple of the
// default MR for that datatype, and the final choice is delegated to
// bli_determine_blocksize_f_sub().
//
// i, dim - forwarded unchanged to bli_determine_blocksize_f_sub()
//          (presumably the current offset and the total length of the
//          dimension being partitioned; confirm against that routine).
// obj    - object whose execution datatype selects the blocksizes.
// bsize  - blocksize object holding the default/maximum values.
//
// This routine is intended for algorithms that traverse "forward"
// (top to bottom, left to right, top-left to bottom-right).
dim_t bli_trsm_determine_kc_f( dim_t i, dim_t dim, obj_t* obj, blksz_t* bsize )
{
	// The execution datatype keys every query below.
	num_t exec_dt = bli_obj_execution_datatype( *obj );

	// Raw default and maximum kc values stored in the blksz_t object.
	dim_t kc_def = bli_blksz_for_type( exec_dt, bsize );
	dim_t kc_max = bli_blksz_max_for_type( exec_dt, bsize );

	// MR is always the alignment factor here (NR is never used): even
	// when the triangular matrix is on the right, that matrix is packed
	// using MR, because only left-side trsm micro-kernels are supported.
	dim_t mr_mult = bli_info_get_default_mr( exec_dt );

	// Round both blocksizes up to the nearest multiple of MR.
	kc_def = bli_align_dim_to_mult( kc_def, mr_mult );
	kc_max = bli_align_dim_to_mult( kc_max, mr_mult );

	return bli_determine_blocksize_f_sub( i, dim, kc_def, kc_max );
}
// Computes the blocksize to use for a forward-moving partitioning step.
//
// The default and maximum blocksizes are looked up in bsize for the
// execution datatype of obj and handed, unmodified, to
// bli_determine_blocksize_f_sub(), whose result is returned.
//
// i, dim - forwarded unchanged to bli_determine_blocksize_f_sub()
//          (presumably the current offset and the total length of the
//          dimension being partitioned; confirm against that routine).
// obj    - object whose execution datatype selects the blocksizes.
// bsize  - blocksize object holding the default/maximum values.
dim_t bli_determine_blocksize_f( dim_t i, dim_t dim, obj_t* obj, blksz_t* bsize )
{
	// The execution datatype keys both blocksize queries.
	num_t exec_dt = bli_obj_execution_datatype( *obj );

	// Default and maximum blocksizes registered for that datatype.
	dim_t blk_def = bli_blksz_for_type( exec_dt, bsize );
	dim_t blk_max = bli_blksz_max_for_type( exec_dt, bsize );

	return bli_determine_blocksize_f_sub( i, dim, blk_def, blk_max );
}
// Computes the kc blocksize to use for a gemm-like operation.
//
// The default and maximum blocksizes are looked up in bsize for the
// execution datatype of a. If the root of a is Hermitian or symmetric,
// both values are rounded up to a multiple of MR; otherwise, if the
// root of b is Hermitian or symmetric, they are rounded up to a
// multiple of NR. When neither holds, the values are used as-is. The
// final choice is delegated to bli_determine_blocksize_f_sub().
//
// i, dim - forwarded unchanged to bli_determine_blocksize_f_sub()
//          (presumably the current offset and the total length of the
//          dimension being partitioned; confirm against that routine).
// a, b   - operand objects; a supplies the execution datatype, and
//          both are inspected for Hermitian/symmetric structure.
// bsize  - blocksize object holding the default/maximum/MR/NR values.
//
// This routine is intended for algorithms that traverse "forward"
// (top to bottom, left to right, top-left to bottom-right).
dim_t bli_gemm_determine_kc_f( dim_t i, dim_t dim, obj_t* a, obj_t* b, blksz_t* bsize )
{
	// The execution datatype of a keys every query below.
	num_t exec_dt = bli_obj_execution_datatype( *a );

	// Raw default and maximum kc values stored in the blksz_t object.
	dim_t kc_def = bli_blksz_get_def( exec_dt, bsize );
	dim_t kc_max = bli_blksz_get_max( exec_dt, bsize );

	// Select the register blocksize to align to, if any. A takes
	// precedence over B: B is only examined when A is not structured.
	dim_t align_mult  = 0;
	int   must_align  = 1;

	if ( bli_obj_root_is_herm_or_symm( *a ) )
	{
		align_mult = bli_blksz_get_mr( exec_dt, bsize );
	}
	else if ( bli_obj_root_is_herm_or_symm( *b ) )
	{
		align_mult = bli_blksz_get_nr( exec_dt, bsize );
	}
	else
	{
		// Neither operand is Hermitian/symmetric: leave kc untouched.
		must_align = 0;
	}

	if ( must_align )
	{
		// Round both blocksizes up to the nearest multiple of MR or NR.
		kc_def = bli_align_dim_to_mult( kc_def, align_mult );
		kc_max = bli_align_dim_to_mult( kc_max, align_mult );
	}

	return bli_determine_blocksize_f_sub( i, dim, kc_def, kc_max );
}