// Check that the object pointed to by a is triangular.
//
// Returns BLIS_SUCCESS when bli_obj_is_triangular( *a ) evaluates to true,
// and BLIS_EXPECTED_TRIANGULAR_OBJECT otherwise. The pointer a is
// dereferenced unconditionally.
err_t bli_check_triangular_object( obj_t* a )
{
	if ( bli_obj_is_triangular( *a ) )
	{
		return BLIS_SUCCESS;
	}

	return BLIS_EXPECTED_TRIANGULAR_OBJECT;
}
// Choose the control tree to use and store it in *cntl_use.
//
// - If cntl_orig is non-NULL, *cntl_use receives a copy of it made with
//   bli_cntl_copy().
// - If cntl_orig is NULL, a default tree is created according to the
//   operation family reported by bli_cntx_get_family( cntx ):
//     * BLIS_GEMM, BLIS_HERK, BLIS_TRMM -> bli_gemm_cntl_create( family )
//     * any other family (expected to be BLIS_TRSM) ->
//       bli_trsm_cntl_create( side ), where side is BLIS_LEFT when a is
//       triangular and BLIS_RIGHT otherwise.
//
// The b and c arguments are not read by this function.
void bli_l3_cntl_create_if
     (
       obj_t*   a,
       obj_t*   b,
       obj_t*   c,
       cntx_t*  cntx,
       cntl_t*  cntl_orig,
       cntl_t** cntl_use
     )
{
	opid_t family;
	side_t side;

	// A caller-supplied tree is never used directly; a copy is made so
	// that it can be used to cache things like pack mem_t entries.
	if ( cntl_orig != NULL )
	{
		*cntl_use = bli_cntl_copy( cntl_orig );
		return;
	}

	// No tree was supplied, so build a default one from the family.
	family = bli_cntx_get_family( cntx );

	if ( family != BLIS_GEMM &&
	     family != BLIS_HERK &&
	     family != BLIS_TRMM )
	{
		// Remaining case: family is expected to be BLIS_TRSM. The side
		// is inferred from which operand is triangular.
		side = bli_obj_is_triangular( *a ) ? BLIS_LEFT : BLIS_RIGHT;

		*cntl_use = bli_trsm_cntl_create( side );
	}
	else
	{
		*cntl_use = bli_gemm_cntl_create( family );
	}
}
void bli_trmm_lu_blk_var1( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, trmm_t* cntl ) { obj_t a1, a1_pack; obj_t b_pack; obj_t c1, c1_pack; dim_t i; dim_t b_alg; dim_t mT_trans; // Initialize all pack objects that are passed into packm_init(). bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &b_pack ); bli_obj_init_pack( &c1_pack ); // If A is [upper] triangular, use the diagonal offset of A to determine // the length of the non-zero region. if ( bli_obj_is_triangular( *a ) ) mT_trans = bli_abs( bli_obj_diag_offset_after_trans( *a ) ) + bli_obj_width_after_trans( *a ); else // if ( bli_obj_is_general( *a ) mT_trans = bli_obj_length_after_trans( *a ); // Scale C by beta (if instructed). bli_scalm_int( beta, c, cntl_sub_scalm( cntl ) ); // Initialize object for packing B. bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Pack B and scale by alpha (if instructed). bli_packm_int( alpha, b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. for ( i = 0; i < mT_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, mT_trans, a, cntl_blocksize( cntl ) ); // Acquire partitions for A1 and C1. bli_acquire_mpart_t2b( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_t2b( BLIS_SUBPART1, i, b_alg, c, &c1 ); // Initialize objects for packing A1 and C1. bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Pack A1 and scale by alpha (if instructed). bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Pack C1 and scale by beta (if instructed). bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. bli_trmm_int( BLIS_LEFT, alpha, &a1_pack, &b_pack, beta, &c1_pack, cntl_sub_trmm( cntl ) ); // Unpack C1 (if C1 was packed). 
bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_obj_release_pack( &a1_pack ); bli_obj_release_pack( &b_pack ); bli_obj_release_pack( &c1_pack ); }