err_t bli_check_upper_or_lower_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_lower( *a ) && !bli_obj_is_upper( *a ) ) e_val = BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT; return e_val; }
void bli_obj_print( char* label, obj_t* obj ) { FILE* file = stdout; if ( bli_error_checking_is_enabled() ) bli_obj_print_check( label, obj ); fprintf( file, "\n" ); fprintf( file, "%s\n", label ); fprintf( file, "\n" ); fprintf( file, " m x n %lu x %lu\n", ( unsigned long int )bli_obj_length( *obj ), ( unsigned long int )bli_obj_width( *obj ) ); fprintf( file, "\n" ); fprintf( file, " offm, offn %lu, %lu\n", ( unsigned long int )bli_obj_row_off( *obj ), ( unsigned long int )bli_obj_col_off( *obj ) ); fprintf( file, " diagoff %ld\n", ( signed long int )bli_obj_diag_offset( *obj ) ); fprintf( file, "\n" ); fprintf( file, " buf %p\n", ( void* )bli_obj_buffer( *obj ) ); fprintf( file, " elem size %lu\n", ( unsigned long int )bli_obj_elem_size( *obj ) ); fprintf( file, " rs, cs %ld, %ld\n", ( signed long int )bli_obj_row_stride( *obj ), ( signed long int )bli_obj_col_stride( *obj ) ); fprintf( file, " is %ld\n", ( signed long int )bli_obj_imag_stride( *obj ) ); fprintf( file, " m_padded %lu\n", ( unsigned long int )bli_obj_padded_length( *obj ) ); fprintf( file, " n_padded %lu\n", ( unsigned long int )bli_obj_padded_width( *obj ) ); fprintf( file, " ps %lu\n", ( unsigned long int )bli_obj_panel_stride( *obj ) ); fprintf( file, "\n" ); fprintf( file, " info %lX\n", ( unsigned long int )(*obj).info ); fprintf( file, " - is complex %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) ); fprintf( file, " - is d. prec %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) ); fprintf( file, " - datatype %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) ); fprintf( file, " - target dt %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) ); fprintf( file, " - exec dt %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) ); fprintf( file, " - has trans %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) ); fprintf( file, " - has conj %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) ); fprintf( file, " - unit diag? %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) ); fprintf( file, " - struc type %lu\n", ( unsigned long int )bli_obj_struc( *obj ) >> BLIS_STRUC_SHIFT ); fprintf( file, " - uplo type %lu\n", ( unsigned long int )bli_obj_uplo( *obj ) >> BLIS_UPLO_SHIFT ); fprintf( file, " - is upper %lu\n", ( unsigned long int )bli_obj_is_upper( *obj ) ); fprintf( file, " - is lower %lu\n", ( unsigned long int )bli_obj_is_lower( *obj ) ); fprintf( file, " - is dense %lu\n", ( unsigned long int )bli_obj_is_dense( *obj ) ); fprintf( file, " - pack schema %lu\n", ( unsigned long int )bli_obj_pack_schema( *obj ) >> BLIS_PACK_SCHEMA_SHIFT ); fprintf( file, " - packinv diag? %lu\n", ( unsigned long int )bli_obj_has_inverted_diag( *obj ) ); fprintf( file, " - pack ordifup %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_upper( *obj ) ); fprintf( file, " - pack ordiflo %lu\n", ( unsigned long int )bli_obj_is_pack_rev_if_lower( *obj ) ); fprintf( file, " - packbuf type %lu\n", ( unsigned long int )bli_obj_pack_buffer_type( *obj ) >> BLIS_PACK_BUFFER_SHIFT ); fprintf( file, "\n" ); }
void bli_trmm_blk_var2b( obj_t* a, obj_t* b, obj_t* c, gemm_t* cntl, trmm_thrinfo_t* thread ) { obj_t a_pack_s; obj_t b1_pack_s, c1_pack_s; obj_t b1, c1; obj_t* a_pack = NULL; obj_t* b1_pack = NULL; obj_t* c1_pack = NULL; dim_t i; dim_t b_alg; dim_t n_trans; if( thread_am_ochief( thread ) ) { // Initialize object for packing A bli_obj_init_pack( &a_pack_s ); bli_packm_init( a, &a_pack_s, cntl_sub_packm_a( cntl ) ); // Scale C by beta (if instructed). bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); } a_pack = thread_obroadcast( thread, &a_pack_s ); // Initialize pack objects for B and C that are passed into packm_init(). if( thread_am_ichief( thread ) ) { bli_obj_init_pack( &b1_pack_s ); bli_obj_init_pack( &c1_pack_s ); } b1_pack = thread_ibroadcast( thread, &b1_pack_s ); c1_pack = thread_ibroadcast( thread, &c1_pack_s ); // Pack A (if instructed). bli_packm_int( a, a_pack, cntl_sub_packm_a( cntl ), trmm_thread_sub_opackm( thread ) ); // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; bli_get_range_weighted( thread, 0, n_trans, bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ), bli_obj_is_upper( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( i, end, b, cntl_blocksize( cntl ) ); // Acquire partitions for B1 and C1. bli_acquire_mpart_r2l( BLIS_SUBPART1, i, b_alg, b, &b1 ); bli_acquire_mpart_r2l( BLIS_SUBPART1, i, b_alg, c, &c1 ); // Initialize objects for packing A1 and B1. if( thread_am_ichief( thread ) ) { bli_packm_init( &b1, b1_pack, cntl_sub_packm_b( cntl ) ); bli_packm_init( &c1, c1_pack, cntl_sub_packm_c( cntl ) ); } thread_ibarrier( thread ); // Pack B1 (if instructed). bli_packm_int( &b1, b1_pack, cntl_sub_packm_b( cntl ), trmm_thread_sub_ipackm( thread ) ); // Pack C1 (if instructed). bli_packm_int( &c1, c1_pack, cntl_sub_packm_c( cntl ), trmm_thread_sub_ipackm( thread ) ); // Perform trmm subproblem. bli_trmm_int( &BLIS_ONE, a_pack, b1_pack, &BLIS_ONE, c1_pack, cntl_sub_gemm( cntl ), trmm_thread_sub_trmm( thread ) ); // Unpack C1 (if C1 was packed). bli_unpackm_int( c1_pack, &c1, cntl_sub_unpackm_c( cntl ), trmm_thread_sub_ipackm( thread ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. thread_obarrier( thread ); if( thread_am_ochief( thread ) ) bli_obj_release_pack( a_pack ); if( thread_am_ichief( thread ) ) { bli_obj_release_pack( b1_pack ); bli_obj_release_pack( c1_pack ); } }
void bli_trsm_blk_var1b( obj_t* a, obj_t* b, obj_t* c, trsm_t* cntl ) { obj_t a1, a1_pack; obj_t b_pack; obj_t c1; dim_t i; dim_t b_alg; dim_t m_trans; dim_t offA; // Initialize all pack objects that are passed into packm_init(). bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &b_pack ); // Set the default length of and offset to the non-zero part of A. m_trans = bli_obj_length_after_trans( *a ); offA = 0; // If A is upper triangular, we have to adjust where the non-zero part of // A begins. if ( bli_obj_is_upper( *a ) ) offA = m_trans - bli_abs( bli_obj_diag_offset_after_trans( *a ) ) - bli_obj_width_after_trans( *a ); // Initialize object for packing B. bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Pack B1 (if instructed). bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the remaining portion of the m dimension. for ( i = offA; i < m_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( i, m_trans, a, cntl_blocksize( cntl ) ); // Acquire partitions for A1 and C1. bli_acquire_mpart_b2t( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_b2t( BLIS_SUBPART1, i, b_alg, c, &c1 ); //if ( bli_obj_is_zeros( a1 ) ) continue; // Initialize object for packing A1. bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Pack A1 (if instructed). bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Perform trsm subproblem. bli_trsm_int( &BLIS_ONE, &a1_pack, &b_pack, &BLIS_ONE, &c1, cntl_sub_trsm( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_obj_release_pack( &a1_pack ); bli_obj_release_pack( &b_pack ); }
void bli_trmm_blk_var1( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, trmm_t* cntl ) { obj_t a1, a1_pack; obj_t b_pack; obj_t c1, c1_pack; dim_t i; dim_t b_alg; dim_t m_trans; dim_t offA; // Initialize all pack objects that are passed into packm_init(). bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &b_pack ); bli_obj_init_pack( &c1_pack ); // Set the default length of and offset to the non-zero part of A. m_trans = bli_obj_length_after_trans( *a ); offA = 0; // If A is lower triangular, we have to adjust where the non-zero part of // A begins. If A is upper triangular, we have to adjust the length of // the non-zero part. If A is general/dense, then we keep the defaults. if ( bli_obj_is_lower( *a ) ) offA = bli_abs( bli_obj_diag_offset_after_trans( *a ) ); else if ( bli_obj_is_upper( *a ) ) m_trans = bli_abs( bli_obj_diag_offset_after_trans( *a ) ) + bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). bli_scalm_int( beta, c, cntl_sub_scalm( cntl ) ); // Initialize object for packing B. bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Pack B and scale by alpha (if instructed). bli_packm_int( alpha, b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. for ( i = offA; i < m_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, m_trans, a, cntl_blocksize( cntl ) ); // Acquire partitions for A1 and C1. bli_acquire_mpart_t2b( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_t2b( BLIS_SUBPART1, i, b_alg, c, &c1 ); // Initialize objects for packing A1 and C1. bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Pack A1 and scale by alpha (if instructed). bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Pack C1 and scale by beta (if instructed). bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. bli_trmm_int( alpha, &a1_pack, &b_pack, beta, &c1_pack, cntl_sub_trmm( cntl ) ); // Unpack C1 (if C1 was packed). bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_obj_release_pack( &a1_pack ); bli_obj_release_pack( &b_pack ); bli_obj_release_pack( &c1_pack ); }