//
// Object-based front-end for the trmv operation.
//
// Optionally validates the operands, builds a local copy-cast of alpha,
// selects a control tree according to how a and x are stored and whether
// a is marked for transposition, and then dispatches to bli_trmv_int().
//
//   alpha  scalar object; it is copy-cast into a local object and that
//          copy is what gets passed to the back-end.
//   a      matrix object; its pack schema is overwritten when a is
//          contiguous but x is not.
//   x      vector object; its pack schema is overwritten when x is
//          contiguous but a is not.
//
void bli_trmv( obj_t* alpha, obj_t* a, obj_t* x )
{
    trmv_t* trmv_cntl;
    obj_t   alpha_local;

    // Parameter checking is skipped entirely when error checking is off.
    if ( bli_error_checking_is_enabled() )
    {
        bli_trmv_check( alpha, a, x );
    }

    // Target datatypes of the two operands.
    const num_t targ_dt_a = bli_obj_target_datatype( *a );
    const num_t targ_dt_x = bli_obj_target_datatype( *x );

    // a counts as contiguous when it is row- or column-stored; x counts
    // as contiguous when its vector increment is one.
    const bool_t a_is_contig = ( bli_obj_is_row_stored( *a ) ||
                                 bli_obj_is_col_stored( *a ) );
    const bool_t x_is_contig = ( bli_obj_vector_inc( *x ) == 1 );

    // Copy-cast alpha to the union of the two target datatypes so that no
    // information is lost unnecessarily during the computation.
    const num_t dt_alpha = bli_datatype_union( targ_dt_a, targ_dt_x );
    bli_obj_init_scalar_copy_of( dt_alpha,
                                 BLIS_NO_CONJUGATE,
                                 alpha,
                                 &alpha_local );

    if ( a_is_contig && x_is_contig )
    {
        // Everything is contiguous: pick a tree that goes straight to the
        // unblocked implementation. Only two trees are needed for the four
        // transposition/storage combinations, because row-stored without
        // transpose is handled identically to column-stored with
        // transpose, and likewise for the remaining pair.
        const bool_t row_stored = bli_obj_is_row_stored( *a );
        const int    want_nrow_tcol =
            ( bli_obj_has_notrans( *a ) ? ( row_stored != 0 )
                                        : ( row_stored == 0 ) );

        trmv_cntl = ( want_nrow_tcol ? trmv_cntl_bs_ke_nrow_tcol
                                     : trmv_cntl_bs_ke_ncol_trow );
    }
    else
    {
        // Flag whichever operand does have unit stride as already packed,
        // so the blocked algorithm does not pack it needlessly.
        if ( a_is_contig ) { bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); }
        if ( x_is_contig ) { bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); }

        // Same pairing of cases as in the contiguous branch, except that
        // the decision is based on storage tilt and the chosen tree
        // performs blocking.
        const bool_t row_tilted = bli_obj_is_row_tilted( *a );
        const int    want_nrow_tcol =
            ( bli_obj_has_notrans( *a ) ? ( row_tilted != 0 )
                                        : ( row_tilted == 0 ) );

        trmv_cntl = ( want_nrow_tcol ? trmv_cntl_ge_nrow_tcol
                                     : trmv_cntl_ge_ncol_trow );
    }

    // Hand the local copy of alpha and the selected control tree to the
    // internal back-end.
    bli_trmv_int( &alpha_local, a, x, trmv_cntl );
}
void bli_gemmsup_ref_var1n ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since they objects // are guaranteed to not have structure or be packed. if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. 
k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition. rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var1n[dt_exec]; if ( bli_is_notrans( trans ) ) { // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm ); } else { // Invoke the function (transposing the operation). f ( conjb, // swap the conj values. conja, n, // swap the m and n dimensions. m, k, buf_alpha, buf_b, cs_b, rs_b, // swap the positions of A and B. buf_a, cs_a, rs_a, // swap the strides of A and B. buf_beta, buf_c, cs_c, rs_c, // swap the strides of C. bli_stor3_trans( eff_id ), // transpose the stor3_t id. cntx, rntm ); } }