//
// bli_symv
//
// Object-based front-end for the symmetric matrix-vector operation.
// It copy-casts the scalars, picks a hemv control tree based on how the
// operands are stored, and forwards everything to bli_hemv_int() with
// conjh set to BLIS_NO_CONJUGATE.
//
//   alpha, beta  scalar objects; local copy-casts are passed to the back-end
//   a            matrix object (its pack schema may be marked; see below)
//   x, y         vector objects (their pack schemas may be marked; see below)
//
void bli_symv( obj_t* alpha,
               obj_t* a,
               obj_t* x,
               obj_t* beta,
               obj_t* y )
{
	hemv_t* cntl;

	num_t   dt_a;
	num_t   dt_x;
	num_t   dt_y;
	num_t   dt_alpha_cast;
	num_t   dt_beta_cast;

	bool_t  a_contig;
	bool_t  x_contig;
	bool_t  y_contig;
	bool_t  a_lower;

	obj_t   alpha_cast;
	obj_t   beta_cast;

	// Validate the operands when error checking is turned on.
	if ( bli_error_checking_is_enabled() )
		bli_symv_check( alpha, a, x, beta, y );

	// Target datatypes of the matrix and the two vectors.
	dt_a = bli_obj_target_datatype( *a );
	dt_x = bli_obj_target_datatype( *x );
	dt_y = bli_obj_target_datatype( *y );

	// A counts as contiguous when it is row- or column-stored; a vector
	// counts as contiguous when its increment is one.
	a_contig = ( bli_obj_is_row_stored( *a ) ||
	             bli_obj_is_col_stored( *a ) );
	x_contig = ( bli_obj_vector_inc( *x ) == 1 );
	y_contig = ( bli_obj_vector_inc( *y ) == 1 );

	// alpha is copy-cast to the type union of the target datatypes of a
	// and x, so that no information is lost needlessly in the computation.
	dt_alpha_cast = bli_datatype_union( dt_a, dt_x );
	bli_obj_init_scalar_copy_of( dt_alpha_cast,
	                             BLIS_NO_CONJUGATE,
	                             alpha,
	                             &alpha_cast );

	// beta is copy-cast to the target datatype of y. With a real y and a
	// complex beta, the imaginary part of beta*y would not be stored
	// anyway, so nothing is gained by keeping beta complex. With a complex
	// y and a real beta, the promotion to complex is harmless.
	dt_beta_cast = dt_y;
	bli_obj_init_scalar_copy_of( dt_beta_cast,
	                             BLIS_NO_CONJUGATE,
	                             beta,
	                             &beta_cast );

	if ( a_contig && x_contig && y_contig )
	{
		// Everything is contiguous: pick a control tree that calls the
		// unblocked implementation directly. Two trees cover the four
		// uplo/storage combinations, because row-stored lower and
		// column-stored upper share a tree, as do the other two cases.
		a_lower = bli_obj_is_lower( *a );

		if ( bli_obj_is_row_stored( *a ) )
			cntl = ( a_lower ? hemv_cntl_bs_ke_lrow_ucol
			                 : hemv_cntl_bs_ke_lcol_urow );
		else
			cntl = ( a_lower ? hemv_cntl_bs_ke_lcol_urow
			                 : hemv_cntl_bs_ke_lrow_ucol );
	}
	else
	{
		// Flag operands that are already contiguous as packed, so that
		// the blocked algorithm does not pack them again.
		if ( a_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
		if ( x_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
		if ( y_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );

		// Same pairing of cases as above, but keyed on the storage tilt
		// of A and selecting trees that perform blocking.
		a_lower = bli_obj_is_lower( *a );

		if ( bli_obj_is_row_tilted( *a ) )
			cntl = ( a_lower ? hemv_cntl_ge_lrow_ucol
			                 : hemv_cntl_ge_lcol_urow );
		else
			cntl = ( a_lower ? hemv_cntl_ge_lcol_urow
			                 : hemv_cntl_ge_lrow_ucol );
	}

	// Hand off to the internal back-end with the scalar copy-casts and the
	// selected control tree. Passing BLIS_NO_CONJUGATE as conjh selects
	// the symmetric (rather than Hermitian) algorithms.
	// NOTE(review): the bli_hemv_int() definition visible in this file
	// takes an extra cntx_t* argument before the control tree -- confirm
	// which prototype this call is compiled against.
	bli_hemv_int( BLIS_NO_CONJUGATE,
	              &alpha_cast,
	              a,
	              x,
	              &beta_cast,
	              y,
	              cntl );
}
void bli_hemv_int( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t a_local; // Check parameters. if ( bli_error_checking_is_enabled() ) { if ( bli_is_conj( conjh ) ) bli_hemv_check( alpha, a, x, beta, y ); else bli_symv_check( alpha, a, x, beta, y ); } // If y has a zero dimension, return early. if ( bli_obj_has_zero_dim( *y ) ) return; // If x has a zero dimension, scale y by beta and return early. if ( bli_obj_has_zero_dim( *x ) ) { bli_scalm( beta, y ); return; } // Alias A in case we need to induce the upper triangular case. bli_obj_alias_to( *a, a_local ); /* // Our blocked algorithms only [explicitly] implement the lower triangular // case, so if matrix A is stored as upper triangular, we must toggle the // transposition (and conjugation) bits so that the diagonal partitioning // routines grab the correct partitions corresponding to the upper // triangular case. But we only need to do this for blocked algorithms, // since unblocked algorithms are responsible for handling the upper case // explicitly (and they should not be inspecting the transposition bit anyway). if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( *a ) ) { bli_obj_toggle_conj( a_local ); bli_obj_toggle_trans( a_local ); } */ // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( conjh, alpha, &a_local, x, beta, y, cntx, cntl ); }