void bli_symv_front ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ) { hemv_t* hemv_cntl; num_t dt_targ_a; num_t dt_targ_x; num_t dt_targ_y; bool_t a_has_unit_inc; bool_t x_has_unit_inc; bool_t y_has_unit_inc; obj_t alpha_local; obj_t beta_local; num_t dt_alpha; num_t dt_beta; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_symv_check( alpha, a, x, beta, y ); // Query the target datatypes of each object. dt_targ_a = bli_obj_target_dt( a ); dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); // Determine whether each operand with unit stride. a_has_unit_inc = ( bli_obj_is_row_stored( a ) || bli_obj_is_col_stored( a ) ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Create an object to hold a copy-cast of beta. Notice that we use // the datatype of y. Here's why: If y is real and beta is complex, // there is no reason to keep beta_local in the complex domain since // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. dt_beta = dt_targ_y; bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( a_has_unit_inc && x_has_unit_inc && y_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. 
// The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( a ) ) { if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; else hemv_cntl = hemv_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( a ) ) { if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow; else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( a ) ) { if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol; else hemv_cntl = hemv_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( a ) ) { if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow; else hemv_cntl = hemv_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-casts of scalars and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. bli_hemv_int( BLIS_NO_CONJUGATE, &alpha_local, a, x, &beta_local, y, cntx, hemv_cntl ); }
void bli_syr2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ) { her2_t* her2_cntl; num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; bool_t x_has_unit_inc; bool_t y_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr2_check( alpha, x, y, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); //dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && y_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. 
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. bli_her2_int( BLIS_NO_CONJUGATE, &alpha_local, &alpha_local, x, y, c, cntx, her2_cntl ); }
// Initialize an object p so that source object a can be packed into p
// via one of the packv implementations. This includes acquiring a
// suitable block of memory from the memory allocator, if such a block
// has not already been allocated previously. If packing is unnecessary
// (noop control tree, or a is already packed with the desired schema),
// p is simply aliased to a.
void bli_packv_init ( obj_t* a, obj_t* p, cntx_t* cntx, packv_t* cntl )
{
	pack_t pack_schema;
	bszid_t bmult_id;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_packv_check( a, p, cntx );

	// First check if we are to skip this operation because the control
	// tree is NULL, and if so, simply alias the object to its packed
	// counterpart.
	if ( bli_cntl_is_noop( cntl ) )
	{
		bli_obj_alias_to( a, p );
		return;
	}

	// At this point, we can be assured that cntl is not NULL. Let us now
	// check to see if the object has already been packed to the desired
	// schema (as encoded in the control tree). If so, we can alias and
	// return, as above.
	// Note that in most cases, bli_obj_pack_schema() will return
	// BLIS_NOT_PACKED and thus packing will be called for (but in some
	// cases packing has already taken place). Also, not all combinations
	// of current pack status and desired pack schema are valid.
	if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
	{
		bli_obj_alias_to( a, p );
		return;
	}

	// Now, if we are not skipping the pack operation, then the only
	// question left is whether we are to typecast vector a before
	// packing. Typecasting is currently unsupported, so this is treated
	// as a fatal error.
	if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) )
		bli_abort();

	// Extract various fields from the control tree and pass them in
	// explicitly into _init_pack(). This allows external code generators
	// the option of bypassing usage of control trees altogether.
	pack_schema = cntl_pack_schema( cntl );
	bmult_id    = cntl_bmid( cntl );

	// Initialize object p for the final packed vector.
	// BUGFIX: pass a directly (obj_t*), not &a (obj_t**) — the sibling
	// argument p is passed directly, and taking the address of the local
	// pointer parameter was a type mismatch.
	bli_packv_init_pack( pack_schema,
	                     bmult_id,
	                     a,
	                     p,
	                     cntx );

	// Now p is ready to be packed.
}