// Object-based wrapper for the ger unb_var2 variant.
//
// Unpacks datatypes, conjugation flags, dimensions, buffers and strides
// from the operand objects and forwards them to the typed routine found
// in ftypes.
//
//   alpha  scalar object, read as the type union of the datatypes of x and y
//   x, y   vector operands
//   a      matrix operand; its length and width supply m and n
//   cntx   context, handed through to the typed routine
//   cntl   not referenced by this function
void bli_ger_unb_var2( obj_t*  alpha,
                       obj_t*  x,
                       obj_t*  y,
                       obj_t*  a,
                       cntx_t* cntx,
                       ger_t*  cntl )
{
    FUNCPTR_T fp;

    // Vector x.
    num_t  x_dt   = bli_obj_datatype( *x );
    conj_t x_conj = bli_obj_conj_status( *x );
    void*  x_buf  = bli_obj_buffer_at_off( *x );
    inc_t  x_inc  = bli_obj_vector_inc( *x );

    // Vector y.
    num_t  y_dt   = bli_obj_datatype( *y );
    conj_t y_conj = bli_obj_conj_status( *y );
    void*  y_buf  = bli_obj_buffer_at_off( *y );
    inc_t  y_inc  = bli_obj_vector_inc( *y );

    // Matrix a.
    num_t  a_dt   = bli_obj_datatype( *a );
    dim_t  a_rows = bli_obj_length( *a );
    dim_t  a_cols = bli_obj_width( *a );
    void*  a_buf  = bli_obj_buffer_at_off( *a );
    inc_t  a_rs   = bli_obj_row_stride( *a );
    inc_t  a_cs   = bli_obj_col_stride( *a );

    // alpha must be read in the type union of x and y so that no
    // information is lost unnecessarily during the computation.
    num_t  alpha_dt  = bli_datatype_union( x_dt, y_dt );
    void*  alpha_buf = bli_obj_buffer_for_1x1( alpha_dt, *alpha );

    // The typed routine is selected by the datatype of a alone.
    fp = ftypes[a_dt];

    fp( x_conj,
        y_conj,
        a_rows,
        a_cols,
        alpha_buf,
        x_buf, x_inc,
        y_buf, y_inc,
        a_buf, a_rs, a_cs,
        cntx );
}
// Object-based wrapper for the dotaxpyv kernel.
//
// Extracts conjugation flags, the vector length, buffers and increments
// from the operand objects and calls the typed routine selected from
// ftypes by the datatypes of x, y and z.
//
//   alpha  scalar object (see the note on bli_set_scalar_dt_buffer below)
//   xt     only its conjugation status is read here; no buffer is taken
//          from it
//   x      vector operand; also supplies the length n
//   y, z   vector operands
//   rho    object whose buffer is passed to the typed routine
void bli_dotaxpyv_kernel( obj_t* alpha,
                          obj_t* xt,
                          obj_t* x,
                          obj_t* y,
                          obj_t* rho,
                          obj_t* z )
{
    FUNCPTR_T fp;
    num_t     alpha_dt;
    void*     alpha_buf;

    // Datatypes used for dispatch.
    num_t  x_dt = bli_obj_datatype( *x );
    num_t  y_dt = bli_obj_datatype( *y );
    num_t  z_dt = bli_obj_datatype( *z );

    // Conjugation flags.
    conj_t xt_conj = bli_obj_conj_status( *xt );
    conj_t x_conj  = bli_obj_conj_status( *x );
    conj_t y_conj  = bli_obj_conj_status( *y );

    // Problem size comes from x.
    dim_t  len = bli_obj_vector_dim( *x );

    // Buffers and increments.
    void*  x_buf   = bli_obj_buffer_at_off( *x );
    inc_t  x_inc   = bli_obj_vector_inc( *x );
    void*  y_buf   = bli_obj_buffer_at_off( *y );
    inc_t  y_inc   = bli_obj_vector_inc( *y );
    void*  z_buf   = bli_obj_buffer_at_off( *z );
    inc_t  z_inc   = bli_obj_vector_inc( *z );
    void*  rho_buf = bli_obj_buffer_at_off( *rho );

    // When alpha is a scalar constant, the datatype of x is used to locate
    // the matching constant value; otherwise alpha's own datatype is used
    // and its buffer is taken at alpha's offset.
    bli_set_scalar_dt_buffer( alpha, x_dt, alpha_dt, alpha_buf );

    // Pick the typed routine for this datatype combination and run it.
    fp = ftypes[x_dt][y_dt][z_dt];

    fp( xt_conj,
        x_conj,
        y_conj,
        len,
        alpha_buf,
        x_buf, x_inc,
        y_buf, y_inc,
        rho_buf,
        z_buf, z_inc );
}
// Object-based wrapper for the her2 unb_var2 variant.
//
// Unpacks the operand objects and calls the typed routine selected from
// ftypes by the datatypes of x, y and c.
//
//   conjh       forwarded unchanged to the typed routine
//   alpha       scalar object, read as the type union of x and y
//   alpha_conj  not referenced by this function
//   x, y        vector operands
//   c           matrix operand; supplies uplo, the dimension m and strides
//   cntl        not referenced by this function
void bli_her2_unb_var2( conj_t  conjh,
                        obj_t*  alpha,
                        obj_t*  alpha_conj,
                        obj_t*  x,
                        obj_t*  y,
                        obj_t*  c,
                        her2_t* cntl )
{
    FUNCPTR_T fp;

    // Vector x.
    num_t  x_dt   = bli_obj_datatype( *x );
    conj_t x_conj = bli_obj_conj_status( *x );
    void*  x_buf  = bli_obj_buffer_at_off( *x );
    inc_t  x_inc  = bli_obj_vector_inc( *x );

    // Vector y.
    num_t  y_dt   = bli_obj_datatype( *y );
    conj_t y_conj = bli_obj_conj_status( *y );
    void*  y_buf  = bli_obj_buffer_at_off( *y );
    inc_t  y_inc  = bli_obj_vector_inc( *y );

    // Matrix c.
    num_t  c_dt   = bli_obj_datatype( *c );
    uplo_t c_uplo = bli_obj_uplo( *c );
    dim_t  c_dim  = bli_obj_length( *c );
    void*  c_buf  = bli_obj_buffer_at_off( *c );
    inc_t  c_rs   = bli_obj_row_stride( *c );
    inc_t  c_cs   = bli_obj_col_stride( *c );

    // alpha must be read in the type union of the datatypes of x and y.
    num_t  alpha_dt  = bli_datatype_union( x_dt, y_dt );
    void*  alpha_buf = bli_obj_buffer_for_1x1( alpha_dt, *alpha );

    // Select the typed routine for this datatype combination and run it.
    fp = ftypes[x_dt][y_dt][c_dt];

    fp( c_uplo,
        x_conj,
        y_conj,
        conjh,
        c_dim,
        alpha_buf,
        x_buf, x_inc,
        y_buf, y_inc,
        c_buf, c_rs, c_cs );
}
// Object-based wrapper for the axpyf kernel.
//
// Unpacks the operand objects and calls the typed routine selected from
// ftypes by the datatypes of a, x and y.
//
//   alpha  scalar object, read as the type union of a and x
//   a      matrix operand (conjugation flag, buffer, row/column strides)
//   x      vector operand; its length is passed as b_n
//   y      vector operand; its length is passed as m
void bli_axpyf_kernel( obj_t* alpha,
                       obj_t* a,
                       obj_t* x,
                       obj_t* y )
{
    FUNCPTR_T fp;

    // Matrix a.
    num_t  a_dt   = bli_obj_datatype( *a );
    conj_t a_conj = bli_obj_conj_status( *a );
    void*  a_buf  = bli_obj_buffer_at_off( *a );
    inc_t  a_rs   = bli_obj_row_stride( *a );
    inc_t  a_cs   = bli_obj_col_stride( *a );

    // Vector x.
    num_t  x_dt   = bli_obj_datatype( *x );
    conj_t x_conj = bli_obj_conj_status( *x );
    dim_t  x_len  = bli_obj_vector_dim( *x );
    void*  x_buf  = bli_obj_buffer_at_off( *x );
    inc_t  x_inc  = bli_obj_vector_inc( *x );

    // Vector y.
    num_t  y_dt   = bli_obj_datatype( *y );
    dim_t  y_len  = bli_obj_vector_dim( *y );
    void*  y_buf  = bli_obj_buffer_at_off( *y );
    inc_t  y_inc  = bli_obj_vector_inc( *y );

    // alpha must be read in the type union of a and x so that no
    // information is lost unnecessarily during the computation.
    num_t  alpha_dt  = bli_datatype_union( a_dt, x_dt );
    void*  alpha_buf = bli_obj_buffer_for_1x1( alpha_dt, *alpha );

    // Select the typed routine for this datatype combination and run it.
    fp = ftypes[a_dt][x_dt][y_dt];

    fp( a_conj,
        x_conj,
        y_len,
        x_len,
        alpha_buf,
        a_buf, a_rs, a_cs,
        x_buf, x_inc,
        y_buf, y_inc );
}
// // Define object-based interface. // void bli_scalv( obj_t* alpha, obj_t* x ) { num_t dt = bli_obj_datatype( *x ); dim_t n = bli_obj_vector_dim( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); obj_t alpha_local; void* buf_alpha; FUNCPTR_T f = ftypes[dt]; if ( bli_error_checking_is_enabled() ) bli_scalv_check( alpha, x ); // Create a local copy-cast of alpha (and apply internal conjugation // if needed). bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Extract the scalar buffer. buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); // Invoke the void pointer-based function. f( BLIS_NO_CONJUGATE, // conjugation applied during copy-cast. n, buf_alpha, buf_x, inc_x ); }
// Object-based wrapper for the sumsqv unb_var1 variant.
//
// Passes the length, buffer and increment of x, plus the buffers of scale
// and sumsq, to the typed routine selected from ftypes. Dispatch uses the
// datatype of x only; the datatype of scale is not consulted.
//
//   x      vector operand
//   scale  object whose buffer is passed to the typed routine
//   sumsq  object whose buffer is passed to the typed routine
void bli_sumsqv_unb_var1( obj_t* x,
                          obj_t* scale,
                          obj_t* sumsq )
{
    FUNCPTR_T fp;

    num_t  x_dt      = bli_obj_datatype( *x );
    dim_t  x_len     = bli_obj_vector_dim( *x );
    void*  x_buf     = bli_obj_buffer_at_off( *x );
    inc_t  x_inc     = bli_obj_vector_inc( *x );

    void*  scale_buf = bli_obj_buffer_at_off( *scale );
    void*  sumsq_buf = bli_obj_buffer_at_off( *sumsq );

    // Look up the typed routine and run it.
    fp = ftypes[x_dt];

    fp( x_len,
        x_buf, x_inc,
        scale_buf,
        sumsq_buf );
}
// Object-based wrapper for the axpy2v kernel.
//
// Unpacks the operand objects and calls the typed routine selected from
// ftypes by the datatype of z. Both scalars are read in that same datatype.
//
//   alpha1, alpha2  scalar objects
//   x               vector operand; also supplies the length n
//   y, z            vector operands
void bli_axpy2v_ker( obj_t* alpha1,
                     obj_t* alpha2,
                     obj_t* x,
                     obj_t* y,
                     obj_t* z )
{
    FUNCPTR_T fp;

    // The datatype of z drives both scalar extraction and dispatch.
    num_t  z_dt = bli_obj_datatype( *z );

    // Scalars.
    void*  a1_buf = bli_obj_buffer_for_1x1( z_dt, *alpha1 );
    void*  a2_buf = bli_obj_buffer_for_1x1( z_dt, *alpha2 );

    // Vector x.
    conj_t x_conj = bli_obj_conj_status( *x );
    dim_t  x_len  = bli_obj_vector_dim( *x );
    void*  x_buf  = bli_obj_buffer_at_off( *x );
    inc_t  x_inc  = bli_obj_vector_inc( *x );

    // Vector y.
    conj_t y_conj = bli_obj_conj_status( *y );
    void*  y_buf  = bli_obj_buffer_at_off( *y );
    inc_t  y_inc  = bli_obj_vector_inc( *y );

    // Vector z.
    void*  z_buf  = bli_obj_buffer_at_off( *z );
    inc_t  z_inc  = bli_obj_vector_inc( *z );

    // Look up the typed routine and run it.
    fp = ftypes[z_dt];

    fp( x_conj,
        y_conj,
        x_len,
        a1_buf,
        a2_buf,
        x_buf, x_inc,
        y_buf, y_inc,
        z_buf, z_inc );
}
// Object-based wrapper for the scal2v unb_var1 variant.
//
// Unpacks the operand objects and calls the typed routine selected from
// ftypes by the datatypes of beta, x and y.
//
//   beta  scalar object (see the note on bli_set_scalar_dt_buffer below)
//   x     vector operand; supplies the conjugation flag and the length n
//   y     vector operand
void bli_scal2v_unb_var1( obj_t* beta,
                          obj_t* x,
                          obj_t* y )
{
    FUNCPTR_T fp;
    num_t     beta_dt;
    void*     beta_buf;

    // Vector x.
    num_t  x_dt   = bli_obj_datatype( *x );
    conj_t x_conj = bli_obj_conj_status( *x );
    dim_t  x_len  = bli_obj_vector_dim( *x );
    void*  x_buf  = bli_obj_buffer_at_off( *x );
    inc_t  x_inc  = bli_obj_vector_inc( *x );

    // Vector y.
    num_t  y_dt   = bli_obj_datatype( *y );
    void*  y_buf  = bli_obj_buffer_at_off( *y );
    inc_t  y_inc  = bli_obj_vector_inc( *y );

    // When beta is a scalar constant, the datatype of x is used to locate
    // the matching constant value; otherwise beta's own datatype is used
    // and its buffer is taken at beta's offset.
    bli_set_scalar_dt_buffer( beta, x_dt, beta_dt, beta_buf );

    // Select the typed routine for this datatype combination and run it.
    fp = ftypes[beta_dt][x_dt][y_dt];

    fp( x_conj,
        x_len,
        beta_buf,
        x_buf, x_inc,
        y_buf, y_inc );
}
// Object-based wrapper for the trmv unf_var2 variant.
//
// Unpacks the operand objects and calls the typed routine selected from
// ftypes by the datatypes of a and x.
//
//   alpha  scalar object, read as the type union of a and x
//   a      matrix operand; supplies uplo, conjugation/transposition status,
//          diag, the dimension m, buffer and strides
//   x      vector operand
//   cntx   not referenced by this function
//   cntl   not referenced by this function
void bli_trmv_unf_var2( obj_t*  alpha,
                        obj_t*  a,
                        obj_t*  x,
                        cntx_t* cntx,
                        trmv_t* cntl )
{
    FUNCPTR_T fp;

    // Matrix a.
    num_t   a_dt    = bli_obj_datatype( *a );
    uplo_t  a_uplo  = bli_obj_uplo( *a );
    trans_t a_trans = bli_obj_conjtrans_status( *a );
    diag_t  a_diag  = bli_obj_diag( *a );
    dim_t   a_dim   = bli_obj_length( *a );
    void*   a_buf   = bli_obj_buffer_at_off( *a );
    inc_t   a_rs    = bli_obj_row_stride( *a );
    inc_t   a_cs    = bli_obj_col_stride( *a );

    // Vector x.
    num_t   x_dt    = bli_obj_datatype( *x );
    void*   x_buf   = bli_obj_buffer_at_off( *x );
    inc_t   x_inc   = bli_obj_vector_inc( *x );

    // alpha must be read in the type union of a and x so that no
    // information is lost unnecessarily during the computation.
    num_t   alpha_dt  = bli_datatype_union( a_dt, x_dt );
    void*   alpha_buf = bli_obj_buffer_for_1x1( alpha_dt, *alpha );

    // Select the typed routine for this datatype combination and run it.
    fp = ftypes[a_dt][x_dt];

    fp( a_uplo,
        a_trans,
        a_diag,
        a_dim,
        alpha_buf,
        a_buf, a_rs, a_cs,
        x_buf, x_inc );
}
// Object-based wrapper for the invertv kernel.
//
// Passes the length, buffer and increment of x to the typed routine
// selected from ftypes by the datatype of x.
//
//   x  vector object whose buffer is handed to the typed routine
void bli_invertv_kernel( obj_t* x )
{
    FUNCPTR_T fp;

    num_t x_dt  = bli_obj_datatype( *x );
    dim_t x_len = bli_obj_vector_dim( *x );
    void* x_buf = bli_obj_buffer_at_off( *x );
    inc_t x_inc = bli_obj_vector_inc( *x );

    // Look up the typed routine and run it.
    fp = ftypes[x_dt];

    fp( x_len,
        x_buf, x_inc );
}
// Object-based wrapper for the normfv unb_var1 variant.
//
// Passes the length, buffer and increment of x, together with the buffer
// of norm, to the typed routine selected from ftypes by the datatype of x.
//
//   x     vector operand
//   norm  object whose buffer is passed to the typed routine
void bli_normfv_unb_var1( obj_t* x,
                          obj_t* norm )
{
    FUNCPTR_T fp;

    num_t x_dt     = bli_obj_datatype( *x );
    dim_t x_len    = bli_obj_vector_dim( *x );
    void* x_buf    = bli_obj_buffer_at_off( *x );
    inc_t x_inc    = bli_obj_vector_inc( *x );

    void* norm_buf = bli_obj_buffer_at_off( *norm );

    // Look up the typed routine and run it.
    fp = ftypes[x_dt];

    fp( x_len,
        x_buf, x_inc,
        norm_buf );
}
// Object-based fprintv entry point.
//
// Forwards the stream, the two strings, the format string, and the length,
// buffer and increment of x to the typed routine selected from ftypes by
// the datatype of x.
//
//   file    output stream, passed through
//   s1, s2  strings passed through before/after the vector arguments
//   x       vector object to be printed
//   format  format string, passed through
void bli_fprintv( FILE*  file,
                  char*  s1,
                  obj_t* x,
                  char*  format,
                  char*  s2 )
{
    FUNCPTR_T fp;

    num_t x_dt  = bli_obj_datatype( *x );
    dim_t x_len = bli_obj_vector_dim( *x );
    void* x_buf = bli_obj_buffer_at_off( *x );
    inc_t x_inc = bli_obj_vector_inc( *x );

    // Look up the typed routine and run it.
    fp = ftypes[x_dt];

    fp( file,
        s1,
        x_len,
        x_buf, x_inc,
        format,
        s2 );
}
void bli_hemv_unb_var1( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { num_t dt_a = bli_obj_datatype( *a ); num_t dt_x = bli_obj_datatype( *x ); num_t dt_y = bli_obj_datatype( *y ); uplo_t uplo = bli_obj_uplo( *a ); conj_t conja = bli_obj_conj_status( *a ); conj_t conjx = bli_obj_conj_status( *x ); dim_t m = bli_obj_length( *a ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t incx = bli_obj_vector_inc( *x ); void* buf_y = bli_obj_buffer_at_off( *y ); inc_t incy = bli_obj_vector_inc( *y ); num_t dt_alpha; void* buf_alpha; num_t dt_beta; void* buf_beta; FUNCPTR_T f; // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a]; // Invoke the function. f( uplo, conja, conjx, conjh, m, buf_alpha, buf_a, rs_a, cs_a, buf_x, incx, buf_beta, buf_y, incy ); }
void bli_syr2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ) { her2_t* her2_cntl; num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; bool_t x_has_unit_inc; bool_t y_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr2_check( alpha, x, y, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); //dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && y_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. 
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. bli_her2_int( BLIS_NO_CONJUGATE, &alpha_local, &alpha_local, x, y, c, cntx, her2_cntl ); }
// Object-based trmv front-end.
//
// Optionally validates the operands, makes a local copy of alpha cast to
// the type union of the target datatypes of a and x, chooses a trmv control
// tree from the transposition and storage of a, and calls bli_trmv_int().
//
//   alpha  scalar object
//   a      matrix operand; may be marked as packed (see below)
//   x      vector operand; may be marked as packed (see below)
//
// Side effect: when a and x are not both contiguous, the pack schema of
// whichever operand IS contiguous is overwritten in the caller's object.
void bli_trmv( obj_t* alpha,
               obj_t* a,
               obj_t* x )
{
    trmv_t* cntl;
    obj_t   alpha_copy;
    bool_t  row_like;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
    {
        bli_trmv_check( alpha, a, x );
    }

    // Target datatypes of the operands.
    num_t  a_targ_dt = bli_obj_target_datatype( *a );
    num_t  x_targ_dt = bli_obj_target_datatype( *x );

    // An operand counts as contiguous when the matrix is row- or
    // column-stored, or when the vector has unit increment.
    bool_t a_contig  = ( bli_obj_is_row_stored( *a ) ||
                         bli_obj_is_col_stored( *a ) );
    bool_t x_contig  = ( bli_obj_vector_inc( *x ) == 1 );

    // Copy-cast alpha into the type union of the target datatypes of a and
    // x to prevent any unnecessary loss of information.
    num_t  alpha_dt  = bli_datatype_union( a_targ_dt, x_targ_dt );
    bli_obj_init_scalar_copy_of( alpha_dt,
                                 BLIS_NO_CONJUGATE,
                                 alpha,
                                 &alpha_copy );

    if ( a_contig && x_contig )
    {
        // Everything is contiguous: use a tree that calls the unblocked
        // implementation directly. Two trees cover the four combinations
        // of transposition and row/column storage: "no transpose,
        // row-stored" pairs with "transpose, column-stored", and the
        // remaining two cases pair with each other.
        row_like = bli_obj_is_row_stored( *a );

        if ( bli_obj_has_notrans( *a ) )
        {
            cntl = row_like ? trmv_cntl_bs_ke_nrow_tcol
                            : trmv_cntl_bs_ke_ncol_trow;
        }
        else // a has a transposition
        {
            cntl = row_like ? trmv_cntl_bs_ke_ncol_trow
                            : trmv_cntl_bs_ke_nrow_tcol;
        }
    }
    else
    {
        // Mark contiguous operands as already packed so the blocked
        // algorithm does not pack them needlessly.
        if ( a_contig ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
        if ( x_contig ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );

        // Same pairing as above, but decided by storage tilt and using
        // trees that perform blocking.
        row_like = bli_obj_is_row_tilted( *a );

        if ( bli_obj_has_notrans( *a ) )
        {
            cntl = row_like ? trmv_cntl_ge_nrow_tcol
                            : trmv_cntl_ge_ncol_trow;
        }
        else // a has a transposition
        {
            cntl = row_like ? trmv_cntl_ge_ncol_trow
                            : trmv_cntl_ge_nrow_tcol;
        }
    }

    // Hand off to the internal back-end with the copy-cast of alpha and
    // the chosen control tree.
    bli_trmv_int( &alpha_copy,
                  a,
                  x,
                  cntl );
}
// Object-based hemv front-end.
//
// Optionally validates the operands, makes local copy-casts of alpha and
// beta, chooses a hemv control tree from the uplo and storage of a, and
// calls bli_hemv_int() with conjh = BLIS_CONJUGATE (Hermitian rather than
// symmetric behavior).
//
//   alpha  scalar object
//   a      matrix operand; may be marked as packed (see below)
//   x      vector operand; may be marked as packed (see below)
//   beta   scalar object
//   y      vector operand; may be marked as packed (see below)
//
// Side effect: when at least one operand lacks unit stride, the pack schema
// of each operand that DOES have unit stride is overwritten in the caller's
// object.
void bli_hemv( obj_t* alpha,
               obj_t* a,
               obj_t* x,
               obj_t* beta,
               obj_t* y )
{
    hemv_t* cntl;
    obj_t   alpha_copy;
    obj_t   beta_copy;
    bool_t  row_like;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
    {
        bli_hemv_check( alpha, a, x, beta, y );
    }

    // Target datatypes of the operands.
    num_t  a_targ_dt = bli_obj_target_datatype( *a );
    num_t  x_targ_dt = bli_obj_target_datatype( *x );
    num_t  y_targ_dt = bli_obj_target_datatype( *y );

    // Unit stride means row- or column-stored for the matrix and unit
    // increment for the vectors.
    bool_t a_unit = ( bli_obj_is_row_stored( *a ) ||
                      bli_obj_is_col_stored( *a ) );
    bool_t x_unit = ( bli_obj_vector_inc( *x ) == 1 );
    bool_t y_unit = ( bli_obj_vector_inc( *y ) == 1 );

    // Copy-cast alpha into the type union of the target datatypes of a and
    // x to prevent any unnecessary loss of information.
    num_t  alpha_dt = bli_datatype_union( a_targ_dt, x_targ_dt );
    bli_obj_scalar_init_detached_copy_of( alpha_dt,
                                          BLIS_NO_CONJUGATE,
                                          alpha,
                                          &alpha_copy );

    // Copy-cast beta into the datatype of y. If y is real and beta is
    // complex, there is no reason to keep the local beta complex because
    // the complex part of beta*y would not be stored. If y is complex and
    // beta is real, beta is harmlessly promoted to complex.
    num_t  beta_dt = y_targ_dt;
    bli_obj_scalar_init_detached_copy_of( beta_dt,
                                          BLIS_NO_CONJUGATE,
                                          beta,
                                          &beta_copy );

    if ( a_unit && x_unit && y_unit )
    {
        // Everything has unit stride: use a tree that calls the unblocked
        // implementation directly. Two trees cover the four combinations
        // of upper/lower storage and row/column storage: row-stored lower
        // pairs with column-stored upper, and the remaining two cases pair
        // with each other.
        row_like = bli_obj_is_row_stored( *a );

        if ( bli_obj_is_lower( *a ) )
        {
            cntl = row_like ? hemv_cntl_bs_ke_lrow_ucol
                            : hemv_cntl_bs_ke_lcol_urow;
        }
        else // a is upper-stored
        {
            cntl = row_like ? hemv_cntl_bs_ke_lcol_urow
                            : hemv_cntl_bs_ke_lrow_ucol;
        }
    }
    else
    {
        // Mark unit-stride operands as already packed so the blocked
        // algorithm does not pack them needlessly.
        if ( a_unit ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
        if ( x_unit ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
        if ( y_unit ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );

        // Same pairing as above, but decided by storage tilt and using
        // trees that perform blocking.
        row_like = bli_obj_is_row_tilted( *a );

        if ( bli_obj_is_lower( *a ) )
        {
            cntl = row_like ? hemv_cntl_ge_lrow_ucol
                            : hemv_cntl_ge_lcol_urow;
        }
        else // a is upper-stored
        {
            cntl = row_like ? hemv_cntl_ge_lcol_urow
                            : hemv_cntl_ge_lrow_ucol;
        }
    }

    // Hand off to the internal back-end with the copy-casts of the scalars
    // and the chosen control tree. conjh = BLIS_CONJUGATE selects the
    // Hermitian (and not symmetric) algorithms.
    bli_hemv_int( BLIS_CONJUGATE,
                  &alpha_copy,
                  a,
                  x,
                  &beta_copy,
                  y,
                  cntl );
}
int main( int argc, char** argv ) { obj_t a, x, y; obj_t a_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha; int r, n_repeats; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE; for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_a, m, n, 0, 0, &a ); bli_obj_create( dt_a, m, n, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &y ); bli_randm( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS bli_ger( &alpha, &x, &y, &a ); #else f77_int mm = bli_obj_length( a ); f77_int nn = bli_obj_width( a ); f77_int incx = bli_obj_vector_inc( x ); f77_int incy = bli_obj_vector_inc( y ); f77_int lda = bli_obj_col_stride( a ); double* alphap = bli_obj_buffer( alpha ); double* xp = bli_obj_buffer( x ); double* yp = bli_obj_buffer( y ); double* ap = bli_obj_buffer( a ); dger_( &mm, &nn, alphap, xp, &incx, yp, &incy, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_ger_blis" ); #else printf( "data_ger_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e 
%6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, x; obj_t x_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_a = dt_x = BLIS_DOUBLE; uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_trsv_blis" ); #else printf( "data_trv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_x, m, 1, 0, 0, &x_save ); bli_randm( &a ); bli_randm( &x ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); // Randomize A and zero the unstored triangle to ensure the // implementation reads only from the stored region. bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. 
bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_copym( &x, &x_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &x_save, &x ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "x", &x, "%4.1f", "" ); #endif #ifdef BLIS bli_trsv( &BLIS_ONE, &a, &x ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_char diaga = 'N'; f77_int mm = bli_obj_length( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int incx = bli_obj_vector_inc( &x ); double* ap = bli_obj_buffer( &a ); double* xp = bli_obj_buffer( &x ); dtrsv_( &uploa, &transa, &diaga, &mm, ap, &lda, xp, &incx ); #endif #ifdef PRINT bli_printm( "x after", &x, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trsv_blis" ); #else printf( "data_trsv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &x_save ); } //bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, x; obj_t a_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 6; #endif #if 1 dt_alpha = dt_x = dt_a = BLIS_DOUBLE; #else dt_alpha = dt_x = dt_a = BLIS_DCOMPLEX; #endif uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_a, m, m, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &a ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); //bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uplo, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS //bli_obj_toggle_conj( &x ); //bli_syr( &alpha, bli_her( &alpha, &x, &a ); #else f77_char uplo = 'L'; f77_int mm = bli_obj_length( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int lda = bli_obj_col_stride( &a ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* ap = bli_obj_buffer( &a ); /* dcomplex* xp = bli_obj_buffer( x ); dcomplex* ap 
= bli_obj_buffer( &a ); */ dsyr_( &uplo, //zher_( &uplo, &mm, alphap, xp, &incx, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &a ); bli_obj_free( &a_save ); } //bli_finalize(); return 0; }
// // Define object-based interface. // void bli_dotxf( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ) { num_t dt = bli_obj_datatype( *x ); conj_t conja = bli_obj_conj_status( *a ); conj_t conjx = bli_obj_conj_status( *x ); dim_t m = bli_obj_vector_dim( *y ); dim_t b_n = bli_obj_vector_dim( *x ); void* buf_a = bli_obj_buffer_at_off( *a ); inc_t rs_a = bli_obj_row_stride( *a ); inc_t cs_a = bli_obj_col_stride( *a ); void* buf_x = bli_obj_buffer_at_off( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_y = bli_obj_buffer_at_off( *y ); inc_t inc_y = bli_obj_vector_inc( *y ); obj_t alpha_local; void* buf_alpha; obj_t beta_local; void* buf_beta; FUNCPTR_T f = ftypes[dt]; if ( bli_error_checking_is_enabled() ) bli_dotxf_check( alpha, a, x, beta, y ); // Create local copy-casts of the scalars (and apply internal conjugation // if needed). bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, alpha, &alpha_local ); bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, beta, &beta_local ); // Extract the scalar buffers. buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); // Support cases where matrix A requires a transposition. if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } // Invoke the void pointer-based function. f( conja, conjx, m, b_n, buf_alpha, buf_a, rs_a, cs_a, buf_x, inc_x, buf_beta, buf_y, inc_y ); }
// Result check for setv.
//
// Compares every element of x against beta (read in the datatype of x).
// *resid is set to 0.0 when all elements are equal to beta and to 1.0 as
// soon as the first differing element is found.
//
//   beta   scalar object holding the expected value
//   x      vector object to verify
//   resid  output: 0.0 on success, 1.0 on mismatch
void libblis_test_setv_check( obj_t*  beta,
                              obj_t*  x,
                              double* resid )
{
    num_t x_dt     = bli_obj_datatype( *x );
    dim_t x_len    = bli_obj_vector_dim( *x );
    inc_t x_inc    = bli_obj_vector_inc( *x );
    void* x_buf    = bli_obj_buffer_at_off( *x );
    void* beta_buf = bli_obj_buffer_for_1x1( x_dt, *beta );

    dim_t k;

    // Assume success until a mismatch is seen.
    *resid = 0.0;

    // The simplest way to confirm that setv worked is to verify that each
    // element of x equals beta. Each branch below does exactly that for
    // one datatype, stopping at the first difference.
    if ( bli_obj_is_float( *x ) )
    {
        float* xs = x_buf;
        float* bs = beta_buf;

        for ( k = 0; k < x_len; ++k )
        {
            if ( !bli_seq( xs[ k * x_inc ], *bs ) )
            {
                *resid = 1.0;
                break;
            }
        }
    }
    else if ( bli_obj_is_double( *x ) )
    {
        double* xd = x_buf;
        double* bd = beta_buf;

        for ( k = 0; k < x_len; ++k )
        {
            if ( !bli_deq( xd[ k * x_inc ], *bd ) )
            {
                *resid = 1.0;
                break;
            }
        }
    }
    else if ( bli_obj_is_scomplex( *x ) )
    {
        scomplex* xc = x_buf;
        scomplex* bc = beta_buf;

        for ( k = 0; k < x_len; ++k )
        {
            if ( !bli_ceq( xc[ k * x_inc ], *bc ) )
            {
                *resid = 1.0;
                break;
            }
        }
    }
    else // every remaining datatype is treated as dcomplex
    {
        dcomplex* xz = x_buf;
        dcomplex* bz = beta_buf;

        for ( k = 0; k < x_len; ++k )
        {
            if ( !bli_zeq( xz[ k * x_inc ], *bz ) )
            {
                *resid = 1.0;
                break;
            }
        }
    }
}
// Result check for randv.
//
// Computes the absolute-value sum of x with the typed absumv routine for
// the datatype of x and classifies it:
//
//   *resid = 1.0  the sum equals zero
//   *resid = 2.0  the sum is at least 1.0 * m_x (real datatypes) or
//                 2.0 * m_x (complex datatypes)
//   *resid = 0.0  otherwise
//
//   x      vector object to verify
//   resid  output residual code as described above
void libblis_test_randv_check( obj_t*  x,
                               double* resid )
{
    dim_t  x_len = bli_obj_vector_dim( *x );
    inc_t  x_inc = bli_obj_vector_inc( *x );
    void*  x_buf = bli_obj_buffer_at_off( *x );

    // Upper limits on the absolute-value sum for real and complex data.
    double real_limit = 1.0 * x_len;
    double cplx_limit = 2.0 * x_len;

    *resid = 0.0;

    // The two most likely failure modes of randv are that every element is
    // zero, or that every element is greater than or equal to one. Both
    // are detected from the sum of the absolute values of the elements.
    if ( bli_obj_is_float( *x ) )
    {
        float total;

        bli_sabsumv( x_len, x_buf, x_inc, &total );

        if ( total == *bli_s0 )
        {
            *resid = 1.0;
        }
        else if ( total >= real_limit )
        {
            *resid = 2.0;
        }
    }
    else if ( bli_obj_is_double( *x ) )
    {
        double total;

        bli_dabsumv( x_len, x_buf, x_inc, &total );

        if ( total == *bli_d0 )
        {
            *resid = 1.0;
        }
        else if ( total >= real_limit )
        {
            *resid = 2.0;
        }
    }
    else if ( bli_obj_is_scomplex( *x ) )
    {
        float total;

        bli_cabsumv( x_len, x_buf, x_inc, &total );

        if ( total == *bli_s0 )
        {
            *resid = 1.0;
        }
        else if ( total >= cplx_limit )
        {
            *resid = 2.0;
        }
    }
    else // every remaining datatype is treated as dcomplex
    {
        double total;

        bli_zabsumv( x_len, x_buf, x_inc, &total );

        if ( total == *bli_d0 )
        {
            *resid = 1.0;
        }
        else if ( total >= cplx_limit )
        {
            *resid = 2.0;
        }
    }
}