//
// Check the result of addv.
//
// Parameters:
// - alpha: scalar value that x was set to.
// - beta:  scalar value that y_orig was set to.
// - x:     input vector operand; supplies the datatype, the vector
//          dimension, and the conjugation status used by the check.
// - y:     output vector as computed by the implementation under test.
// - resid: output; receives the (non-negative) residual.
//
void libblis_test_addv_check( obj_t*  alpha,
                              obj_t*  beta,
                              obj_t*  x,
                              obj_t*  y,
                              double* resid )
{
	num_t  dt      = bli_obj_datatype( *x );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *x );
	dim_t  m       = bli_obj_vector_dim( *x );

	conj_t conjx   = bli_obj_conj_status( *x );

	obj_t  aplusb;
	obj_t  alpha_conj;
	obj_t  norm_r, m_r, temp_r;

	double junk;

	//
	// Pre-conditions:
	// - x is set to alpha.
	// - y_orig is set to beta.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := y_orig + conjx(x)
	//
	// is functioning correctly if
	//
	//   | fnormv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m ) |
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt,      &aplusb );
	bli_obj_scalar_init_detached( dt_real, &temp_r );
	bli_obj_scalar_init_detached( dt_real, &norm_r );
	bli_obj_scalar_init_detached( dt_real, &m_r );

	// alpha_conj := conjx(alpha)
	bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj );

	// norm_r := fnormv(y)
	bli_fnormv( y, &norm_r );

	// aplusb := beta + conjx(alpha)
	bli_copysc( beta, &aplusb );
	bli_addsc( &alpha_conj, &aplusb );

	bli_setsc( ( double )m, 0.0, &m_r );

	// temp_r := sqrt( absqsc( aplusb ) * m )
	bli_absqsc( &aplusb, &temp_r );
	bli_mulsc( &m_r, &temp_r );
	bli_sqrtsc( &temp_r, &temp_r );

	// norm_r := norm_r - temp_r
	bli_subsc( &temp_r, &norm_r );

	bli_getsc( &norm_r, resid, &junk );

	// The difference computed above is signed. Report its magnitude so
	// that a computed norm that is too small is not hidden behind a
	// negative residual. (A NaN residual is left untouched.)
	if ( *resid < 0.0 ) *resid = -( *resid );
}
//
// Check the result of scalv.
//
// Parameters:
// - params: test parameters (not referenced by this check).
// - beta:   scalar that was applied by the operation.
// - y:      vector as computed by the implementation under test. Note
//           that y is overwritten while the residual is being formed.
// - y_orig: copy of y taken before the operation was run.
// - resid:  output; receives the residual.
//
void libblis_test_scalv_check
     (
       test_params_t* params,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         y_orig,
       double*        resid
     )
{
	//
	// Pre-conditions:
	// - y_orig is randomized.
	// Note:
	// - beta should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := conjbeta(beta) * y_orig
	//
	// is functioning correctly if
	//
	//   normf( y + -conjbeta(beta) * y_orig )
	//
	// is negligible.
	//

	const num_t dt      = bli_obj_dt( y );
	const num_t dt_real = bli_obj_dt_proj_to_real( y );
	const dim_t len     = bli_obj_vector_dim( y );

	obj_t       scaled;
	obj_t       neg_beta;
	obj_t       norm;
	double      imag_unused;

	// scaled := y_orig
	bli_obj_create( dt, len, 1, 0, 0, &scaled );
	bli_copyv( y_orig, &scaled );

	bli_obj_scalar_init_detached( dt,      &neg_beta );
	bli_obj_scalar_init_detached( dt_real, &norm );

	// neg_beta := -beta
	bli_copysc( beta, &neg_beta );
	bli_mulsc( &BLIS_MINUS_ONE, &neg_beta );

	// scaled := neg_beta * scaled
	bli_scalv( &neg_beta, &scaled );

	// y := y + scaled
	bli_addv( &scaled, y );

	// resid := real part of normf( y )
	bli_normfv( y, &norm );
	bli_getsc( &norm, resid, &imag_unused );

	bli_obj_free( &scaled );
}
//
// Initialize beta as a detached scalar object of datatype dt and copy the
// value of alpha into it, with the conj parameter applied to alpha first.
//
// Parameters:
// - dt:    datatype with which beta is initialized.
// - conj:  conjugation to apply to (a local alias of) alpha.
// - alpha: source scalar object; the object itself is not modified.
// - beta:  destination scalar object.
//
void bli_obj_scalar_init_detached_copy_of( num_t  dt,
                                           conj_t conj,
                                           obj_t* alpha,
                                           obj_t* beta )
{
	obj_t src;

	// Work on an alias of alpha so that the conj parameter can be applied
	// without touching the caller's object.
	bli_obj_alias_to( *alpha, src );
	bli_obj_apply_conj( conj, src );

	// Set up beta as a detached scalar of the requested datatype.
	bli_obj_scalar_init_detached( dt, beta );

	// beta := src, with conjugation and/or typecasting applied as needed.
	bli_copysc( &src, beta );
}
//
// Check the result of dotxaxpyf.
//
// Parameters:
// - params:        test parameters (not referenced by this check).
// - alpha, beta:   scalars that were passed to the operation.
// - at, a:         matrix operands (at is an alias to a).
// - w, x:          input vector operands.
// - y, z:          vectors as computed by the implementation under test.
// - y_orig, z_orig: copies of y and z taken before the operation was run.
// - resid:         output; receives the larger (in magnitude) of the two
//                  residuals.
//
void libblis_test_dotxaxpyf_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         at,
       obj_t*         a,
       obj_t*         w,
       obj_t*         x,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         z,
       obj_t*         y_orig,
       obj_t*         z_orig,
       double*        resid
     )
{
	//
	// Pre-conditions:
	// - a is randomized.
	// - w is randomized.
	// - x is randomized.
	// - y is randomized.
	// - z is randomized.
	// - at is an alias to a.
	// Note:
	// - alpha and beta should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + alpha * conjat(A^T) * conjw(w)
	//   z :=        z_orig + alpha * conja(A)    * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - v )
	//
	// and
	//
	//   normf( z - q )
	//
	// are negligible, where v and q contain y and z as computed by repeated
	// calls to dotxv and axpyv, respectively.
	//

	const num_t dt      = bli_obj_datatype( *y );
	const num_t dt_real = bli_obj_datatype_proj_to_real( *y );

	const dim_t m       = bli_obj_vector_dim( *z );
	const dim_t b_n     = bli_obj_vector_dim( *y );

	obj_t       col, x_elem, v_elem;
	obj_t       v, q;
	obj_t       scaled_elem;
	obj_t       norm;

	double      resid_y, resid_z;
	double      imag_unused;

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached( dt,      &scaled_elem );

	// Reference vectors start out as copies of the original operands.
	bli_obj_create( dt, b_n, 1, 0, 0, &v );
	bli_obj_create( dt, m,   1, 0, 0, &q );

	bli_copyv( y_orig, &v );
	bli_copyv( z_orig, &q );

	// v := beta * v + alpha * conjat(at) * conjw(w), one dotxv at a time.
	for ( dim_t j = 0; j < b_n; ++j )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, j, 1, at, &col );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, j, 1, &v, &v_elem );

		bli_dotxv( alpha, &col, w, beta, &v_elem );
	}

	// q := q + alpha * conja(a) * conjx(x), one axpyv at a time.
	for ( dim_t j = 0; j < b_n; ++j )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, j, 1, a, &col );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, j, 1, x, &x_elem );

		// scaled_elem := alpha * x_elem
		bli_copysc( &x_elem, &scaled_elem );
		bli_mulsc( alpha, &scaled_elem );

		bli_axpyv( &scaled_elem, &col, &q );
	}

	// resid_y := normf( v - y )
	bli_subv( y, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, &resid_y, &imag_unused );

	// resid_z := normf( q - z )
	bli_subv( z, &q );
	bli_normfv( &q, &norm );
	bli_getsc( &norm, &resid_z, &imag_unused );

	*resid = bli_fmaxabs( resid_y, resid_z );

	bli_obj_free( &v );
	bli_obj_free( &q );
}
//
// Check the result of axpyf.
//
// Parameters:
// - alpha:  scalar that was passed to the operation.
// - a:      matrix operand; its width determines the number of axpyv
//           calls used to build the reference result.
// - x:      input vector operand.
// - y:      vector as computed by the implementation under test.
// - y_orig: copy of y taken before the operation was run.
// - resid:  output; receives the residual.
//
void libblis_test_axpyf_check( obj_t*  alpha,
                               obj_t*  a,
                               obj_t*  x,
                               obj_t*  y,
                               obj_t*  y_orig,
                               double* resid )
{
	//
	// Pre-conditions:
	// - a is randomized.
	// - x is randomized.
	// - y is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := y_orig + alpha * conja(A) * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - v )
	//
	// is negligible, where v contains y as computed by repeated calls to
	// axpyv.
	//

	const num_t dt      = bli_obj_datatype( *y );
	const num_t dt_real = bli_obj_datatype_proj_to_real( *y );

	const dim_t m       = bli_obj_vector_dim( *y );
	const dim_t b_n     = bli_obj_width( *a );

	obj_t       col, x_elem;
	obj_t       v;
	obj_t       scaled_elem;
	obj_t       norm;

	double      imag_unused;

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached( dt,      &scaled_elem );

	// v := y_orig
	bli_obj_create( dt, m, 1, 0, 0, &v );
	bli_copyv( y_orig, &v );

	// Accumulate one column of a at a time into v.
	for ( dim_t j = 0; j < b_n; ++j )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, j, 1, a, &col );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, j, 1, x, &x_elem );

		// scaled_elem := alpha * x_elem
		bli_copysc( &x_elem, &scaled_elem );
		bli_mulsc( alpha, &scaled_elem );

		bli_axpyv( &scaled_elem, &col, &v );
	}

	// resid := real part of normf( v - y )
	bli_subv( y, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &imag_unused );

	bli_obj_free( &v );
}
//
// Run one setv experiment: create the operands, time the operation
// n_repeats times, and report the performance estimate and residual.
//
// Parameters:
// - params:   test parameters; n_repeats is read from here.
// - op:       operation descriptor; dim_spec[0] is mapped to the vector
//             dimension.
// - impl:     implementation selector forwarded to the setv wrapper.
// - datatype: datatype of the scalar and the vector.
// - pc_str:   parameter characters (not referenced; setv has none to map).
// - sc_str:   storage characters; sc_str[0] is used when creating x.
// - p_cur:    current problem size.
// - perf:     output; performance estimate of the fastest repeat.
// - resid:    output; residual reported by the setv check.
//
void libblis_test_setv_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   mt_impl_t      impl,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	const unsigned int n_repeats = params->n_repeats;

	double             best_time = 1e9;

	obj_t              beta;
	obj_t              x;

	// Translate the dimension specifier into a concrete vector length.
	const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Set up the scalar and the vector operand.
	bli_obj_scalar_init_detached( datatype, &beta );
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );

	// beta := 1
	bli_copysc( &BLIS_ONE, &beta );

	// Fill x with random values.
	bli_randv( &x );

	// Time each repeat and keep the fastest one.
	for ( unsigned int rep = 0; rep < n_repeats; ++rep )
	{
		double t_start = bli_clock();

		libblis_test_setv_impl( impl, &beta, &x );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Convert the fastest time into a performance estimate.
	*perf = ( 1.0 * m ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) )
	{
		*perf *= 2.0;
	}

	// Compute the residual.
	libblis_test_setv_check( &beta, &x, resid );

	// Zero out performance and residual if the output vector is empty.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Release the test operand.
	bli_obj_free( &x );
}
//
// Check the result of trsv.
//
// Parameters:
// - alpha:  scalar that was passed to the operation.
// - a:      triangular matrix operand; supplies uplo and conjtrans status.
// - x:      vector as computed by the implementation under test.
// - x_orig: copy of x taken before the operation was run.
// - resid:  output; receives the residual.
//
void libblis_test_trsv_check( obj_t*  alpha,
                              obj_t*  a,
                              obj_t*  x,
                              obj_t*  x_orig,
                              double* resid )
{
	//
	// Pre-conditions:
	// - a is randomized and triangular.
	// - x is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   x := alpha * inv(transa(A)) * x_orig
	//
	// is functioning correctly if
	//
	//   fnorm( y - x_orig )
	//
	// is negligible, where
	//
	//   y = inv(alpha) * transa(A_dense) * x
	//

	const num_t   dt      = bli_obj_datatype( *x );
	const num_t   dt_real = bli_obj_datatype_proj_to_real( *x );

	const dim_t   m       = bli_obj_vector_dim( *x );

	const uplo_t  uploa   = bli_obj_uplo( *a );
	const trans_t transa  = bli_obj_conjtrans_status( *a );

	obj_t         inv_alpha;
	obj_t         a_dense, y;
	obj_t         norm;

	double        imag_unused;

	bli_obj_scalar_init_detached( dt,      &inv_alpha );
	bli_obj_scalar_init_detached( dt_real, &norm );

	// inv_alpha := 1 / alpha
	bli_copysc( &BLIS_ONE, &inv_alpha );
	bli_divsc( alpha, &inv_alpha );

	bli_obj_create( dt, m, 1, 0, 0, &y );
	bli_obj_create( dt, m, m, 0, 0, &a_dense );

	// Copy a into a_dense as a triangular matrix, then apply mktrim.
	bli_obj_set_struc( BLIS_TRIANGULAR, a_dense );
	bli_obj_set_uplo( uploa, a_dense );
	bli_obj_toggle_uplo_if_trans( transa, a_dense );
	bli_copym( a, &a_dense );
	bli_mktrim( &a_dense );

	// From here on, treat a_dense as a general dense matrix so that it can
	// be used with gemv.
	bli_obj_set_struc( BLIS_GENERAL, a_dense );
	bli_obj_set_uplo( BLIS_DENSE, a_dense );

	// y := inv_alpha * a_dense * x
	bli_gemv( &inv_alpha, &a_dense, x, &BLIS_ZERO, &y );

	// resid := real part of fnorm( y - x_orig )
	bli_subv( x_orig, &y );
	bli_fnormv( &y, &norm );
	bli_getsc( &norm, resid, &imag_unused );

	bli_obj_free( &y );
	bli_obj_free( &a_dense );
}
//
// Run one dotaxpyv experiment: create the operands, time the operation
// n_repeats times, and report the performance estimate and residual.
//
// Parameters:
// - params:   test parameters; n_repeats is read from here.
// - op:       operation descriptor; dim_spec[0] is mapped to the vector
//             dimension.
// - iface:    interface selector forwarded to the dotaxpyv wrapper.
// - datatype: datatype of the scalars and vectors.
// - pc_str:   parameter characters; [0], [1], [2] are mapped to the
//             conjugation of xt, x, and y, respectively.
// - sc_str:   storage characters; [0], [1], [2] are used when creating
//             x, y, and z (and z's saved copy), respectively.
// - p_cur:    current problem size.
// - perf:     output; performance estimate of the fastest repeat.
// - resid:    output; residual reported by the dotaxpyv check.
//
void libblis_test_dotaxpyv_experiment( test_params_t* params,
                                       test_op_t*     op,
                                       iface_t        iface,
                                       num_t          datatype,
                                       char*          pc_str,
                                       char*          sc_str,
                                       unsigned int   p_cur,
                                       double*        perf,
                                       double*        resid )
{
	const unsigned int n_repeats = params->n_repeats;

	double             best_time = 1e9;

	conj_t             conjxt, conjx, conjy;
	conj_t             conj_for_y;

	obj_t              alpha, rho;
	obj_t              xt, x, y, z;
	obj_t              z_save;

	// Translate the dimension specifier into a concrete vector length.
	const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Translate the parameter characters into conj_t values.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjxt );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );
	bli_param_map_char_to_blis_conj( pc_str[2], &conjy );

	// Set up the scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &rho );

	// Set up the vector operands.
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );
	libblis_test_vobj_create( params, datatype, sc_str[2], m, &z );
	libblis_test_vobj_create( params, datatype, sc_str[2], m, &z_save );

	// alpha is purely real for real z and purely imaginary otherwise.
	if ( bli_obj_is_real( z ) )
	{
		bli_setsc( -0.8,  0.0, &alpha );
	}
	else
	{
		bli_setsc(  0.0, -0.8, &alpha );
	}

	// Fill x and z with random values and keep a copy of z.
	bli_randv( &x );
	bli_randv( &z );
	bli_copyv( &z, &z_save );

	// xt is an alias to x. (It doesn't actually need to be transposed.)
	bli_obj_alias_to( x, xt );

	// Decide whether y is initialized from x with or without conjugation.
	// The code combines conjxt and conjy and toggles the result:
	//
	//   conjxt conjy  toggled   y is initialized as
	//   n      n      c         y = conj(x)
	//   n      c      n         y = x
	//   c      n      n         y = x
	//   c      c      c         y = conj(x)
	//
	conj_for_y = bli_apply_conj( conjxt, conjy );
	conj_for_y = bli_conj_toggled( conj_for_y );
	bli_obj_set_conj( conj_for_y, xt );
	bli_copyv( &xt, &y );

	// Attach the requested conjugation parameters to the operands.
	bli_obj_set_conj( conjxt, xt );
	bli_obj_set_conj( conjx,  x );
	bli_obj_set_conj( conjy,  y );

	// Time each repeat and keep the fastest one. rho and z are reset
	// before the clock starts so every repeat sees the same inputs.
	for ( unsigned int rep = 0; rep < n_repeats; ++rep )
	{
		bli_copysc( &BLIS_MINUS_ONE, &rho );
		bli_copyv( &z_save, &z );

		double t_start = bli_clock();

		libblis_test_dotaxpyv_impl( iface,
		                            &alpha, &xt, &x, &y, &rho, &z );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Convert the fastest time into a performance estimate.
	*perf = ( 2.0 * m + 2.0 * m ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( z ) )
	{
		*perf *= 4.0;
	}

	// Compute the residual.
	libblis_test_dotaxpyv_check( &alpha, &xt, &x, &y, &rho, &z, &z_save, resid );

	// Zero out performance and residual if the output vector is empty.
	libblis_test_check_empty_problem( &z, perf, resid );

	// Release the test operands. (xt is only an alias to x.)
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &z );
	bli_obj_free( &z_save );
}