// Runs one symv experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation, randomization, and check
//              helpers.
//   op       - operation descriptor; dim_spec[0] determines m.
//   iface    - interface selector, forwarded to libblis_test_symv_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] uplo of A, [1] conj of A, [2] conj of x.
//   sc_str   - storage chars:   [0] A, [1] x, [2] y (and the saved copy of y).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_symv_check().
void libblis_test_symv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = DBL_MAX;

    uplo_t uplo_a;
    conj_t conj_a;
    conj_t conj_x;

    obj_t  alpha, beta;
    obj_t  a, x, y;
    obj_t  y_orig;

    // Translate the dimension specifier into the actual dimension.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_uplo( pc_str[0], &uplo_a );
    bli_param_map_char_to_blis_conj( pc_str[1], &conj_a );
    bli_param_map_char_to_blis_conj( pc_str[2], &conj_x );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: A is m x m; x, y, and the backup of y all have length m.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, m, &a );
    libblis_test_vobj_create( params, datatype, sc_str[1], m, &x );
    libblis_test_vobj_create( params, datatype, sc_str[2], m, &y );
    libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_orig );

    // Pick alpha and beta according to the domain of y.
    if ( bli_obj_is_real( &y ) )
    {
        bli_setsc(  1.0, 0.0, &alpha );
        bli_setsc( -1.0, 0.0, &beta );
    }
    else
    {
        bli_setsc(  0.5, 0.5, &alpha );
        bli_setsc( -0.5, 0.5, &beta );
    }

    // Mark A as symmetric with the requested stored triangle.
    bli_obj_set_struc( BLIS_SYMMETRIC, &a );
    bli_obj_set_uplo( uplo_a, &a );

    // Fill A with random values, make it densely symmetric, then zero the
    // unstored triangle so that an implementation reading outside the
    // stored region is exposed.
    libblis_test_mobj_randomize( params, TRUE, &a );
    bli_mksymm( &a );
    bli_mktrim( &a );

    // Fill x and y with random values and keep a pristine copy of y.
    libblis_test_vobj_randomize( params, TRUE, &x );
    libblis_test_vobj_randomize( params, TRUE, &y );
    bli_copyv( &y, &y_orig );

    // Conjugation is applied last, after the operands have been populated.
    bli_obj_set_conj( conj_a, &a );
    bli_obj_set_conj( conj_x, &x );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore y so that every repeat starts from identical input.
        bli_copym( &y_orig, &y );

        const double t_start = bli_clock();

        libblis_test_symv_impl( iface, &alpha, &a, &x, &beta, &y );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 1.0 * m * m ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( &y ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_symv_check( params, &alpha, &a, &x, &beta, &y, &y_orig, resid );

    // An empty output vector yields zero performance and zero residual.
    libblis_test_check_empty_problem( &y, perf, resid );

    // Release the operands.
    bli_obj_free( &a );
    bli_obj_free( &x );
    bli_obj_free( &y );
    bli_obj_free( &y_orig );
}
// Runs one normfm experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation and check helpers.
//   op       - operation descriptor; dim_spec[0] and dim_spec[1] determine
//              m and n.
//   iface    - interface selector, forwarded to libblis_test_normfm_impl().
//   datatype - datatype of the matrix x and of the fill value beta.
//   pc_str   - parameter chars; not read by this function.
//   sc_str   - storage chars: [0] x.
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_normfm_check().
void libblis_test_normfm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;

    // The norm is stored in the real projection of the operand datatype.
    const num_t        real_dt      = bli_datatype_proj_to_real( datatype );

    double             best_time    = DBL_MAX;

    obj_t  beta, norm;
    obj_t  x;

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

    // Scalars: beta shares x's datatype, norm uses the real projection.
    bli_obj_scalar_init_detached( datatype, &beta );
    bli_obj_scalar_init_detached( real_dt, &norm );

    // Operand: x is m x n.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &x );

    // Every element of x is set to beta = 2 - 2i.
    bli_setsc( 2.0, -2.0, &beta );
    bli_setm( &beta, &x );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        const double t_start = bli_clock();

        libblis_test_normfm_impl( iface, &x, &norm );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with twice the flop count.
    double rate = ( 2.0 * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( x ) ) rate *= 2.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_normfm_check( params, &beta, &x, &norm, resid );

    // An empty input matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &x, perf, resid );

    // Release the operand.
    bli_obj_free( &x );
}
// Runs one dotxaxpyf experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation, randomization, and check
//              helpers.
//   op       - operation descriptor; dim_spec[0] determines m. The fusing
//              factor queried from the context is written to dim_aux[0].
//   iface    - interface selector, forwarded to
//              libblis_test_dotxaxpyf_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] conj of At, [1] conj of A, [2] conj of w,
//              [3] conj of x.
//   sc_str   - storage chars: [0] A, [1] w, [2] x, [3] y, [4] z.
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by
//              libblis_test_dotxaxpyf_check().
void libblis_test_dotxaxpyf_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    conj_t conj_at, conj_a, conj_w, conj_x;

    obj_t  alpha, beta;
    obj_t  at, a, w, x, y, z;
    obj_t  y_orig, z_orig;

    cntx_t cntx;

    // Set up the operation's context.
    bli_dotxaxpyf_cntx_init( &cntx );

    // Translate the dimension specifier into the actual dimension.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // Look up the fusing factor for this datatype.
    const dim_t b_n = bli_cntx_get_blksz_def_dt( datatype, BLIS_XF, &cntx );

    // Publish the fusing factor so the driver can print it with the results.
    op->dim_aux[0] = b_n;

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_conj( pc_str[0], &conj_at );
    bli_param_map_char_to_blis_conj( pc_str[1], &conj_a );
    bli_param_map_char_to_blis_conj( pc_str[2], &conj_w );
    bli_param_map_char_to_blis_conj( pc_str[3], &conj_x );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: A is m x b_n; w and z have length m; x and y have length b_n.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, b_n, &a );
    libblis_test_vobj_create( params, datatype, sc_str[1], m,   &w );
    libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &x );
    libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y );
    libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y_orig );
    libblis_test_vobj_create( params, datatype, sc_str[4], m,   &z );
    libblis_test_vobj_create( params, datatype, sc_str[4], m,   &z_orig );

    // Pick alpha and beta according to the domain of y.
    if ( bli_obj_is_real( y ) )
    {
        bli_setsc(  1.2, 0.0, &alpha );
        bli_setsc( -1.0, 0.0, &beta );
    }
    else
    {
        bli_setsc(  1.2,  0.1, &alpha );
        bli_setsc( -1.0, -0.1, &beta );
    }

    // Fill A, w, x, y, and z with random values; keep pristine copies of the
    // two output vectors.
    libblis_test_mobj_randomize( params, FALSE, &a );
    libblis_test_vobj_randomize( params, FALSE, &w );
    libblis_test_vobj_randomize( params, FALSE, &x );
    libblis_test_vobj_randomize( params, FALSE, &y );
    libblis_test_vobj_randomize( params, FALSE, &z );
    bli_copyv( &y, &y_orig );
    bli_copyv( &z, &z_orig );

    // At is an alias of A. It is deliberately NOT marked as transposed,
    // because the transposition is part of the dotxf subproblem.
    bli_obj_alias_to( a, at );

    // Apply the conjugation parameters.
    bli_obj_set_conj( conj_at, at );
    bli_obj_set_conj( conj_a,  a );
    bli_obj_set_conj( conj_w,  w );
    bli_obj_set_conj( conj_x,  x );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore both outputs so every repeat starts from identical input.
        bli_copyv( &y_orig, &y );
        bli_copyv( &z_orig, &z );

        const double t_start = bli_clock();

        libblis_test_dotxaxpyf_impl( iface,
                                     &alpha, &at, &a, &w, &x, &beta, &y, &z,
                                     &cntx );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 2.0 * m * b_n + 2.0 * m * b_n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( y ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_dotxaxpyf_check( params,
                                  &alpha, &at, &a, &w, &x, &beta, &y, &z,
                                  &y_orig, &z_orig, resid );

    // If either output vector is empty, zero the performance and residual.
    libblis_test_check_empty_problem( &y, perf, resid );
    libblis_test_check_empty_problem( &z, perf, resid );

    // Release the operands. The alias 'at' is not freed separately.
    bli_obj_free( &a );
    bli_obj_free( &w );
    bli_obj_free( &x );
    bli_obj_free( &y );
    bli_obj_free( &z );
    bli_obj_free( &y_orig );
    bli_obj_free( &z_orig );

    // Tear down the context.
    bli_dotxaxpyf_cntx_finalize( &cntx );
}
// Runs one gemv experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0] and dim_spec[1] determine
//              m and n.
//   iface    - interface selector, forwarded to libblis_test_gemv_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] trans of A, [1] conj of x.
//   sc_str   - storage chars:   [0] A, [1] x, [2] y (and the saved copy of y).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_gemv_check().
void libblis_test_gemv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    trans_t trans_a;
    conj_t  conj_x;

    obj_t   kappa;
    obj_t   alpha, beta;
    obj_t   a, x, y;
    obj_t   y_orig;

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_trans( pc_str[0], &trans_a );
    bli_param_map_char_to_blis_conj( pc_str[1], &conj_x );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &kappa );
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: A is created from (trans_a, m, n); x has length n; y and its
    // backup have length m.
    libblis_test_mobj_create( params, datatype, trans_a, sc_str[0], m, n, &a );
    libblis_test_vobj_create( params, datatype, sc_str[1], n, &x );
    libblis_test_vobj_create( params, datatype, sc_str[2], m, &y );
    libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_orig );

    // Pick alpha and beta according to the domain of y.
    if ( bli_obj_is_real( y ) )
    {
        bli_setsc(  2.0, 0.0, &alpha );
        bli_setsc( -1.0, 0.0, &beta );
    }
    else
    {
        bli_setsc( 0.0,  2.0, &alpha );
        bli_setsc( 0.0, -1.0, &beta );
    }

    // A is zero everywhere except its diagonal, which is set to
    // kappa = 2 - i.
    bli_setsc( 2.0, -1.0, &kappa );
    bli_setm( &BLIS_ZERO, &a );
    bli_setd( &kappa, &a );

    // Fill x and y with random values and keep a pristine copy of y.
    bli_randv( &x );
    bli_randv( &y );
    bli_copyv( &y, &y_orig );

    // Apply the trans/conj parameters.
    bli_obj_set_conjtrans( trans_a, a );
    bli_obj_set_conj( conj_x, x );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore y so that every repeat starts from identical input.
        bli_copym( &y_orig, &y );

        const double t_start = bli_clock();

        libblis_test_gemv_impl( iface, &alpha, &a, &x, &beta, &y );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 2.0 * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( y ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_gemv_check( &kappa, &alpha, &a, &x, &beta, &y, &y_orig, resid );

    // An empty output vector yields zero performance and zero residual.
    libblis_test_check_empty_problem( &y, perf, resid );

    // Release the operands.
    bli_obj_free( &a );
    bli_obj_free( &x );
    bli_obj_free( &y );
    bli_obj_free( &y_orig );
}
// Runs one gemmtrsm micro-kernel experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0] determines k. The register
//              blocksizes used for m and n are written to dim_aux[0] and
//              dim_aux[1].
//   impl     - implementation selector, forwarded to
//              libblis_test_gemmtrsm_ukr_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] uplo of A.
//   sc_str   - storage chars:   [0] C11 (and its saved copy). A_big and B use
//              fixed storage ('c' and 'r' respectively).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by
//              libblis_test_gemmtrsm_ukr_check().
void libblis_test_gemmtrsm_ukr_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       mt_impl_t      impl,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    // Storage schemes for A_big and B are fixed rather than taken from sc_str.
    const char   storage_a = 'c';
    const char   storage_b = 'r';
    const side_t side      = BLIS_LEFT;

    uplo_t uplo_a;

    obj_t  kappa;
    obj_t  alpha;
    obj_t  a_big, a, b;
    obj_t  b11, c11;
    obj_t  ap, bp;
    obj_t  a1xp, a11p, bx1p, b11p;
    obj_t  c11_orig;

    // Translate the dimension specifier into the actual k dimension.
    const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // m and n are pinned to the register blocksizes MR and NR.
    const dim_t m = bli_blksz_for_type( datatype, gemm_mr );
    const dim_t n = bli_blksz_for_type( datatype, gemm_nr );

    // Publish the register blocksizes so the driver can print them with the
    // results.
    op->dim_aux[0] = m;
    op->dim_aux[1] = n;

    // Translate the parameter character into a BLIS constant.
    bli_param_map_char_to_blis_uplo( pc_str[0], &uplo_a );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &kappa );
    bli_obj_scalar_init_detached( datatype, &alpha );

    // Operands: A_big is (k+m) x (k+m); B is (k+m) x n; C11 and its backup
    // are m x n.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              storage_a, k+m, k+m, &a_big );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              storage_b, k+m, n,   &b );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m,   n,   &c11 );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m,   n,   &c11_orig );

    // alpha is 2.0 regardless of whether the datatype is real or complex.
    bli_setsc( 2.0, 0.0, &alpha );

    // Mark A_big as triangular with the requested stored triangle.
    bli_obj_set_struc( BLIS_TRIANGULAR, a_big );
    bli_obj_set_uplo( uplo_a, a_big );

    // Fill A_big with random values.
    bli_randm( &a_big );

    // Fill B with random values and scale it by 1/m.
    bli_randm( &b );
    bli_setsc( 1.0 / ( double )m, 0.0, &kappa );
    bli_scalm( &kappa, &b );

    // A is the m-row partition of A_big starting at row offset k.
    bli_acquire_mpart_t2b( BLIS_SUBPART1, k, m, &a_big, &a );

    // B11 is the m-row partition of B whose offset depends on whether A is
    // lower (offset k) or not (offset 0). Copy it into C11 and back it up.
    const dim_t b11_offset = ( bli_obj_is_lower( a ) ? k : 0 );
    bli_acquire_mpart_t2b( BLIS_SUBPART1, b11_offset, m, &b, &b11 );
    bli_copym( &b11, &c11 );
    bli_copym( &c11, &c11_orig );

    // Prepare the pack objects.
    bli_obj_init_pack( &ap );
    bli_obj_init_pack( &bp );

    // Create the pack objects for A (diagonal inverted, row panels) and
    // B (column panels).
    libblis_test_pobj_create( gemm_mr, gemm_mr,
                              BLIS_INVERT_DIAG,
                              BLIS_PACKED_ROW_PANELS,
                              BLIS_BUFFER_FOR_A_BLOCK,
                              &a, &ap );
    libblis_test_pobj_create( gemm_mr, gemm_nr,
                              BLIS_NO_INVERT_DIAG,
                              BLIS_PACKED_COL_PANELS,
                              BLIS_BUFFER_FOR_B_PANEL,
                              &b, &bp );

    // Pack A into ap, then B into bp.
    bli_packm_blk_var3( &a, &ap );
    bli_packm_blk_var2( &b, &bp );

    // Carve the packed panels into the subpartitions the micro-kernel takes.
    bli_gemmtrsm_ukr_make_subparts( k, &ap, &bp, &a1xp, &a11p, &bx1p, &b11p );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore C11 and re-pack B so every repeat starts from identical
        // input. The re-pack happens outside the timed region.
        bli_copym( &c11_orig, &c11 );
        bli_packm_blk_var2( &b, &bp );

        const double t_start = bli_clock();

        libblis_test_gemmtrsm_ukr_impl( impl, side, &alpha,
                                        &a1xp, &a11p, &bx1p, &b11p, &c11 );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 2.0 * m * n * k + 1.0 * m * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( b ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_gemmtrsm_ukr_check( side, &alpha,
                                     &a1xp, &a11p, &bx1p, &b11p,
                                     &c11, &c11_orig, resid );

    // NOTE(review): unlike the other experiments, the empty-problem check on
    // c11 is intentionally not performed here (it was disabled in the
    // original code); presumably because c11 always has the fixed MR x NR
    // shape -- confirm before re-enabling.

    // Return the packing buffers held by the pack objects.
    bli_obj_release_pack( &ap );
    bli_obj_release_pack( &bp );

    // Release the operands.
    bli_obj_free( &a_big );
    bli_obj_free( &b );
    bli_obj_free( &c11 );
    bli_obj_free( &c11_orig );
}
// Runs one syrk experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0] and dim_spec[1] determine
//              m and k.
//   impl     - implementation selector, forwarded to
//              libblis_test_syrk_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] uplo of C, [1] trans of A.
//   sc_str   - storage chars:   [0] A, [1] C (and the saved copy of C).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_syrk_check().
void libblis_test_syrk_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       mt_impl_t      impl,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    uplo_t  uplo_c;
    trans_t trans_a;

    obj_t   kappa;
    obj_t   alpha, beta;
    obj_t   a, c;
    obj_t   c_orig;

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_uplo( pc_str[0], &uplo_c );
    bli_param_map_char_to_blis_trans( pc_str[1], &trans_a );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &kappa );
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: A is created from (trans_a, m, k); C and its backup are
    // m x m.
    libblis_test_mobj_create( params, datatype, trans_a,
                              sc_str[0], m, k, &a );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[1], m, m, &c );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[1], m, m, &c_orig );

    // Pick alpha and beta according to the domain of C.
    if ( bli_obj_is_real( c ) )
    {
        bli_setsc(  1.2, 0.0, &alpha );
        bli_setsc( -1.0, 0.0, &beta );
    }
    else
    {
        // Unlike herk, syrk keeps C symmetric in both the real and complex
        // cases, so alpha and beta may both be complex.
        bli_setsc(  1.2, 0.5, &alpha );
        bli_setsc( -1.0, 0.5, &beta );
    }

    // Fill A with random values.
    bli_randm( &a );

    // Mark C as symmetric with the requested stored triangle.
    bli_obj_set_struc( BLIS_SYMMETRIC, c );
    bli_obj_set_uplo( uplo_c, c );

    // Fill C with random values, make it densely symmetric, then zero the
    // unstored triangle so that an implementation reading outside the
    // stored region is exposed.
    bli_randm( &c );
    bli_mksymm( &c );
    bli_mktrim( &c );

    // Give the backup the same structure and uplo, then copy C into it.
    bli_obj_set_struc( BLIS_SYMMETRIC, c_orig );
    bli_obj_set_uplo( uplo_c, c_orig );
    bli_copym( &c, &c_orig );

    // Scale A by 1/k.
    bli_setsc( 1.0 / ( double )k, 0.0, &kappa );
    bli_scalm( &kappa, &a );

    // The trans parameter is applied last, after A has been populated.
    bli_obj_set_conjtrans( trans_a, a );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore C so that every repeat starts from identical input.
        bli_copym( &c_orig, &c );

        const double t_start = bli_clock();

        libblis_test_syrk_impl( impl, &alpha, &a, &beta, &c );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 1.0 * m * m * k ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( c ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_syrk_check( &alpha, &a, &beta, &c, &c_orig, resid );

    // An empty output matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &c, perf, resid );

    // Release the operands.
    bli_obj_free( &a );
    bli_obj_free( &c );
    bli_obj_free( &c_orig );
}
// Runs one axpyf experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0] determines m. The fusing
//              factor is written to dim_aux[0].
//   iface    - interface selector, forwarded to libblis_test_axpyf_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] conj of A, [1] conj of x.
//   sc_str   - storage chars:   [0] A, [1] x, [2] y (and the saved copy of y).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_axpyf_check().
void libblis_test_axpyf_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    conj_t conj_a, conj_x;

    obj_t  alpha;
    obj_t  a, x, y;
    obj_t  y_orig;

    // Translate the dimension specifier into the actual dimension.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // Look up the fusing factor for this datatype.
    const dim_t b_n = bli_axpyf_fusefac( datatype );

    // Publish the fusing factor so the driver can print it with the results.
    op->dim_aux[0] = b_n;

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_conj( pc_str[0], &conj_a );
    bli_param_map_char_to_blis_conj( pc_str[1], &conj_x );

    // Scalar.
    bli_obj_scalar_init_detached( datatype, &alpha );

    // Operands: A is m x b_n; x has length b_n; y and its backup have
    // length m.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, b_n, &a );
    libblis_test_vobj_create( params, datatype, sc_str[1], b_n, &x );
    libblis_test_vobj_create( params, datatype, sc_str[2], m,   &y );
    libblis_test_vobj_create( params, datatype, sc_str[2], m,   &y_orig );

    // alpha is -1 in the real domain and -i in the complex domain.
    if ( bli_obj_is_real( y ) )
    {
        bli_setsc( -1.0, 0.0, &alpha );
    }
    else
    {
        bli_setsc( 0.0, -1.0, &alpha );
    }

    // Fill A, x, and y with random values and keep a pristine copy of y.
    bli_randm( &a );
    bli_randv( &x );
    bli_randv( &y );
    bli_copyv( &y, &y_orig );

    // Apply the conjugation parameters.
    bli_obj_set_conj( conj_a, a );
    bli_obj_set_conj( conj_x, x );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore y so that every repeat starts from identical input.
        bli_copyv( &y_orig, &y );

        const double t_start = bli_clock();

        libblis_test_axpyf_impl( iface, &alpha, &a, &x, &y );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 2.0 * m * b_n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( y ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_axpyf_check( &alpha, &a, &x, &y, &y_orig, resid );

    // An empty output vector yields zero performance and zero residual.
    libblis_test_check_empty_problem( &y, perf, resid );

    // Release the operands.
    bli_obj_free( &a );
    bli_obj_free( &x );
    bli_obj_free( &y );
    bli_obj_free( &y_orig );
}
// Runs one scalm experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0] and dim_spec[1] determine
//              m and n.
//   iface    - interface selector, forwarded to libblis_test_scalm_impl().
//   datatype - datatype of beta and y.
//   pc_str   - parameter chars: [0] conj of beta.
//   sc_str   - storage chars:   [0] y (and the saved copy of y).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_scalm_check().
void libblis_test_scalm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    conj_t conj_beta;

    obj_t  beta;
    obj_t  y;
    obj_t  y_orig;

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

    // Translate the parameter character into a BLIS constant.
    bli_param_map_char_to_blis_conj( pc_str[0], &conj_beta );

    // Scalar.
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: y and its backup are m x n.
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &y );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &y_orig );

    // beta is -2 in the real domain and -2i in the complex domain.
    if ( bli_obj_is_real( y ) )
    {
        bli_setsc( -2.0, 0.0, &beta );
    }
    else
    {
        bli_setsc( 0.0, -2.0, &beta );
    }

    // Fill y with random values and keep a pristine copy.
    bli_randm( &y );
    bli_copym( &y, &y_orig );

    // Apply the conjugation parameter to beta.
    bli_obj_set_conj( conj_beta, beta );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore y so that every repeat starts from identical input.
        bli_copym( &y_orig, &y );

        const double t_start = bli_clock();

        libblis_test_scalm_impl( iface, &beta, &y );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with six times the flop count.
    double rate = ( 1.0 * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( y ) ) rate *= 6.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_scalm_check( &beta, &y, &y_orig, resid );

    // An empty output matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &y, perf, resid );

    // Release the operands.
    bli_obj_free( &y );
    bli_obj_free( &y_orig );
}
// Runs one her experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0] determines m.
//   iface    - interface selector, forwarded to libblis_test_her_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] uplo of A, [1] conj of x.
//   sc_str   - storage chars:   [0] x, [1] A (and the saved copy of A).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_her_check().
void libblis_test_her_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    uplo_t uplo_a;
    conj_t conj_x;

    obj_t  alpha;
    obj_t  x, a;
    obj_t  a_orig;

    // Translate the dimension specifier into the actual dimension.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_uplo( pc_str[0], &uplo_a );
    bli_param_map_char_to_blis_conj( pc_str[1], &conj_x );

    // Scalar.
    bli_obj_scalar_init_detached( datatype, &alpha );

    // Operands: x has length m; A and its backup are m x m.
    libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[1], m, m, &a );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[1], m, m, &a_orig );

    // alpha is -1 with a zero imaginary component.
    bli_setsc( -1.0, 0.0, &alpha );

    // Fill x with random values.
    bli_randv( &x );

    // Mark A as Hermitian with the requested stored triangle.
    bli_obj_set_struc( BLIS_HERMITIAN, a );
    bli_obj_set_uplo( uplo_a, a );

    // Fill A with random values, make it densely Hermitian, then zero the
    // unstored triangle so that an implementation reading outside the
    // stored region is exposed.
    bli_randm( &a );
    bli_mkherm( &a );
    bli_mktrim( &a );

    // Give the backup the same structure and uplo, then copy A into it.
    bli_obj_set_struc( BLIS_HERMITIAN, a_orig );
    bli_obj_set_uplo( uplo_a, a_orig );
    bli_copym( &a, &a_orig );

    // Conjugation is applied last, after x has been populated.
    bli_obj_set_conj( conj_x, x );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore A so that every repeat starts from identical input.
        bli_copym( &a_orig, &a );

        const double t_start = bli_clock();

        libblis_test_her_impl( iface, &alpha, &x, &a );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 1.0 * m * m ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( a ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_her_check( &alpha, &x, &a, &a_orig, resid );

    // An empty output matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &a, perf, resid );

    // Release the operands.
    bli_obj_free( &x );
    bli_obj_free( &a );
    bli_obj_free( &a_orig );
}
// Runs one gemm experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats is read here, and the struct
//              is forwarded to the object-creation helpers.
//   op       - operation descriptor; dim_spec[0..2] determine m, n, and k.
//   iface    - interface selector, forwarded to libblis_test_gemm_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] trans of A, [1] trans of B.
//   sc_str   - storage chars:   [0] A, [1] B, [2] C (and the saved copy of C).
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_gemm_check().
void libblis_test_gemm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = 1e9;

    trans_t trans_a;
    trans_t trans_b;

    obj_t   kappa;
    obj_t   alpha, beta;
    obj_t   a, b, c;
    obj_t   c_orig;

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
    const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_trans( pc_str[0], &trans_a );
    bli_param_map_char_to_blis_trans( pc_str[1], &trans_b );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &kappa );
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: A from (trans_a, m, k); B from (trans_b, k, n); C and its
    // backup are m x n.
    libblis_test_mobj_create( params, datatype, trans_a,
                              sc_str[0], m, k, &a );
    libblis_test_mobj_create( params, datatype, trans_b,
                              sc_str[1], k, n, &b );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[2], m, n, &c );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[2], m, n, &c_orig );

    // Pick alpha and beta according to the domain of C.
    if ( bli_obj_is_real( c ) )
    {
        bli_setsc(  1.2, 0.0, &alpha );
        bli_setsc( -1.0, 0.0, &beta );
    }
    else
    {
        bli_setsc(  1.2, 0.8, &alpha );
        bli_setsc( -1.0, 1.0, &beta );
    }

    // Fill A, B, and C with random values and keep a pristine copy of C.
    bli_randm( &a );
    bli_randm( &b );
    bli_randm( &c );
    bli_copym( &c, &c_orig );

    // Scale both A and B by 1/k.
    bli_setsc( 1.0 / ( double )k, 0.0, &kappa );
    bli_scalm( &kappa, &a );
    bli_scalm( &kappa, &b );

    // Apply the trans parameters.
    bli_obj_set_conjtrans( trans_a, a );
    bli_obj_set_conjtrans( trans_b, b );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore C so that every repeat starts from identical input.
        bli_copym( &c_orig, &c );

        const double t_start = bli_clock();

        libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure; complex datatypes are
    // credited with four times the flop count.
    double rate = ( 2.0 * m * n * k ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( c ) ) rate *= 4.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_gemm_check( &alpha, &a, &b, &beta, &c, &c_orig, resid );

    // An empty output matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &c, perf, resid );

    // Release the operands.
    bli_obj_free( &a );
    bli_obj_free( &b );
    bli_obj_free( &c );
    bli_obj_free( &c_orig );
}
// Runs one subm experiment at problem size p_cur.
//
// Unlike most other experiments, the operation is executed exactly once:
// repeats are disabled because restoring y between repeats would rely on
// bli_copym(), which is not yet tested at this point.
//
//   params   - test suite parameters, forwarded to the object-creation and
//              check helpers.
//   op       - operation descriptor; dim_spec[0] and dim_spec[1] determine
//              m and n.
//   iface    - interface selector, forwarded to libblis_test_subm_impl().
//   datatype - datatype of all scalars and operands.
//   pc_str   - parameter chars: [0] trans of x.
//   sc_str   - storage chars:   [0] x, [1] y.
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the single timed run.
//   resid    - output: residual as computed by libblis_test_subm_check().
void libblis_test_subm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    double  best_time = DBL_MAX;

    trans_t trans_x;

    obj_t   alpha, beta;
    obj_t   x, y;

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

    // Translate the parameter character into a BLIS constant.
    bli_param_map_char_to_blis_trans( pc_str[0], &trans_x );

    // Scalars.
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Operands: x is created from (trans_x, m, n); y is m x n.
    libblis_test_mobj_create( params, datatype, trans_x,
                              sc_str[0], m, n, &x );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[1], m, n, &y );

    // alpha = 1 + i and beta = 3 + 3i.
    bli_setsc( 1.0, 1.0, &alpha );
    bli_setsc( 3.0, 3.0, &beta );

    // Every element of x is set to alpha and every element of y to beta.
    bli_setm( &alpha, &x );
    bli_setm( &beta, &y );

    // Apply the trans parameter.
    bli_obj_set_conjtrans( trans_x, x );

    // Single timed run (no repeat loop; see the function header).
    const double t_start = bli_clock();

    libblis_test_subm_impl( iface, &x, &y );

    best_time = bli_clock_min_diff( best_time, t_start );

    // Convert the measured time to a performance figure; complex datatypes
    // are credited with twice the flop count.
    double rate = ( 1.0 * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( x ) ) rate *= 2.0;
    *perf = rate;

    // Compute the residual.
    libblis_test_subm_check( params, &alpha, &beta, &x, &y, resid );

    // An empty output matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &y, perf, resid );

    // Release the operands.
    bli_obj_free( &x );
    bli_obj_free( &y );
}
// Runs one mixed-datatype gemm experiment at problem size p_cur.
//
//   params   - test suite parameters; n_repeats and mixed_precision are read
//              here, and the struct is forwarded to the object-creation,
//              randomization, and check helpers.
//   op       - operation descriptor; dim_spec[0..2] determine m, n, and k.
//   iface    - interface selector, forwarded to libblis_test_gemm_impl().
//   dc_str   - datatype chars: [0] C, [1] A, [2] B; when
//              params->mixed_precision is set, [3] is the computation
//              datatype from which the computation precision is taken.
//   pc_str   - parameter chars: [0] trans of A, [1] trans of B.
//   sc_str   - storage chars:   [0] C (and the saved copy of C), [1] A, [2] B.
//   p_cur    - current problem size.
//   perf     - output: performance estimate from the fastest repeat.
//   resid    - output: residual as computed by libblis_test_gemm_md_check().
void libblis_test_gemm_md
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = DBL_MAX;

    num_t   dt_a, dt_b, dt_c;

    trans_t trans_a;
    trans_t trans_b;

    obj_t   alpha, beta;
    obj_t   a, b, c;
    obj_t   c_orig;

    // Decode the datatype combination string (order: C, A, B).
    bli_param_map_char_to_blis_dt( dc_str[0], &dt_c );
    bli_param_map_char_to_blis_dt( dc_str[1], &dt_a );
    bli_param_map_char_to_blis_dt( dc_str[2], &dt_b );

    // The scalars live in the complex domain; which operand datatype is
    // projected to obtain it does not matter, so C's is used.
    const num_t dt_scalar = bli_dt_proj_to_complex( dt_c );

    // Translate the dimension specifiers into actual dimensions.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
    const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

    // Translate the parameter characters into BLIS constants.
    bli_param_map_char_to_blis_trans( pc_str[0], &trans_a );
    bli_param_map_char_to_blis_trans( pc_str[1], &trans_b );

    // Scalars.
    bli_obj_scalar_init_detached( dt_scalar, &alpha );
    bli_obj_scalar_init_detached( dt_scalar, &beta );

    // Operands, each with its own datatype: A from (trans_a, m, k); B from
    // (trans_b, k, n); C and its backup are m x n.
    libblis_test_mobj_create( params, dt_a, trans_a,
                              sc_str[1], m, k, &a );
    libblis_test_mobj_create( params, dt_b, trans_b,
                              sc_str[2], k, n, &b );
    libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &c );
    libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &c_orig );

    // For mixed-precision runs, the computation precision is encoded in an
    // extra datatype character of dc_str and is attached to C.
    if ( params->mixed_precision )
    {
        num_t dt_comp;

        bli_param_map_char_to_blis_dt( dc_str[3], &dt_comp );
        bli_obj_set_comp_prec( bli_dt_prec( dt_comp ), &c );
    }

    // alpha = 2 and beta = 1.2 + 0.5i.
    bli_setsc( 2.0, 0.0, &alpha );
    bli_setsc( 1.2, 0.5, &beta );

    // Fill A, B, and C with random values and keep a pristine copy of C.
    libblis_test_mobj_randomize( params, TRUE, &a );
    libblis_test_mobj_randomize( params, TRUE, &b );
    libblis_test_mobj_randomize( params, TRUE, &c );
    bli_copym( &c, &c_orig );

    // Apply the trans parameters.
    bli_obj_set_conjtrans( trans_a, &a );
    bli_obj_set_conjtrans( trans_b, &b );

    // Time each repeat, remembering the fastest one.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        // Restore C so that every repeat starts from identical input.
        bli_copym( &c_orig, &c );

        const double t_start = bli_clock();

#if 0
        bli_printm( "a", &a, "%5.2f", "" );
        bli_printm( "b", &b, "%5.2f", "" );
        bli_printm( "c", &c, "%5.2f", "" );
        bli_printm( "alpha", &alpha, "%5.2f", "" );
        bli_printm( "beta", &beta, "%5.2f", "" );
#endif

        libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );

#if 0
        bli_printm( "c after", &c, "%5.2f", "" );
#endif

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the best time to a performance figure. The flop count comes
    // from libblis_test_gemm_flops() rather than a fixed 2*m*n*k formula.
    const double flops = libblis_test_gemm_flops( &a, &b, &c );
    *perf = flops / best_time / FLOPS_PER_UNIT_PERF;

    // Compute the residual.
    libblis_test_gemm_md_check( params, &alpha, &a, &b, &beta, &c, &c_orig, resid );

    // An empty output matrix yields zero performance and zero residual.
    libblis_test_check_empty_problem( &c, perf, resid );

    // Release the operands.
    bli_obj_free( &a );
    bli_obj_free( &b );
    bli_obj_free( &c );
    bli_obj_free( &c_orig );
}
// Runs one ger experiment for problem size p_cur.
//
//   params   - test parameters; n_repeats is read here.
//   op       - operation descriptor; dim_spec[0..1] are the m, n specifiers.
//   iface    - interface selector forwarded to libblis_test_ger_impl().
//   datatype - datatype used for alpha, x, y, and A.
//   pc_str   - parameter chars: [0] = conjx, [1] = conjy.
//   sc_str   - storage chars: [0] = x, [1] = y, [2] = A.
//   p_cur    - current problem size.
//   perf     - output: performance estimate derived from the fastest repeat.
//   resid    - output: residual produced by libblis_test_ger_check().
void libblis_test_ger_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	dim_t        m, n;

	conj_t       conjx, conjy;

	obj_t        alpha, x, y, a;
	obj_t        a_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjx );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjy );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands: x has length m, y has length n, and A (plus
	// its backup) is m x n.
	libblis_test_vobj_create( params, datatype,
	                          sc_str[0], m, &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], n, &y );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, n, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, n, &a_save );

	// Set alpha. The real and complex cases previously sat in two branches
	// that issued this exact same call, so a single call suffices.
	// NOTE(review): for real datatypes bli_setsc() presumably ignores the
	// imaginary argument -- confirm against the BLIS documentation.
	bli_setsc( -1.0, 1.0, &alpha );

	// Randomize x and y.
	libblis_test_vobj_randomize( params, TRUE, &x );
	libblis_test_vobj_randomize( params, TRUE, &y );

	// Initialize A to identity and save it, so that every repeat starts
	// from the same matrix.
	bli_setm( &BLIS_ZERO, &a );
	bli_setd( &BLIS_ONE,  &a );
	bli_copym( &a, &a_save );

	// Apply the parameters.
	bli_obj_set_conj( conjx, &x );
	bli_obj_set_conj( conjy, &y );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore A before each run.
		bli_copym( &a_save, &a );

		time = bli_clock();

		libblis_test_ger_impl( iface, &alpha, &x, &y, &a );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat. The flop
	// estimate 2*m*n is multiplied by 4 for complex datatypes.
	*perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &a ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_ger_check( params, &alpha, &x, &y, &a, &a_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &a, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &a );
	bli_obj_free( &a_save );
}
// Runs one randm experiment for problem size p_cur.
//
//   params  - test parameters; n_repeats is read here.
//   op      - operation descriptor; dim_spec[0..1] are the m, n specifiers.
//   impl    - implementation selector forwarded to libblis_test_randm_impl().
//   dt      - datatype of the matrix being randomized.
//   pc_str  - parameter chars (not read by this routine).
//   sc_str  - storage chars: [0] = x.
//   p_cur   - current problem size.
//   perf    - output: performance estimate derived from the fastest repeat.
//   resid   - output: value produced by libblis_test_randm_check().
void libblis_test_randm_experiment( test_params_t* params,
                                    test_op_t*     op,
                                    mt_impl_t      impl,
                                    num_t          dt,
                                    char*          pc_str,
                                    char*          sc_str,
                                    unsigned int   p_cur,
                                    double*        perf,
                                    double*        resid )
{
	const unsigned int repeat_count = params->n_repeats;
	double             best_time    = 1e9;
	obj_t              x;

	// Resolve the dimension specifiers for this problem size.
	const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Create the m x n matrix, stored as requested by sc_str[0].
	libblis_test_mobj_create( params, dt, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &x );

	// Time the operation repeat_count times, keeping the smallest time.
	for ( unsigned int rep = 0; rep < repeat_count; ++rep )
	{
		const double t_start = bli_clock();

		libblis_test_randm_impl( impl, &x );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Convert the best time into a performance figure; the estimate is
	// doubled for complex datatypes.
	*perf = ( 2.0 * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 2.0;

	// randm() has no meaningful residual/diff, because there is no way to
	// say for sure what is "random" and what is not. Instead, the check
	// routine manually performs tests that fail under scenarios which are
	// considered likely.
	libblis_test_randm_check( &x, resid );

	// Zero out performance and residual if the matrix is empty.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Release the matrix.
	bli_obj_free( &x );
}
// Runs one trsm experiment for problem size p_cur.
//
//   params   - test parameters; n_repeats is read here.
//   op       - operation descriptor; dim_spec[0..1] are the m, n specifiers.
//   iface    - interface selector forwarded to libblis_test_trsm_impl().
//   datatype - datatype used for alpha, A, and B.
//   pc_str   - parameter chars: [0] = side, [1] = uploa, [2] = transa,
//              [3] = diaga.
//   sc_str   - storage chars: [0] = A, [1] = B.
//   p_cur    - current problem size.
//   perf     - output: performance estimate derived from the fastest repeat.
//   resid    - output: residual produced by libblis_test_trsm_check().
void libblis_test_trsm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m, n;
	dim_t        mn_side;

	side_t       side;
	uplo_t       uploa;
	trans_t      transa;
	diag_t       diaga;

	obj_t        alpha, a, b;
	obj_t        b_save;


	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transa );
	bli_param_map_char_to_blis_diag( pc_str[3], &diaga );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands. bli_set_dim_with_side() sets mn_side from m
	// and n according to side; A is mn_side x mn_side, while B and its
	// backup are m x n.
	bli_set_dim_with_side( side, m, n, mn_side );
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, n, &b_save );

	// Set alpha. The real and complex cases previously sat in two branches
	// that issued this exact same call, so a single call suffices.
	bli_setsc( 2.0, 0.0, &alpha );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, load the diagonal, make it densely triangular.
	libblis_test_mobj_randomize( params, TRUE, &a );
	libblis_test_mobj_load_diag( params, &a );
	bli_mktrim( &a );

	// Randomize B and save it, so that every repeat starts from the same
	// right-hand side.
	libblis_test_mobj_randomize( params, TRUE, &b );
	bli_copym( &b, &b_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_diag( diaga, a );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore B before each run.
		bli_copym( &b_save, &b );

		time = bli_clock();

		libblis_test_trsm_impl( iface, side, &alpha, &a, &b );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat. The flop
	// estimate mn_side*m*n is multiplied by 4 for complex datatypes.
	*perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( b ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trsm_check( params, side, &alpha, &a, &b, &b_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &b, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &b_save );
}
// Runs one her2k experiment for problem size p_cur.
//
//   params   - test parameters; n_repeats is read here.
//   op       - operation descriptor; dim_spec[0..1] are the m, k specifiers.
//   iface    - interface selector forwarded to libblis_test_her2k_impl().
//   datatype - datatype used for alpha, beta, A, B, and C.
//   pc_str   - parameter chars: [0] = uploc, [1] = transa, [2] = transb.
//   sc_str   - storage chars: [0] = A, [1] = B, [2] = C.
//   p_cur    - current problem size.
//   perf     - output: performance estimate derived from the fastest repeat.
//   resid    - output: residual produced by libblis_test_her2k_check().
void libblis_test_her2k_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	const unsigned int repeat_count = params->n_repeats;
	double             best_time    = DBL_MAX;

	uplo_t  uploc;
	trans_t transa, transb;
	obj_t   alpha, beta;
	obj_t   a, b, c, c_save;

	// Resolve the dimension specifiers for this problem size.
	const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Translate the parameter characters into BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transb );

	// Scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Operands: A and B are m x k, C and its backup are m x m.
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[0], m, k, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], m, k, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, m, &c_save );

	// For her2k, alpha may be complex (its imaginary part is non-zero only
	// when C is not real), but beta must be real-valued in order to
	// preserve the Hermitian structure of C.
	bli_setsc(  0.8, ( bli_obj_is_real( c ) ? 0.0 : 0.5 ), &alpha );
	bli_setsc( -1.0, 0.0, &beta );

	// Fill A and B with random values.
	libblis_test_mobj_randomize( params, TRUE, &a );
	libblis_test_mobj_randomize( params, TRUE, &b );

	// Mark C as Hermitian, stored in the requested triangle.
	bli_obj_set_struc( BLIS_HERMITIAN, c );
	bli_obj_set_uplo( uploc, c );

	// Randomize C, make it densely Hermitian, and then zero the unstored
	// triangle to ensure the implementation reads only from the stored
	// region.
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_mkherm( &c );
	bli_mktrim( &c );

	// Give the backup the same structure and uplo properties, then save C
	// into it so that every repeat starts from the same matrix.
	bli_obj_set_struc( BLIS_HERMITIAN, c_save );
	bli_obj_set_uplo( uploc, c_save );
	bli_copym( &c, &c_save );

	// Attach the requested conjugation/transposition to A and B.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_conjtrans( transb, b );

	// Time the operation repeat_count times, keeping the smallest time.
	for ( unsigned int rep = 0; rep < repeat_count; ++rep )
	{
		// Restore C before each run.
		bli_copym( &c_save, &c );

		const double t_start = bli_clock();

		libblis_test_her2k_impl( iface, &alpha, &a, &b, &beta, &c );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Convert the best time into a performance figure. The flop estimate
	// 2*m*m*k is multiplied by 4 for complex datatypes.
	*perf = ( 2.0 * m * m * k ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Compute the residual.
	libblis_test_her2k_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if the output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Release all operands.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}
// Runs one trmv experiment for problem size p_cur.
//
//   params   - test parameters; n_repeats is read here.
//   op       - operation descriptor; dim_spec[0] is the m specifier.
//   impl     - implementation selector forwarded to libblis_test_trmv_impl().
//   datatype - datatype used for alpha, kappa, A, and x.
//   pc_str   - parameter chars: [0] = uploa, [1] = transa, [2] = diaga.
//   sc_str   - storage chars: [0] = A, [1] = x.
//   p_cur    - current problem size.
//   perf     - output: performance estimate derived from the fastest repeat.
//   resid    - output: residual produced by libblis_test_trmv_check().
void libblis_test_trmv_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   mt_impl_t      impl,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = 1e9;
	double       time;

	dim_t        m;

	uplo_t       uploa;
	trans_t      transa;
	diag_t       diaga;

	obj_t        kappa;
	obj_t        alpha, a, x;
	obj_t        x_save;


	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );
	bli_param_map_char_to_blis_diag( pc_str[2], &diaga );

	// Create test scalars.
	bli_obj_init_scalar( datatype, &alpha );
	bli_obj_init_scalar( datatype, &kappa );

	// Create test operands: A is m x m, x and its backup have length m.
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &a );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], m, &x );
	libblis_test_vobj_create( params, datatype,
	                          sc_str[1], m, &x_save );

	// Set alpha: ( -1.0, 0.0 ) for real x, ( 0.0, -1.0 ) otherwise.
	if ( bli_obj_is_real( x ) )
		bli_setsc( -1.0,  0.0, &alpha );
	else
		bli_setsc(  0.0, -1.0, &alpha );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely triangular.
	bli_randm( &a );
	bli_mktrim( &a );

	// Randomize x and save.
	bli_randv( &x );
	bli_copyv( &x, &x_save );

	// Normalize both the working vector and its backup by m.
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalv( &kappa, &x );
	bli_scalv( &kappa, &x_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, a );
	bli_obj_set_diag( diaga, a );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore x before each run. x and x_save are vector objects, so
		// the vector copy is used here, matching the bli_copyv() call
		// that saved x above.
		bli_copyv( &x_save, &x );

		time = bli_clock();

		libblis_test_trmv_impl( impl, &alpha, &a, &x );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat. The flop
	// estimate m*m is multiplied by 4 for complex datatypes.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( x ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trmv_check( &alpha, &a, &x, &x_save, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &x, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &x );
	bli_obj_free( &x_save );
}
// Runs one hemm experiment for problem size p_cur.
//
//   params   - test parameters; n_repeats is read here.
//   op       - operation descriptor; dim_spec[0..1] are the m, n specifiers.
//   iface    - interface selector forwarded to libblis_test_hemm_impl().
//   datatype - datatype used for the scalars and for A, B, and C.
//   pc_str   - parameter chars: [0] = side, [1] = uploa, [2] = conja,
//              [3] = transb.
//   sc_str   - storage chars: [0] = A, [1] = B, [2] = C.
//   p_cur    - current problem size.
//   perf     - output: performance estimate derived from the fastest repeat.
//   resid    - output: residual produced by libblis_test_hemm_check().
void libblis_test_hemm_experiment( test_params_t* params,
                                   test_op_t*     op,
                                   iface_t        iface,
                                   num_t          datatype,
                                   char*          pc_str,
                                   char*          sc_str,
                                   unsigned int   p_cur,
                                   double*        perf,
                                   double*        resid )
{
	const unsigned int repeat_count = params->n_repeats;
	double             best_time    = 1e9;

	dim_t   mn_side;
	side_t  side;
	uplo_t  uploa;
	conj_t  conja;
	trans_t transb;
	obj_t   kappa, alpha, beta;
	obj_t   a, b, c, c_save;

	// Resolve the dimension specifiers for this problem size.
	const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Translate the parameter characters into BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[2], &conja );
	bli_param_map_char_to_blis_trans( pc_str[3], &transb );

	// Scalars.
	bli_obj_scalar_init_detached( datatype, &kappa );
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Operands. bli_set_dim_with_side() sets mn_side from m and n
	// according to side; A is mn_side x mn_side, while B, C, and C's
	// backup are m x n.
	bli_set_dim_with_side( side, m, n, mn_side );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[1], m, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, n, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[2], m, n, &c_save );

	// alpha and beta get non-zero imaginary parts only when C is not real:
	// alpha = ( 1.2, 0.0 ) or ( 1.2, 0.8 ); beta = ( -1.0, 0.0 ) or
	// ( -1.0, 1.0 ).
	bli_setsc(  1.2, ( bli_obj_is_real( c ) ? 0.0 : 0.8 ), &alpha );
	bli_setsc( -1.0, ( bli_obj_is_real( c ) ? 0.0 : 1.0 ), &beta );

	// Mark A as Hermitian, stored in the requested triangle.
	bli_obj_set_struc( BLIS_HERMITIAN, a );
	bli_obj_set_uplo( uploa, a );

	// Randomize A, make it densely Hermitian, and then zero the unstored
	// triangle to ensure the implementation reads only from the stored
	// region.
	bli_randm( &a );
	bli_mkherm( &a );
	bli_mktrim( &a );

	// Fill B and C with random values, then back up C so that every
	// repeat starts from the same input.
	bli_randm( &b );
	bli_randm( &c );
	bli_copym( &c, &c_save );

	// Scale B by 1/m.
	bli_setsc( 1.0/( double )m, 0.0, &kappa );
	bli_scalm( &kappa, &b );

	// Attach the requested conjugation to A and conjugation/transposition
	// to B.
	bli_obj_set_conj( conja, a );
	bli_obj_set_conjtrans( transb, b );

	// Time the operation repeat_count times, keeping the smallest time.
	for ( unsigned int rep = 0; rep < repeat_count; ++rep )
	{
		// Restore C before each run.
		bli_copym( &c_save, &c );

		const double t_start = bli_clock();

		libblis_test_hemm_impl( iface, side, &alpha, &a, &b, &beta, &c );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Convert the best time into a performance figure. The flop estimate
	// 2*mn_side*m*n is multiplied by 4 for complex datatypes.
	*perf = ( 2.0 * mn_side * m * n ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( c ) ) *perf *= 4.0;

	// Compute the residual.
	libblis_test_hemm_check( side, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if the output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Release all operands.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}