// Runs one gemm experiment for a single datatype, parameter combination,
// storage combination, and problem size. The routine creates the operands,
// times libblis_test_gemm_impl() over params->n_repeats repeats, and then
// reports a performance estimate and a residual.
//
//   params   - test parameters; n_repeats is read here and the struct is
//              forwarded to libblis_test_mobj_create().
//   op       - operation descriptor; dim_spec[0..2] are the dimension
//              specifiers for m, n, and k, respectively.
//   iface    - interface selector forwarded to libblis_test_gemm_impl().
//   datatype - datatype used for every scalar and matrix created here.
//   pc_str   - parameter chars: pc_str[0] maps to transa, pc_str[1] to transb.
//   sc_str   - storage chars: sc_str[0] for a, sc_str[1] for b, sc_str[2]
//              for c (and c_save).
//   p_cur    - current problem size used to resolve the dimension specifiers.
//   perf     - output: performance estimate based on the smallest recorded
//              time.
//   resid    - output: residual written by libblis_test_gemm_check().
//
// Both outputs are subsequently passed through
// libblis_test_check_empty_problem() along with c.
void libblis_test_gemm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       num_t          datatype,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    unsigned int n_repeats = params->n_repeats;
    unsigned int i;

    // Seed the running minimum with DBL_MAX rather than a magic number, as
    // libblis_test_gemm_md() does, presumably so that the first valid
    // measurement always replaces it.
    double       time_min  = DBL_MAX;
    double       time;

    dim_t        m, n, k;

    trans_t      transa;
    trans_t      transb;

    obj_t        kappa;
    obj_t        alpha, a, b, beta, c;
    obj_t        c_save;

    // Map the dimension specifier to actual dimensions.
    m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
    k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

    // Map parameter characters to BLIS constants.
    bli_param_map_char_to_blis_trans( pc_str[0], &transa );
    bli_param_map_char_to_blis_trans( pc_str[1], &transb );

    // Create test scalars.
    bli_obj_scalar_init_detached( datatype, &kappa );
    bli_obj_scalar_init_detached( datatype, &alpha );
    bli_obj_scalar_init_detached( datatype, &beta );

    // Create test operands (vectors and/or matrices).
    libblis_test_mobj_create( params, datatype, transa,
                              sc_str[0], m, k, &a );
    libblis_test_mobj_create( params, datatype, transb,
                              sc_str[1], k, n, &b );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[2], m, n, &c );
    libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
                              sc_str[2], m, n, &c_save );

    // Set alpha and beta. Complex datatypes get non-zero imaginary parts.
    // Object accessors are given the object's address, matching the calls
    // in libblis_test_gemm_md().
    if ( bli_obj_is_real( &c ) )
    {
        bli_setsc(  1.2,  0.0, &alpha );
        bli_setsc( -1.0,  0.0, &beta );
    }
    else
    {
        bli_setsc(  1.2,  0.8, &alpha );
        bli_setsc( -1.0,  1.0, &beta );
    }

    // Randomize A, B, and C, and save C.
    bli_randm( &a );
    bli_randm( &b );
    bli_randm( &c );
    bli_copym( &c, &c_save );

    // Normalize by k.
    // NOTE(review): if k resolves to 0 this divides by zero; a and b are then
    // created with a zero dimension. Confirm whether a guard is wanted.
    bli_setsc( 1.0/( double )k, 0.0, &kappa );
    bli_scalm( &kappa, &a );
    bli_scalm( &kappa, &b );

    // Apply the parameters.
    bli_obj_set_conjtrans( transa, &a );
    bli_obj_set_conjtrans( transb, &b );

    // Repeat the experiment n_repeats times and record results.
    for ( i = 0; i < n_repeats; ++i )
    {
        // Restore the original C so that every repeat starts from the same
        // input.
        bli_copym( &c_save, &c );

        time = bli_clock();

        libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );

        time_min = bli_clock_min_diff( time_min, time );
    }

    // Estimate the performance of the best experiment repeat. The complex
    // case is scaled by 4 relative to the 2*m*n*k count used for real.
    *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF;
    if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;

    // Perform checks.
    libblis_test_gemm_check( &alpha, &a, &b, &beta, &c, &c_save, resid );

    // Zero out performance and residual if output matrix is empty.
    libblis_test_check_empty_problem( &c, perf, resid );

    // Free the test objects.
    bli_obj_free( &a );
    bli_obj_free( &b );
    bli_obj_free( &c );
    bli_obj_free( &c_save );
}
// Runs one mixed-datatype gemm experiment for a single datatype combination,
// parameter combination, storage combination, and problem size. Operands are
// created and randomized, libblis_test_gemm_impl() is timed over
// params->n_repeats repeats, and a performance estimate and residual are
// reported.
//
//   params - test parameters; n_repeats and mixed_precision are read here and
//            the struct is forwarded to the operand helpers and the checker.
//   op     - operation descriptor; dim_spec[0..2] are the dimension
//            specifiers for m, n, and k, respectively.
//   iface  - interface selector forwarded to libblis_test_gemm_impl().
//   dc_str - datatype chars: dc_str[0] for c, dc_str[1] for a, dc_str[2] for
//            b; dc_str[3] is read only when params->mixed_precision is set.
//   pc_str - parameter chars: pc_str[0] maps to transa, pc_str[1] to transb.
//   sc_str - storage chars: sc_str[0] for c (and its saved copy), sc_str[1]
//            for a, sc_str[2] for b.
//   p_cur  - current problem size used to resolve the dimension specifiers.
//   perf   - output: performance estimate based on the smallest recorded
//            time.
//   resid  - output: residual written by libblis_test_gemm_md_check().
void libblis_test_gemm_md
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
    const unsigned int repeat_count = params->n_repeats;
    double             best_time    = DBL_MAX;

    // Decode the datatype combination string (ordered c, a, b).
    num_t dt_c, dt_a, dt_b;
    bli_param_map_char_to_blis_dt( dc_str[0], &dt_c );
    bli_param_map_char_to_blis_dt( dc_str[1], &dt_a );
    bli_param_map_char_to_blis_dt( dc_str[2], &dt_b );

    // The scalars live in the complex domain; the projection is taken from
    // c's datatype (any of the three would do).
    const num_t dt_scalar = bli_dt_proj_to_complex( dt_c );

    // Resolve the dimension specifiers for the current problem size.
    const dim_t m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
    const dim_t n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
    const dim_t k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur );

    // Translate the parameter characters into BLIS trans_t values.
    trans_t transa, transb;
    bli_param_map_char_to_blis_trans( pc_str[0], &transa );
    bli_param_map_char_to_blis_trans( pc_str[1], &transb );

    // Create the scalars and give them their fixed test values.
    obj_t alpha, beta;
    bli_obj_scalar_init_detached( dt_scalar, &alpha );
    bli_obj_scalar_init_detached( dt_scalar, &beta );
    bli_setsc( 2.0, 0.0, &alpha );
    bli_setsc( 1.2, 0.5, &beta );

    // Create the matrix operands, plus a second m x n object that keeps the
    // original contents of c.
    obj_t a, b, c, c_save;
    libblis_test_mobj_create( params, dt_a, transa,
                              sc_str[1], m, k, &a );
    libblis_test_mobj_create( params, dt_b, transb,
                              sc_str[2], k, n, &b );
    libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &c );
    libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE,
                              sc_str[0], m, n, &c_save );

    // In mixed-precision mode a fourth datatype char names the computation
    // datatype; only its precision is extracted and attached to c.
    if ( params->mixed_precision )
    {
        num_t dt_comp;

        bli_param_map_char_to_blis_dt( dc_str[3], &dt_comp );
        bli_obj_set_comp_prec( bli_dt_prec( dt_comp ), &c );
    }

    // Fill a, b, and c with random values, then snapshot c.
    // NOTE(review): the TRUE argument presumably requests normalization of
    // the random values -- confirm against libblis_test_mobj_randomize().
    libblis_test_mobj_randomize( params, TRUE, &a );
    libblis_test_mobj_randomize( params, TRUE, &b );
    libblis_test_mobj_randomize( params, TRUE, &c );
    bli_copym( &c, &c_save );

    // Attach the requested conjugation/transposition to a and b.
    bli_obj_set_conjtrans( transa, &a );
    bli_obj_set_conjtrans( transb, &b );

    // Time each repeat, always starting from the saved copy of c, and keep
    // the running minimum.
    for ( unsigned int rep = 0; rep < repeat_count; ++rep )
    {
        bli_copym( &c_save, &c );

        const double t_start = bli_clock();

#if 0
        bli_printm( "a", &a, "%5.2f", "" );
        bli_printm( "b", &b, "%5.2f", "" );
        bli_printm( "c", &c, "%5.2f", "" );
        bli_printm( "alpha", &alpha, "%5.2f", "" );
        bli_printm( "beta", &beta, "%5.2f", "" );
#endif

        libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c );

#if 0
        bli_printm( "c after", &c, "%5.2f", "" );
#endif

        best_time = bli_clock_min_diff( best_time, t_start );
    }

    // Convert the flop count reported for these operands into a performance
    // figure using the best time.
    const double flop_count = libblis_test_gemm_flops( &a, &b, &c );
    *perf = flop_count / best_time / FLOPS_PER_UNIT_PERF;

    // Compute the residual for the final contents of c.
    libblis_test_gemm_md_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid );

    // Let the empty-problem helper adjust perf and resid based on c.
    libblis_test_check_empty_problem( &c, perf, resid );

    // Release all matrix objects created above.
    bli_obj_free( &a );
    bli_obj_free( &b );
    bli_obj_free( &c );
    bli_obj_free( &c_save );
}