コード例 #1
0
ファイル: bli_hemv_blk_var1.c プロジェクト: figual/blis
// Blocked hemv, variant 1.
//
// Scales y by beta, then walks the diagonal of A one block at a time. For
// each diagonal block A11 (with A10 the panel to its left, x1/y1 the
// matching pieces of x and y, and x0/y0 the pieces preceding them) it
// performs:
//
//   y0 = y0 + alpha * A10' * x1
//   y1 = y1 + alpha * A11  * x1
//   y1 = y1 + alpha * A10  * x0
//
//   conjh - combined with BLIS_TRANSPOSE for the A10' product and
//           forwarded to the diagonal-block hemv
//   alpha - scalar applied to every matrix-vector product
//   a     - matrix operand; its length determines the iteration space
//   x     - input vector
//   beta  - scalar applied to y before any accumulation
//   y     - vector that is scaled and accumulated into
//   cntx  - context forwarded to every sub-operation
//   cntl  - control node providing the blocksize id and the sub-nodes for
//           scalv, packm, packv, gemv, hemv and unpackv
void bli_hemv_blk_var1( conj_t  conjh,
                        obj_t*  alpha,
                        obj_t*  a,
                        obj_t*  x,
                        obj_t*  beta,
                        obj_t*  y,
                        cntx_t* cntx,
                        hemv_t* cntl )
{
	// Partitions of A for the current iteration: the diagonal block (A11),
	// its packed form, and the panel to its left (A10).
	obj_t   a_blk, a_blk_packed;
	obj_t   a_row;

	// Partitions of x: the current piece (x1), its packed form, and the
	// leading piece (x0).
	obj_t   x_blk, x_blk_packed;
	obj_t   x_head;

	// Partitions of y, laid out like those of x.
	obj_t   y_blk, y_blk_packed;
	obj_t   y_head;

	dim_t   n_diag;
	dim_t   i_diag;
	dim_t   blk_len;

	// Only the lower triangular case is spelled out below, yet upper
	// triangular storage works as well. When bli_acquire_mpart_tl2br() is
	// handed a matrix stored in the upper triangle and asked for a
	// subpartition in the lower triangle (which is what this algorithm
	// requests), it returns the "mirror" subpartition from the upper
	// triangle and marks it for transposition (and conjugation).

	// The pack objects persist across iterations, so set them up once.
	bli_obj_init_pack( &a_blk_packed );
	bli_obj_init_pack( &x_blk_packed );
	bli_obj_init_pack( &y_blk_packed );

	// The length of A bounds the diagonal walk.
	n_diag = bli_obj_length( a );

	// y = beta * y;
	bli_scalv_int( beta, y, cntx, bli_cntl_sub_scalv( cntl ) );

	// Walk down the diagonal, one algorithmic block per pass.
	i_diag = 0;
	while ( i_diag < n_diag )
	{
		// Pick the blocksize for this pass.
		blk_len = bli_determine_blocksize_f( i_diag, n_diag, a,
		                                     bli_cntl_bszid( cntl ), cntx );

		// Carve out A11 and A10.
		bli_acquire_mpart_tl2br( BLIS_SUBPART11, i_diag, blk_len, a, &a_blk );
		bli_acquire_mpart_tl2br( BLIS_SUBPART10, i_diag, blk_len, a, &a_row );

		// Carve out x1 and x0.
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i_diag, blk_len, x, &x_blk );
		bli_acquire_vpart_f2b( BLIS_SUBPART0, i_diag, blk_len, x, &x_head );

		// Carve out y1 and y0.
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i_diag, blk_len, y, &y_blk );
		bli_acquire_vpart_f2b( BLIS_SUBPART0, i_diag, blk_len, y, &y_head );

		// Prepare the pack objects for A11, x1 and y1 (if needed).
		bli_packm_init( &a_blk, &a_blk_packed,
		                cntx, bli_cntl_sub_packm_a11( cntl ) );
		bli_packv_init( &x_blk, &x_blk_packed,
		                cntx, bli_cntl_sub_packv_x1( cntl ) );
		bli_packv_init( &y_blk, &y_blk_packed,
		                cntx, bli_cntl_sub_packv_y1( cntl ) );

		// Copy/pack A11, x1 and y1 (if needed).
		bli_packm_int( &a_blk, &a_blk_packed,
		               cntx, bli_cntl_sub_packm_a11( cntl ),
		               &BLIS_PACKM_SINGLE_THREADED );
		bli_packv_int( &x_blk, &x_blk_packed,
		               cntx, bli_cntl_sub_packv_x1( cntl ) );
		bli_packv_int( &y_blk, &y_blk_packed,
		               cntx, bli_cntl_sub_packv_y1( cntl ) );

		// y0 = y0 + alpha * A10' * x1;
		bli_gemv_int( bli_apply_conj( conjh, BLIS_TRANSPOSE ),
		              BLIS_NO_CONJUGATE,
		              alpha, &a_row, &x_blk_packed,
		              &BLIS_ONE, &y_head,
		              cntx, bli_cntl_sub_gemv_t_rp( cntl ) );

		// y1 = y1 + alpha * A11 * x1;
		bli_hemv_int( conjh,
		              alpha, &a_blk_packed, &x_blk_packed,
		              &BLIS_ONE, &y_blk_packed,
		              cntx, bli_cntl_sub_hemv( cntl ) );

		// y1 = y1 + alpha * A10 * x0;
		bli_gemv_int( BLIS_NO_TRANSPOSE,
		              BLIS_NO_CONJUGATE,
		              alpha, &a_row, &x_head,
		              &BLIS_ONE, &y_blk_packed,
		              cntx, bli_cntl_sub_gemv_n_rp( cntl ) );

		// Copy/unpack y1 (if y1 was packed).
		bli_unpackv_int( &y_blk_packed, &y_blk,
		                 cntx, bli_cntl_sub_unpackv_y1( cntl ) );

		// Advance past the block just processed.
		i_diag += blk_len;
	}

	// Hand any packing buffers acquired along the way back to the memory
	// manager.
	bli_packm_release( &a_blk_packed, bli_cntl_sub_packm_a11( cntl ) );
	bli_packv_release( &x_blk_packed, bli_cntl_sub_packv_x1( cntl ) );
	bli_packv_release( &y_blk_packed, bli_cntl_sub_packv_y1( cntl ) );
}
コード例 #2
0
ファイル: test_dotaxpyv.c プロジェクト: jmhautbois/blis
// Runs a single dotaxpyv experiment: builds the operands, times
// params->n_repeats invocations of the implementation, and reports the
// performance of the fastest run together with the residual from the check
// routine.
//
//   params   - test parameters; n_repeats is read here and the struct is
//              forwarded to operand creation
//   op       - test operation; dim_spec[0] is mapped to the vector length
//   iface    - interface selector forwarded to the implementation wrapper
//   datatype - datatype used for all scalars and vectors
//   pc_str   - parameter characters: [0], [1], [2] give the conjugation of
//              xt, x and y respectively
//   sc_str   - storage characters: [0] for x, [1] for y, [2] for z (and for
//              the saved copy of z)
//   p_cur    - current problem size
//   perf     - output: estimated performance of the best repeat
//   resid    - output: residual computed by the check routine
void libblis_test_dotaxpyv_experiment( test_params_t* params,
                                       test_op_t*     op,
                                       iface_t        iface,
                                       num_t          datatype,
                                       char*          pc_str,
                                       char*          sc_str,
                                       unsigned int   p_cur,
                                       double*        perf,
                                       double*        resid )
{
	const unsigned int repeat_count = params->n_repeats;
	unsigned int       runs_left;

	double       best_time = 1e9;
	double       t_start;
	double       alpha_re, alpha_im;

	dim_t        vec_len;

	conj_t       conj_xt, conj_x, conj_y;
	conj_t       conj_y_init;

	obj_t        alpha, xt, x, y, rho, z;
	obj_t        z_orig;


	// Translate the dimension specifier into a concrete vector length.
	vec_len = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Translate the parameter characters into BLIS conjugation values.
	bli_param_map_char_to_blis_conj( pc_str[0], &conj_xt );
	bli_param_map_char_to_blis_conj( pc_str[1], &conj_x );
	bli_param_map_char_to_blis_conj( pc_str[2], &conj_y );

	// Set up the scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &rho );

	// Set up the vector operands; z_orig shares z's storage specifier.
	libblis_test_vobj_create( params, datatype, sc_str[0], vec_len, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], vec_len, &y );
	libblis_test_vobj_create( params, datatype, sc_str[2], vec_len, &z );
	libblis_test_vobj_create( params, datatype, sc_str[2], vec_len, &z_orig );

	// alpha is -0.8 for real z and -0.8i otherwise.
	if ( bli_obj_is_real( z ) )
	{
		alpha_re = -0.8;
		alpha_im =  0.0;
	}
	else
	{
		alpha_re =  0.0;
		alpha_im = -0.8;
	}
	bli_setsc( alpha_re, alpha_im, &alpha );

	// Fill x and z with random values and keep a pristine copy of z so it
	// can be restored before every timed run.
	bli_randv( &x );
	bli_randv( &z );
	bli_copyv( &z, &z_orig );

	// xt is just another view of x; no actual transposition is required.
	bli_obj_alias_to( x, xt );

	// y starts out as a copy of x, conjugated or not depending on the
	// requested conjugations of xt and y:
	//
	//  conjxt conjy  ~(conjxt^conjy)  y is initialized as
	//  n      n      c                y = conj(x)
	//  n      c      n                y = x
	//  c      n      n                y = x
	//  c      c      c                y = conj(x)
	//
	conj_y_init = bli_apply_conj( conj_xt, conj_y );
	conj_y_init = bli_conj_toggled( conj_y_init );
	bli_obj_set_conj( conj_y_init, xt );
	bli_copyv( &xt, &y );

	// Now attach the conjugations that the experiment actually asks for.
	bli_obj_set_conj( conj_xt, xt );
	bli_obj_set_conj( conj_x,  x );
	bli_obj_set_conj( conj_y,  y );

	// Time each repeat, tracking the fastest one.
	for ( runs_left = repeat_count; runs_left > 0; --runs_left )
	{
		// Reset the outputs so that every run starts from the same state.
		bli_copysc( &BLIS_MINUS_ONE, &rho );
		bli_copyv( &z_orig, &z );

		t_start = bli_clock();

		libblis_test_dotaxpyv_impl( iface, &alpha, &xt, &x, &y, &rho, &z );

		best_time = bli_clock_min_diff( best_time, t_start );
	}

	// Performance estimate from the fastest run. The flop count is taken as
	// 2m + 2m (presumably one 2m term each for the dot and axpy halves),
	// scaled by four for complex data.
	*perf = ( 2.0 * vec_len + 2.0 * vec_len ) / best_time / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( z ) )
	{
		*perf *= 4.0;
	}

	// Verify the result.
	libblis_test_dotaxpyv_check( &alpha, &xt, &x, &y, &rho, &z, &z_orig, resid );

	// An empty output vector yields zero performance and residual.
	libblis_test_check_empty_problem( &z, perf, resid );

	// Release the vectors. xt is only an alias of x, so it is not freed.
	bli_obj_free( &x );
	bli_obj_free( &y );
	bli_obj_free( &z );
	bli_obj_free( &z_orig );
}