예제 #1
0
void libblis_test_dotxaxpyf_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         at,
       obj_t*         a,
       obj_t*         w,
       obj_t*         x,
       obj_t*         beta,
       obj_t*         y,
       obj_t*         z,
       obj_t*         y_orig,
       obj_t*         z_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_datatype( *y );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );

	dim_t  m       = bli_obj_vector_dim( *z );
	dim_t  b_n     = bli_obj_vector_dim( *y );

	dim_t  i;

	obj_t  a1, chi1, psi1, v, q;
	obj_t  alpha_chi1;
	obj_t  norm;

	double resid1, resid2;
	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - w is randomized.
	// - x is randomized.
	// - y is randomized.
	// - z is randomized.
	// - at is an alias to a.
	// Note:
	// - alpha and beta should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := beta * y_orig + alpha * conjat(A^T) * conjw(w)
	//   z :=        z_orig + alpha * conja(A)    * conjx(x)
	//
	// is functioning correctly if
	//
	//   normf( y - v )
	//
	// and
	//
	//   normf( z - q )
	//
	// are negligible, where v and q contain y and z as computed by repeated
	// calls to dotxv and axpyv, respectively.
	//

	bli_obj_scalar_init_detached( dt_real, &norm );
	bli_obj_scalar_init_detached( dt,      &alpha_chi1 );

	bli_obj_create( dt, b_n, 1, 0, 0, &v );
	bli_obj_create( dt, m,   1, 0, 0, &q );

	bli_copyv( y_orig, &v );
	bli_copyv( z_orig, &q );

	// v := beta * v + alpha * conjat(at) * conjw(w)
	for ( i = 0; i < b_n; ++i )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, at, &a1 );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, &v, &psi1 );

		bli_dotxv( alpha, &a1, w, beta, &psi1 );
	}

	// q := q + alpha * conja(a) * conjx(x)
	for ( i = 0; i < b_n; ++i )
	{
		bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 );
		bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 );

		bli_copysc( &chi1, &alpha_chi1 );
		bli_mulsc( alpha, &alpha_chi1 );

		bli_axpyv( &alpha_chi1, &a1, &q );
	}


	bli_subv( y, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, &resid1, &junk );

	bli_subv( z, &q );
	bli_normfv( &q, &norm );
	bli_getsc( &norm, &resid2, &junk );


	*resid = bli_fmaxabs( resid1, resid2 );

	bli_obj_free( &v );
	bli_obj_free( &q );
}
예제 #2
0
void libblis_test_dotaxpyv_check( obj_t*  alpha,
                                  obj_t*  xt,
                                  obj_t*  x,
                                  obj_t*  y,
                                  obj_t*  rho,
                                  obj_t*  z,
                                  obj_t*  z_orig,
                                  double* resid )
{
	num_t  dt      = bli_obj_datatype( *z );
	num_t  dt_real = bli_obj_datatype_proj_to_real( *z );

	dim_t  m       = bli_obj_vector_dim( *z );

	obj_t  rho_temp;

	obj_t  z_temp;
	obj_t  norm_z;

	double resid1, resid2;
	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - y is randomized.
	// - z_orig is randomized.
	// - xt is an alias to x.
	// Note:
	// - alpha should have a non-zero imaginary component in the complex
	//   cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   rho := conjxt(x^T) conjy(y)
	//   z := z_orig + alpha * conjx(x)
	//
	// is functioning correctly if
	//
	//   ( rho - rho_temp )
	//
	// and
	//
	//   normf( z - z_temp )
	//
	// are negligible, where rho_temp and z_temp contain rho and z as
	// computed by dotv and axpyv, respectively.
	//

	bli_obj_scalar_init_detached( dt,      &rho_temp );
	bli_obj_scalar_init_detached( dt_real, &norm_z );

	bli_obj_create( dt, m, 1, 0, 0, &z_temp );
	bli_copyv( z_orig, &z_temp );


	bli_dotv( xt, y, &rho_temp );
	bli_axpyv( alpha, x, &z_temp );


	bli_subsc( rho, &rho_temp );
	bli_getsc( &rho_temp, &resid1, &junk );

	bli_subv( &z_temp, z );
	bli_normfv( z, &norm_z );
	bli_getsc( &norm_z, &resid2, &junk );

	*resid = bli_fmaxabs( resid1, resid2 );

	bli_obj_free( &z_temp );
}