FLA_Error FLA_Hevd_lv_var4_components( dim_t n_iter_max, FLA_Obj A, FLA_Obj l, dim_t k_accum, dim_t b_alg,
                                       double* dtime_tred, double* dtime_tevd, double* dtime_appq )
{
	FLA_Error    r_val = FLA_SUCCESS;
	FLA_Uplo     uplo = FLA_LOWER_TRIANGULAR;
	FLA_Datatype dt;
	FLA_Datatype dt_real;
	FLA_Datatype dt_comp;
	FLA_Obj      T, r, d, e, G, R, W;
	FLA_Obj      d0, e0, ls, pu;
	dim_t        mn_A;
	dim_t        n_G = k_accum;
	double       dtime_temp;

	mn_A    = FLA_Obj_length( A );
	dt      = FLA_Obj_datatype( A );
	dt_real = FLA_Obj_datatype_proj_to_real( A );
	dt_comp = FLA_Obj_datatype_proj_to_complex( A );

	*dtime_tred = 1;
	*dtime_tevd = 1;
	*dtime_appq = 1;

	// If the matrix is a scalar, then the EVD is easy.
	if ( mn_A == 1 )
	{
		FLA_Copy( A, l );
		FLA_Set( FLA_ONE, A );

		return FLA_SUCCESS;
	}

	// Create a matrix to hold block Householder transformations.
	FLA_Tridiag_UT_create_T( A, &T );

	// Create a vector to hold the realifying scalars.
	FLA_Obj_create( dt,      mn_A,     1, 0, 0, &r );

	// Create vectors to hold the diagonal and sub-diagonal.
	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &d );
	FLA_Obj_create( dt_real, mn_A-1,   1, 0, 0, &e );
	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &d0 );
	FLA_Obj_create( dt_real, mn_A-1,   1, 0, 0, &e0 );
	FLA_Obj_create( dt_real, mn_A,     1, 0, 0, &pu );
	FLA_Obj_create( FLA_INT, mn_A,     1, 0, 0, &ls );
	FLA_Obj_create( dt_comp, mn_A-1, n_G, 0, 0, &G );
	FLA_Obj_create( dt_real, mn_A,  mn_A, 0, 0, &R );
	FLA_Obj_create( dt,      mn_A,  mn_A, 0, 0, &W );

  dtime_temp = FLA_Clock();
  {
	// Reduce the matrix to tridiagonal form.
	FLA_Tridiag_UT( uplo, A, T );
  }
  *dtime_tred = FLA_Clock() - dtime_temp;

	// Apply scalars to rotate elements on the sub-diagonal to the real domain.
	FLA_Tridiag_UT_realify( uplo, A, r );

	// Extract the diagonal and sub-diagonal from A.
	FLA_Tridiag_UT_extract_diagonals( uplo, A, d, e );

  dtime_temp = FLA_Clock();
  {
	// Form Q, overwriting A.
	FLA_Tridiag_UT_form_Q( uplo, A, T );
  }
  *dtime_appq = FLA_Clock() - dtime_temp;

	// Apply the scalars in r to Q.
	FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A );

	// Find the eigenvalues only.
	FLA_Copy( d, d0 ); FLA_Copy( e, e0 );
	//r_val = FLA_Tevd_n_opt_var1( n_iter_max, d0, e0, G, A );
{
	int info;
	double* buff_d = FLA_DOUBLE_PTR( d0 );
	double* buff_e = FLA_DOUBLE_PTR( e0 );
	dsterf_( &mn_A, buff_d, buff_e, &info );
}
	FLA_Sort( FLA_FORWARD, d0 );
	FLA_Set( FLA_ZERO, ls );
	FLA_Set( FLA_ZERO, pu );

  dtime_temp = FLA_Clock();
  {
	// Perform an eigenvalue decomposition on the tridiagonal matrix.
	r_val = FLA_Tevd_v_opt_var4( n_iter_max, d, e, d0, ls, pu, G, R, W, A, b_alg );
  }
  *dtime_tevd = FLA_Clock() - dtime_temp;

	// Copy the converged eigenvalues to the output vector.
	FLA_Copy( d, l );

	// Sort the eigenvalues and eigenvectors in ascending order.
	FLA_Sort_evd( FLA_FORWARD, l, A );

	FLA_Obj_free( &T );
	FLA_Obj_free( &r );
	FLA_Obj_free( &d );
	FLA_Obj_free( &e );
	FLA_Obj_free( &d0 );
	FLA_Obj_free( &pu );
	FLA_Obj_free( &e0 );
	FLA_Obj_free( &ls );
	FLA_Obj_free( &G );
	FLA_Obj_free( &R );
	FLA_Obj_free( &W );

	return r_val;
}
Example #2
0
int main(int argc, char *argv[])
{
  int 
    m_input,
    m,
    p_first, p_last, p_inc,
    p,
    k_accum,
    b_alg,
    n_iter_max,
    variant,
    n_repeats,
    i,
    n_variants = 2;

  char *colors = "brkgmcbrkg";
  char *ticks  = "o+*xso+*xs";
  char m_dim_desc[14];
  char m_dim_tag[10];

  double max_gflops=6.0;

  double
    dtime,
    gflops,
    diff1, diff2;

  FLA_Datatype datatype, dt_real;

  FLA_Obj
    A, l, Q, Ql, TT, r, d, e, A_orig, G, R, W2, de, alpha;

  FLA_Init();


  fprintf( stdout, "%c number of repeats:", '%' );
  scanf( "%d", &n_repeats );
  fprintf( stdout, "%c %d\n", '%', n_repeats );

  fprintf( stdout, "%c enter n_iter_max (per eigenvalue): ", '%' );
  scanf( "%d", &n_iter_max );
  fprintf( stdout, "%c %d\n", '%', n_iter_max );

  fprintf( stdout, "%c enter number of sets of Givens rotations to accumulate:", '%' );
  scanf( "%d", &k_accum );
  fprintf( stdout, "%c %d\n", '%', k_accum );

  fprintf( stdout, "%c enter blocking size for application of G:", '%' );
  scanf( "%d", &b_alg );
  fprintf( stdout, "%c %d\n", '%', b_alg );

  fprintf( stdout, "%c enter problem size first, last, inc:", '%' );
  scanf( "%d%d%d", &p_first, &p_last, &p_inc );
  fprintf( stdout, "%c %d %d %d\n", '%', p_first, p_last, p_inc );

  fprintf( stdout, "%c enter m (-1 means bind to problem size): ", '%' );
  scanf( "%d", &m_input );
  fprintf( stdout, "%c %d\n", '%', m_input );


  fprintf( stdout, "\n" );


  if     ( m_input >  0 ) {
    sprintf( m_dim_desc, "m = %d", m_input );
    sprintf( m_dim_tag,  "m%dc", m_input);
  }
  else if( m_input <  -1 ) {
    sprintf( m_dim_desc, "m = p/%d", -m_input );
    sprintf( m_dim_tag,  "m%dp", -m_input );
  }
  else if( m_input == -1 ) {
    sprintf( m_dim_desc, "m = p" );
    sprintf( m_dim_tag,  "m%dp", 1 );
  }


  for ( p = p_first, i = 1; p <= p_last; p += p_inc, i += 1 )
  {

    m = m_input;

    if( m < 0 ) m = p / abs(m_input);

    //datatype = FLA_FLOAT;
    //datatype = FLA_DOUBLE;
    //datatype = FLA_COMPLEX;
    datatype = FLA_DOUBLE_COMPLEX;

    FLA_Obj_create( datatype,  m,         m, 0, 0, &A );
    FLA_Obj_create( datatype,  m,         m, 0, 0, &A_orig );
    FLA_Obj_create( datatype,  m,         m, 0, 0, &Q );
    FLA_Obj_create( datatype,  m,         m, 0, 0, &Ql );
    FLA_Obj_create( datatype,  m,         1, 0, 0, &r );
    FLA_Obj_create( datatype,  m,         m, 0, 0, &W2 );
    FLA_Obj_create( datatype,  m-1, k_accum, 0, 0, &G );

	dt_real = FLA_Obj_datatype_proj_to_real( A );

    FLA_Obj_create( dt_real, m,      1, 0, 0, &l );
    FLA_Obj_create( dt_real, m,      1, 0, 0, &d );
    FLA_Obj_create( dt_real, m-1,    1, 0, 0, &e );
    FLA_Obj_create( dt_real, m,      m, 0, 0, &R );

	FLA_Obj_create( dt_real, 1,      1, 0, 0, &alpha );

	*FLA_DOUBLE_PTR( alpha ) = 1.0 / ( sqrt( sqrt( (double) m ) ) );

	FLA_Random_unitary_matrix( Q );

	//FLA_Fill_with_uniform_dist( FLA_ONE,   l );
	//FLA_Fill_with_inverse_dist( FLA_ONE,   l );
	FLA_Fill_with_geometric_dist( alpha,   l );


    {
      FLA_Copy( Q, Ql );
      FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, l, Ql );
      FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                FLA_ONE, Ql, Q, FLA_ZERO, A );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, A );
      FLA_Copy( A, A_orig );
    }

    FLA_Set( FLA_ZERO, l );
    FLA_Set( FLA_ZERO, Q );

	FLA_Tridiag_UT_create_T( A, &TT );
	FLA_Tridiag_UT( FLA_LOWER_TRIANGULAR, A, TT );
	FLA_Tridiag_UT_realify( FLA_LOWER_TRIANGULAR, A, r );
	FLA_Tridiag_UT_extract_diagonals( FLA_LOWER_TRIANGULAR, A, d, e );
	FLA_Tridiag_UT_form_Q( FLA_LOWER_TRIANGULAR, A, TT );
	FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, r, A );
    FLA_Obj_free( &TT );

    time_Tevd_v( 0, FLA_ALG_REFERENCE, n_repeats, m, k_accum, b_alg, n_iter_max,
                 A_orig, d, e, G, R, W2, A, l, &dtime, &diff1, &diff2, &gflops );

    fprintf( stdout, "data_REFq( %d, 1:3 ) = [ %d %6.3lf %9.2e %6.2le %6.2le ]; \n", i, p, gflops, dtime, diff1, diff2 );
    fflush( stdout );

    for ( variant = 1; variant <= n_variants; variant++ ){
      
      fprintf( stdout, "data_var%d( %d, 1:3 ) = [ %d ", variant, i, p );
      fflush( stdout );

      time_Tevd_v( variant, FLA_ALG_UNB_OPT, n_repeats, m, k_accum, b_alg, n_iter_max,
                   A_orig, d, e, G, R, W2, A, l, &dtime, &diff1, &diff2, &gflops );

      fprintf( stdout, "%6.3lf %9.2e %6.2le %6.2le ", gflops, dtime, diff1, diff2 );
      fflush( stdout );

      fprintf( stdout, "];\n" );
      fflush( stdout );
    }

    fprintf( stdout, "\n" );

    FLA_Obj_free( &A );
    FLA_Obj_free( &A_orig );
    FLA_Obj_free( &Q );
    FLA_Obj_free( &Ql );
    FLA_Obj_free( &G );
    FLA_Obj_free( &W2 );
    FLA_Obj_free( &r );
    FLA_Obj_free( &l );
    FLA_Obj_free( &d );
    FLA_Obj_free( &e );
    FLA_Obj_free( &R );
    FLA_Obj_free( &alpha );
  }

/*
  fprintf( stdout, "figure;\n" );

  fprintf( stdout, "plot( data_REF( :,1 ), data_REF( :, 2 ), '-' ); \n" );

  fprintf( stdout, "hold on;\n" );

  for ( i = 1; i <= n_variants; i++ ) {
    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 2 ), '%c:%c' ); \n",
            i, i, colors[ i-1 ], ticks[ i-1 ] );
    fprintf( stdout, "plot( data_var%d( :,1 ), data_var%d( :, 4 ), '%c-.%c' ); \n",
            i, i, colors[ i-1 ], ticks[ i-1 ] );
  }

  fprintf( stdout, "legend( ... \n" );
  fprintf( stdout, "'Reference', ... \n" );

  for ( i = 1; i < n_variants; i++ )
    fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d', ... \n", i, i );
  fprintf( stdout, "'unb\\_var%d', 'blk\\_var%d' ); \n", i, i );

  fprintf( stdout, "xlabel( 'problem size p' );\n" );
  fprintf( stdout, "ylabel( 'GFLOPS/sec.' );\n" );
  fprintf( stdout, "axis( [ 0 %d 0 %.2f ] ); \n", p_last, max_gflops );
  fprintf( stdout, "title( 'FLAME Hevd_lv performance (%s, %s)' );\n", 
           m_dim_desc, n_dim_desc );
  fprintf( stdout, "print -depsc tridiag_%s_%s.eps\n", m_dim_tag, n_dim_tag );
  fprintf( stdout, "hold off;\n");
  fflush( stdout );
*/

  FLA_Finalize( );

  return 0;
}