FLA_Error FLA_Svd_uv_unb_var1( dim_t n_iter_max, FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V, dim_t k_accum, dim_t b_alg )
{
    FLA_Error    r_val = FLA_SUCCESS;
    FLA_Datatype dt;
    FLA_Datatype dt_real;
    FLA_Datatype dt_comp;
    FLA_Obj      scale, T, S, rL, rR, d, e, G, H;
    dim_t        m_A, n_A;
    dim_t        min_m_n;
    dim_t        n_GH;
    double       crossover_ratio = 17.0 / 9.0;

    n_GH    = k_accum;

    m_A     = FLA_Obj_length( A );
    n_A     = FLA_Obj_width( A );
    min_m_n = FLA_Obj_min_dim( A );
    dt      = FLA_Obj_datatype( A );
    dt_real = FLA_Obj_datatype_proj_to_real( A );
    dt_comp = FLA_Obj_datatype_proj_to_complex( A );

    // Create matrices to hold block Householder transformations.
    FLA_Bidiag_UT_create_T( A, &T, &S );

    // Create vectors to hold the realifying scalars.
    FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rL );
    FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rR );

    // Create vectors to hold the diagonal and sub-diagonal.
    FLA_Obj_create( dt_real, min_m_n,      1, 0, 0, &d );
    FLA_Obj_create( dt_real, min_m_n-1,    1, 0, 0, &e );

    // Create matrices to hold the left and right Givens scalars.
    FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G );
    FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H );

    // Create a real scaling factor.
    FLA_Obj_create( dt_real, 1, 1, 0, 0, &scale );

    // Compute a scaling factor; If none is needed, sigma will be set to one.
    FLA_Svd_compute_scaling( A, scale );

    // Scale the matrix if scale is non-unit.
    if ( !FLA_Obj_equals( scale, FLA_ONE ) )
        FLA_Scal( scale, A );

    if ( m_A < crossover_ratio * n_A )
    {
        // Reduce the matrix to bidiagonal form.
        // Apply scalars to rotate elements on the superdiagonal to the real domain.
        // Extract the diagonal and superdiagonal from A.
        FLA_Bidiag_UT( A, T, S );
        FLA_Bidiag_UT_realify( A, rL, rR );
        FLA_Bidiag_UT_extract_real_diagonals( A, d, e );

        // Form U and V.
        FLA_Bidiag_UT_form_U( A, T, U );
        FLA_Bidiag_UT_form_V( A, S, V );

        // Apply the realifying scalars in rL and rR to U and V, respectively.
        {
            FLA_Obj UL, UR;
            FLA_Obj VL, VR;

            FLA_Part_1x2( U,   &UL, &UR,   min_m_n, FLA_LEFT );
            FLA_Part_1x2( V,   &VL, &VR,   min_m_n, FLA_LEFT );

            FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, UL );
            FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );
        }

        // Perform a singular value decomposition on the bidiagonal matrix.
        r_val = FLA_Bsvd_v_opt_var1( n_iter_max, d, e, G, H, U, V, b_alg );
    }
    else // if ( crossover_ratio * n_A <= m_A )
    {
        FLA_Obj TQ, R;
        FLA_Obj AT,
                AB;
        FLA_Obj UL, UR;

        // Perform a QR factorization on A and form Q in U.
        FLA_QR_UT_create_T( A, &TQ );
        FLA_QR_UT( A, TQ );
        FLA_QR_UT_form_Q( A, TQ, U );
        FLA_Obj_free( &TQ );

        // Set the lower triangle of R to zero and then copy the upper
        // triangle of A to R.
        FLA_Part_2x1( A,   &AT,
                           &AB,   n_A, FLA_TOP );
        FLA_Obj_create( dt, n_A, n_A, 0, 0, &R );
        FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, R );
        FLA_Copyr( FLA_UPPER_TRIANGULAR, AT, R );

        // Reduce the matrix to bidiagonal form.
        // Apply scalars to rotate elements on the superdiagonal to the real domain.
        // Extract the diagonal and superdiagonal from A.
        FLA_Bidiag_UT( R, T, S );
        FLA_Bidiag_UT_realify( R, rL, rR );
        FLA_Bidiag_UT_extract_real_diagonals( R, d, e );

        // Form V from right Householder vectors in upper triangle of R.
        FLA_Bidiag_UT_form_V( R, S, V );

        // Form U in R.
        FLA_Bidiag_UT_form_U( R, T, R );

        // Apply the realifying scalars in rL and rR to U and V, respectively.
        FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, R );
        FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, V );

        // Perform a singular value decomposition on the bidiagonal matrix.
        r_val = FLA_Bsvd_v_opt_var1( n_iter_max, d, e, G, H, R, V, b_alg );

        // Multiply R into U, storing the result in A and then copying back
        // to U.
        FLA_Part_1x2( U,   &UL, &UR,   n_A, FLA_LEFT );
        FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                  FLA_ONE, UL, R, FLA_ZERO, A );
        FLA_Copy( A, UL );

        FLA_Obj_free( &R );
    }

    // Copy the converged eigenvalues to the output vector.
    FLA_Copy( d, s );

    // Sort the singular values and singular vectors in descending order.
    FLA_Sort_svd( FLA_BACKWARD, s, U, V );

    // If the matrix was scaled, rescale the singular values.
    if ( !FLA_Obj_equals( scale, FLA_ONE ) )
        FLA_Inv_scal( scale, s );

    FLA_Obj_free( &scale );
    FLA_Obj_free( &T );
    FLA_Obj_free( &S );
    FLA_Obj_free( &rL );
    FLA_Obj_free( &rR );
    FLA_Obj_free( &d );
    FLA_Obj_free( &e );
    FLA_Obj_free( &G );
    FLA_Obj_free( &H );

    return r_val;
}
FLA_Error FLA_Svd_uv_var2_components( dim_t n_iter_max, dim_t k_accum, dim_t b_alg,
                                      FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V,
                                      double* dtime_bred, double* dtime_bsvd, double* dtime_appq,
                                      double* dtime_qrfa, double* dtime_gemm )
{
	FLA_Error    r_val = FLA_SUCCESS;
	FLA_Datatype dt;
	FLA_Datatype dt_real;
	FLA_Datatype dt_comp;
	FLA_Obj      T, S, rL, rR, d, e, G, H, RG, RH, W;
	dim_t        m_A, n_A;
	dim_t        min_m_n;
	dim_t        n_GH;
	double       crossover_ratio = 17.0 / 9.0;
	double       dtime_temp;

	n_GH    = k_accum;

	m_A     = FLA_Obj_length( A );
	n_A     = FLA_Obj_width( A );
	min_m_n = FLA_Obj_min_dim( A );
	dt      = FLA_Obj_datatype( A );
	dt_real = FLA_Obj_datatype_proj_to_real( A );
	dt_comp = FLA_Obj_datatype_proj_to_complex( A );

	// If the matrix is a scalar, then the SVD is easy.
	if ( min_m_n == 1 )
	{
		FLA_Copy( A, s );
		FLA_Set_to_identity( U );
		FLA_Set_to_identity( V );

		return FLA_SUCCESS;
	}

	// Create matrices to hold block Householder transformations.
	FLA_Bidiag_UT_create_T( A, &T, &S );

	// Create vectors to hold the realifying scalars.
	FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rL );
	FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rR );

	// Create vectors to hold the diagonal and sub-diagonal.
	FLA_Obj_create( dt_real, min_m_n,      1, 0, 0, &d );
	FLA_Obj_create( dt_real, min_m_n-1,    1, 0, 0, &e );

	// Create matrices to hold the left and right Givens scalars.
	FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G );
	FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H );

	// Create matrices to hold the left and right Givens matrices.
	FLA_Obj_create( dt_real, min_m_n, min_m_n, 0, 0, &RG );
	FLA_Obj_create( dt_real, min_m_n, min_m_n, 0, 0, &RH );
	FLA_Obj_create( dt,      m_A,     n_A,     0, 0, &W );

	if ( m_A >= n_A )
	{
		if ( m_A < crossover_ratio * n_A )
		{
			dtime_temp = FLA_Clock();
			{
			// Reduce the matrix to bidiagonal form.
			// Apply scalars to rotate elements on the sub-diagonal to the real domain.
			// Extract the diagonal and sub-diagonal from A.
			FLA_Bidiag_UT( A, T, S );
			FLA_Bidiag_UT_realify( A, rL, rR );
			FLA_Bidiag_UT_extract_diagonals( A, d, e );
			}
			*dtime_bred = FLA_Clock() - dtime_temp;

			dtime_temp = FLA_Clock();
			{
			// Form U and V.
			FLA_Bidiag_UT_form_U( A, T, U );
			FLA_Bidiag_UT_form_V( A, S, V );
			}
			*dtime_appq = FLA_Clock() - dtime_temp;

			// Apply the realifying scalars in rL and rR to U and V, respectively.
			{
				FLA_Obj UL, UR;
				FLA_Obj VL, VR;

				FLA_Part_1x2( U,   &UL, &UR,   min_m_n, FLA_LEFT );
				FLA_Part_1x2( V,   &VL, &VR,   min_m_n, FLA_LEFT );

				FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, UL );
				FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );
			}

			dtime_temp = FLA_Clock();
			{
			// Perform a singular value decomposition on the bidiagonal matrix.
			r_val = FLA_Bsvd_v_opt_var2( n_iter_max, d, e, G, H, RG, RH, W, U, V, b_alg );
			}
			*dtime_bsvd = FLA_Clock() - dtime_temp;
		}
		else // if ( crossover_ratio * n_A <= m_A )
		{
			FLA_Obj TQ, R;
			FLA_Obj AT,
			        AB;
			FLA_Obj UL, UR;

			//FLA_QR_UT_create_T( A, &TQ );
			FLA_Obj_create( dt, 32, n_A, 0, 0, &TQ );

			dtime_temp = FLA_Clock();
			{
			// Perform a QR factorization on A and form Q in U.
			FLA_QR_UT( A, TQ );
			}
			*dtime_qrfa = FLA_Clock() - dtime_temp;

			dtime_temp = FLA_Clock();
			{
			FLA_QR_UT_form_Q( A, TQ, U );
			}
			*dtime_appq = FLA_Clock() - dtime_temp;

			FLA_Obj_free( &TQ );

			// Set the lower triangle of R to zero and then copy the upper
			// triangle of A to R.
			FLA_Part_2x1( A,   &AT,
			                   &AB,   n_A, FLA_TOP );
			FLA_Obj_create( dt, n_A, n_A, 0, 0, &R );
			FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, R );
			FLA_Copyr( FLA_UPPER_TRIANGULAR, AT, R );

			dtime_temp = FLA_Clock();
			{
			// Reduce the matrix to bidiagonal form.
			// Apply scalars to rotate elements on the superdiagonal to the real domain.
			// Extract the diagonal and superdiagonal from A.
			FLA_Bidiag_UT( R, T, S );
			FLA_Bidiag_UT_realify( R, rL, rR );
			FLA_Bidiag_UT_extract_diagonals( R, d, e );
			}
			*dtime_bred = FLA_Clock() - dtime_temp;

			dtime_temp = FLA_Clock();
			{
			// Form V from right Householder vectors in upper triangle of R.
			FLA_Bidiag_UT_form_V( R, S, V );

			// Form U in R.
			FLA_Bidiag_UT_form_U( R, T, R );
			}
			*dtime_appq += FLA_Clock() - dtime_temp;

			// Apply the realifying scalars in rL and rR to U and V, respectively.
			FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, R );
			FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, V );

			dtime_temp = FLA_Clock();
			{
			// Perform a singular value decomposition on the bidiagonal matrix.
			r_val = FLA_Bsvd_v_opt_var2( n_iter_max, d, e, G, H, RG, RH, W, R, V, b_alg );
			}
			*dtime_bsvd = FLA_Clock() - dtime_temp;

			dtime_temp = FLA_Clock();
			{
			// Multiply R into U, storing the result in A and then copying back
			// to U.
			FLA_Part_1x2( U,   &UL, &UR,   n_A, FLA_LEFT );
			FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
			          FLA_ONE, UL, R, FLA_ZERO, A );
			FLA_Copy( A, UL );
			}
			*dtime_gemm = FLA_Clock() - dtime_temp;

			FLA_Obj_free( &R );
		}
	}
	else // if ( m_A < n_A )
	{
		FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
	}

	// Copy the converged eigenvalues to the output vector.
	FLA_Copy( d, s );

	// Sort the singular values and singular vectors in descending order.
	FLA_Sort_svd( FLA_BACKWARD, s, U, V );

	FLA_Obj_free( &T );
	FLA_Obj_free( &S );
	FLA_Obj_free( &rL );
	FLA_Obj_free( &rR );
	FLA_Obj_free( &d );
	FLA_Obj_free( &e );
	FLA_Obj_free( &G );
	FLA_Obj_free( &H );
	FLA_Obj_free( &RG );
	FLA_Obj_free( &RH );
	FLA_Obj_free( &W );

	return r_val;
}
Exemple #3
0
int main( int argc, char** argv ) {
  FLA_Datatype datatype = TESTTYPE;
  FLA_Obj      A, A_flame, A_lapack, C;
  int          m;
  FLA_Error    init_result; 

  FLA_Obj TU, TV, U_flame, V_flame, d_flame, e_flame, B_flame;
  FLA_Obj tauq, taup, d_lapack, e_lapack, U_lapack, V_lapack, W, B_lapack;
  testtype *buff_tauq, *buff_taup, *buff_d_lapack, *buff_e_lapack, 
    *buff_W, *buff_A_lapack, *buff_U_lapack, *buff_V_lapack;
  int lwork, info, is_flame;
  
  if ( argc == 3 ) {
    m = atoi(argv[1]);
    is_flame = atoi(argv[2]);
  } else {
    fprintf(stderr, "       \n");
    fprintf(stderr, "Usage: %s m is_flame\n", argv[0]);
    fprintf(stderr, "       m : matrix length\n");
    fprintf(stderr, "       is_flame : 1 yes, 0 no\n");
    fprintf(stderr, "       \n");
    return -1;
  }
  if ( m == 0 )
    return 0;

  FLA_Init_safe( &init_result );          

  fprintf( stdout, "lapack2flame: %d x %d: \n", m, m);

  FLA_Obj_create( datatype, m, m, 0, 0, &A );
  FLA_Random_matrix( A ); 
  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_flame  );
  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_lapack );

  FLA_Obj_create( datatype, m, m, 0, 0, &C );
  FLA_Random_matrix( C ); 


  if ( is_flame ) {
    fprintf( stdout, " flame executed\n");
    FLA_Bidiag_UT_create_T( A_flame, &TU, &TV );

    FLA_Bidiag_UT( A_flame, TU, TV );
    FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A_flame, &U_flame );
    FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A_flame, &V_flame );

    FLA_Bidiag_UT_form_U( U_flame, TU, U_flame );
    FLA_Bidiag_UT_form_V( V_flame, TV, V_flame );
    
    FLA_Obj_create( datatype, m,      1, 0, 0, &d_flame );
    FLA_Obj_create( datatype, m - 1,  1, 0, 0, &e_flame );
    FLA_Bidiag_UT_extract_diagonals( A_flame, d_flame, e_flame );

    FLA_Obj_create( datatype, m, m, 0, 0, &B_flame ); FLA_Set( FLA_ZERO, B_flame );
    {
      FLA_Obj BTL, BTR, BBL, BBR;
      FLA_Part_2x2( B_flame, &BTL, &BTR, &BBL, &BBR, 1,1, FLA_BL );
      FLA_Set_diagonal_matrix( d_flame, B_flame );
      FLA_Set_diagonal_matrix( e_flame, BTR );
    }

    if (1) {
      fprintf( stdout, " - FLAME ----------\n");
      FLA_Obj_fshow( stdout, " - Given A - ", A, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - A - ", A_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - U - ", U_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - V - ", V_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - d - ", d_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - e - ", e_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - B - ", B_flame, "% 6.4e", "------");
    }
  } else {
    fprintf( stdout, " lapack executed\n");

    FLA_Obj_create( datatype, m, 1, 0, 0, &tauq );
    FLA_Obj_create( datatype, m, 1, 0, 0, &taup );
    FLA_Obj_create( datatype, m,      1, 0, 0, &d_lapack );
    FLA_Obj_create( datatype, m - 1,  1, 0, 0, &e_lapack );

    buff_A_lapack = (testtype*)FLA_Obj_buffer_at_view( A_lapack );
    buff_tauq     = (testtype*)FLA_Obj_buffer_at_view( tauq );
    buff_taup     = (testtype*)FLA_Obj_buffer_at_view( taup );
    buff_d_lapack = (testtype*)FLA_Obj_buffer_at_view( d_lapack );
    buff_e_lapack = (testtype*)FLA_Obj_buffer_at_view( e_lapack );

    lwork = 32*m;
    
    FLA_Obj_create( datatype, lwork, 1, 0, 0, &W );
    buff_W = (testtype*)FLA_Obj_buffer_at_view( W );
    sgebrd_( &m, &m, 
             buff_A_lapack, &m,
             buff_d_lapack,
             buff_e_lapack,
             buff_tauq,
             buff_taup,
             buff_W,
             &lwork,
             &info );

    FLA_Obj_create( datatype, m, m, 0, 0, &U_lapack );
    FLA_Obj_create( datatype, m, m, 0, 0, &V_lapack );
    
    FLA_Copy( A_lapack, U_lapack );
    FLA_Copy( A_lapack, V_lapack );

    buff_U_lapack = (testtype*)FLA_Obj_buffer_at_view( U_lapack );
    buff_V_lapack = (testtype*)FLA_Obj_buffer_at_view( V_lapack );

    sorgbr_( "Q", &m, &m, &m,
             buff_U_lapack, &m,
             buff_tauq, 
             buff_W,
             &lwork,
             &info );
    
    sorgbr_( "P", &m, &m, &m,
             buff_V_lapack, &m,
             buff_taup,
             buff_W,
             &lwork,
             &info );

    FLA_Obj_create( datatype, m, m, 0, 0, &B_lapack ); FLA_Set( FLA_ZERO, B_lapack );
    {
      FLA_Obj BTL, BTR, BBL, BBR;
      FLA_Part_2x2( B_lapack, &BTL, &BTR, &BBL, &BBR, 1,1, FLA_BL );
      FLA_Set_diagonal_matrix( d_lapack, B_lapack );
      FLA_Set_diagonal_matrix( e_lapack, BTR );
    }
    
    FLA_Obj_free( &W );    


    if (1) {
      fprintf( stdout, " - LAPACK ----------\n");
      FLA_Obj_fshow( stdout, " - Given A - ", A, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - A - ", A_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - U - ", U_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - V - ", V_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - d - ", d_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - e - ", e_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - B - ", B_lapack, "% 6.4e", "------");
    }
  }

  {
    testtype     dummy;
    int          zero = 0, one = 1;
    FLA_Obj      D_lapack;

    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &D_lapack ); FLA_Set( FLA_ZERO, D_lapack );

    if ( is_flame ) {
      buff_d_lapack = (testtype*)FLA_Obj_buffer_at_view( d_flame );
      buff_e_lapack = (testtype*)FLA_Obj_buffer_at_view( e_flame );
      buff_U_lapack = (testtype*)FLA_Obj_buffer_at_view( U_flame );
      buff_V_lapack = (testtype*)FLA_Obj_buffer_at_view( V_flame );
    }

    FLA_Obj_create( datatype, 4*m, 1, 0, 0, &W );
    buff_W = (testtype*)FLA_Obj_buffer_at_view( W );
    sbdsqr_( "U", &m, &m, &m, &zero, 
             buff_d_lapack, buff_e_lapack, 
             buff_V_lapack, &m, 
             buff_U_lapack, &m, 
             &dummy, &one, 
             buff_W, &info );
    FLA_Obj_free( &W );
    if (info != 0)
      printf( " Error info = %d\n", info );

    if ( is_flame )
      FLA_Set_diagonal_matrix( d_flame, D_lapack );
    else
      FLA_Set_diagonal_matrix( d_lapack, D_lapack );

    if ( is_flame ) {
      fprintf( stdout, " - FLAME ----------\n");
      FLA_Obj_fshow( stdout, " - U - ", U_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - V - ", V_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - d - ", d_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - e - ", e_flame, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - D - ", D_lapack, "% 6.4e", "------");
    } else {
      fprintf( stdout, " - LAPACK ----------\n");
      FLA_Obj_fshow( stdout, " - U - ", U_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - V - ", V_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - d - ", d_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - e - ", e_lapack, "% 6.4e", "------");
      FLA_Obj_fshow( stdout, " - D - ", D_lapack, "% 6.4e", "------");
    }

    FLA_Obj_free( &D_lapack );
  }

  if ( is_flame ) {
    FLA_Obj_free( &TU );
    FLA_Obj_free( &TV );
    FLA_Obj_free( &U_flame );
    FLA_Obj_free( &V_flame );
    FLA_Obj_free( &d_flame );
    FLA_Obj_free( &e_flame );
    FLA_Obj_free( &B_flame );
  } else {
    FLA_Obj_free( &tauq );
    FLA_Obj_free( &taup );
    FLA_Obj_free( &d_lapack );
    FLA_Obj_free( &e_lapack );
    FLA_Obj_free( &U_lapack );
    FLA_Obj_free( &V_lapack );
    FLA_Obj_free( &B_lapack );
  }
  FLA_Obj_free( &A );
  FLA_Obj_free( &A_flame );
  FLA_Obj_free( &A_lapack );

  FLA_Obj_free( &C );

  FLA_Finalize_safe( init_result );     
}
Exemple #4
0
void time_Bidiag_UT(
                 int param_combo, int type, int nrepeats, int m, int n,
                 FLA_Obj A, FLA_Obj tu, FLA_Obj tv, FLA_Obj TU, FLA_Obj TV,
                 double *dtime, double *diff, double *gflops )
{
  int
    irep;

  double
    dtime_old = 1.0e9;

  FLA_Obj
    A_save, norm;


  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );

  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );

  FLA_Copy_external( A, A_save );

  for ( irep = 0 ; irep < nrepeats; irep++ ){

    FLA_Copy_external( A_save, A );

    *dtime = FLA_Clock();

    switch( param_combo ){

    case 0:
    {
      switch( type )
      {
      case FLA_ALG_REFERENCE:
        REF_Bidiag_UT( A, tu, tv );
        break;
      case FLA_ALG_FRONT:
        FLA_Bidiag_UT( A, TU, TV );
        break;
      }

      break;
    }

    }

    *dtime = FLA_Clock() - *dtime;
    dtime_old = min( *dtime, dtime_old );

  }

  {
    FLA_Obj AL, AR;
    FLA_Obj ATL, ATR,
            ABL, ABR;
    FLA_Obj QU;
    FLA_Obj QV, QVL, QVR;
    FLA_Obj E, EL, ER;
    FLA_Obj F;
    FLA_Obj WU, WV, eye;
    FLA_Obj tvT,
            tvB;
    dim_t   m_A, n_A, m_TU;


//FLA_Obj_show( "A_save", A_save, "%10.3e", "" );

    m_A = FLA_Obj_length( A );
    n_A = FLA_Obj_width( A );
    m_TU = FLA_Obj_length( TU );

    FLA_Obj_create( FLA_Obj_datatype( A ), m_A,  m_A, 0, 0, &QU );
    FLA_Obj_create( FLA_Obj_datatype( A ), n_A,  n_A, 0, 0, &QV );

    FLA_Obj_create( FLA_Obj_datatype( A ), m_TU,  m_A, 0, 0, &WU );
    FLA_Obj_create( FLA_Obj_datatype( A ), m_TU,  n_A, 0, 0, &WV );

    FLA_Set_to_identity( QU );
    FLA_Set_to_identity( QV );

    FLA_Part_1x2( QV,   &QVL, &QVR,   1, FLA_LEFT );
    FLA_Part_1x2( A,    &AL,  &AR,    1, FLA_LEFT );
    FLA_Part_2x2( A,    &ATL, &ATR,
                        &ABL, &ABR,   1, 1, FLA_BL );
    FLA_Part_2x1( tv,   &tvT,
                        &tvB,   1, FLA_BOTTOM );

    if ( type == FLA_ALG_REFERENCE )
    {
      if ( FLA_Obj_is_real( A ) )
        FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, tu, QU );
      else
        FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, tu, QU );
      //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_ROWWISE, AR, tv, QVR );
      //
      // Need to apply backwards transformation, since vectors are stored columnwise.
      // QL? RQ?
      //
      //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_ROWWISE, ATR, tvT, QVR );
      //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, FLA_ROWWISE, ATR, tvT, QVR );
      //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, FLA_ROWWISE, AR, tvT, QVR );
    }
    else
    {
      FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, A, TU, WU, QU );
      FLA_Apply_Q_UT( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE, AR, TV, WV, QVR );
    }

/*
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &eye );     
    FLA_Set_to_identity( eye );

    //FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
    //          FLA_ONE, QV, QV, FLA_MINUS_ONE, eye );
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
              FLA_ONE, QU, QU, FLA_MINUS_ONE, eye );

FLA_Obj_show( "eye", eye, "%10.3e", "" );
    FLA_Norm_frob( eye, norm );
    FLA_Obj_extract_real_scalar( norm, diff );
    FLA_Obj_free( &eye );
*/

    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &E );
    FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &F );

    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_ONE, A_save, QV, FLA_ZERO, E );
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_ONE, QU, E, FLA_ZERO, F );

//FLA_Obj_show( "A_save", A_save, "%10.3e", "" );

    FLA_Copy( A, E );
    FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, E );
    FLA_Part_1x2( E,    &EL, &ER,      1, FLA_LEFT );
    FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, ER );

//FLA_Obj_show( "B", E, "%10.3e", "" );
//FLA_Obj_show( "Q'AV", F, "%10.3e", "" );
//FLA_Obj_show( "B", E, "%10.3e + %10.3e ", "" );
//FLA_Obj_show( "Q'AV", F, "%10.3e + %10.3e ", "" );

    *diff = FLA_Max_elemwise_diff( E, F );
    FLA_Obj_free( &E );
    FLA_Obj_free( &F );

    FLA_Obj_free( &QU );
    FLA_Obj_free( &QV );
    FLA_Obj_free( &WU );
    FLA_Obj_free( &WV );
  }

  *gflops = 4.0 * n * n * ( m - n / 3.0 ) /
            dtime_old / 1e9;
  if ( FLA_Obj_is_complex( A ) )
    *gflops *= 4.0;

  *dtime = dtime_old;

  FLA_Copy_external( A_save, A );

  FLA_Obj_free( &A_save );
  FLA_Obj_free( &norm );
}
FLA_Error FLA_Svd_ext_u_unb_var1( FLA_Svd_type jobu, FLA_Svd_type jobv,
                                  dim_t n_iter_max,
                                  FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V,
                                  dim_t k_accum,
                                  dim_t b_alg )
{
    FLA_Error    r_val = FLA_SUCCESS;
    FLA_Datatype dt;
    FLA_Datatype dt_real;
    FLA_Datatype dt_comp;
    FLA_Obj      scale, T, S, rL, rR, d, e, G, H, C; // C is dummy.
    dim_t        m_A, n_A, min_m_n;
    dim_t        n_GH;
    double       crossover_ratio = 17.0 / 9.0;
    FLA_Bool     u_is_formed = FALSE, 
                 v_is_formed = FALSE;
    int          apply_scale;

    n_GH    = k_accum;

    m_A     = FLA_Obj_length( A );
    n_A     = FLA_Obj_width( A );
    min_m_n = min( m_A, n_A );
    dt      = FLA_Obj_datatype( A );
    dt_real = FLA_Obj_datatype_proj_to_real( A );
    dt_comp = FLA_Obj_datatype_proj_to_complex( A );

    // Create matrices to hold block Householder transformations.
    FLA_Bidiag_UT_create_T( A, &T, &S );

    // Create vectors to hold the realifying scalars.
    if ( FLA_Obj_is_complex( A ) )
    {
        FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rL );
        FLA_Obj_create( dt,      min_m_n,      1, 0, 0, &rR );
    }

    // Create vectors to hold the diagonal and sub-diagonal.
    FLA_Obj_create( dt_real, min_m_n,      1, 0, 0, &d );
    FLA_Obj_create( dt_real, min_m_n-1,    1, 0, 0, &e );

    // Create matrices to hold the left and right Givens scalars.
    FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G );
    FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H );

    // Create a real scaling factor.
    FLA_Obj_create( dt_real, 1, 1, 0, 0, &scale );

    // Scale matrix A if necessary. 
    FLA_Max_abs_value( A, scale );
    apply_scale =
      ( FLA_Obj_gt( scale, FLA_OVERFLOW_SQUARE_THRES  ) == TRUE ) -     
      ( FLA_Obj_lt( scale, FLA_UNDERFLOW_SQUARE_THRES ) == TRUE ); 
    
    if ( apply_scale )
      FLA_Scal( apply_scale > 0 ? FLA_SAFE_MIN : FLA_SAFE_INV_MIN, A );   

    if ( m_A < crossover_ratio * n_A )
    {
        // Reduce the matrix to bidiagonal form.
        // Apply scalars to rotate elements on the superdiagonal to the real domain.
        // Extract the diagonal and superdiagonal from A.
        FLA_Bidiag_UT( A, T, S );
        if ( FLA_Obj_is_complex( A ) )
            FLA_Bidiag_UT_realify( A, rL, rR );
        FLA_Bidiag_UT_extract_real_diagonals( A, d, e );

        // Form U and V.
        if ( u_is_formed == FALSE )
        {
            switch ( jobu )
            {
            case FLA_SVD_VECTORS_MIN_OVERWRITE:
                if ( jobv != FLA_SVD_VECTORS_NONE )
                    FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, A, S, FLA_NO_TRANSPOSE, V );
                v_is_formed = TRUE; // For this case, V should be formed here.
                U = A;
            case FLA_SVD_VECTORS_ALL:
            case FLA_SVD_VECTORS_MIN_COPY:
                FLA_Bidiag_UT_form_U_ext( FLA_UPPER_TRIANGULAR, A, T, FLA_NO_TRANSPOSE, U );
                u_is_formed = TRUE;
                break;
            case FLA_SVD_VECTORS_NONE:
                // Do nothing
                break;
            }
        }
        if ( v_is_formed == FALSE )
        {
            if ( jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
            {
                FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, A, S, FLA_CONJ_TRANSPOSE, A );
                v_is_formed = TRUE; /* and */
                V = A; // This V is actually V^H.

                // V^H -> V
                FLA_Obj_flip_base( &V );
                FLA_Obj_flip_view( &V );
                if ( FLA_Obj_is_complex( A ) )
                    FLA_Conjugate( V );
            }
            else if ( jobv != FLA_SVD_VECTORS_NONE )
            {
                FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, A, S, FLA_NO_TRANSPOSE, V );
                v_is_formed = TRUE;
            }
        }

        // For complex matrices, apply realification transformation.
        if ( FLA_Obj_is_complex( A ) && jobu != FLA_SVD_VECTORS_NONE )
        {
            FLA_Obj UL, UR;
            FLA_Part_1x2( U,   &UL, &UR,   min_m_n, FLA_LEFT );
            FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, UL );
        }
        if ( FLA_Obj_is_complex( A ) && jobv != FLA_SVD_VECTORS_NONE )
        {
            FLA_Obj VL, VR;
            FLA_Part_1x2( V,   &VL, &VR,   min_m_n, FLA_LEFT );
            FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );
        }

        // Perform a singular value decomposition on the upper bidiagonal matrix.
        r_val = FLA_Bsvd_ext_opt_var1( n_iter_max,
                                       d, e, G, H,
                                       jobu, U, jobv, V,
                                       FALSE, C, // C is not referenced
                                       b_alg );
    }
    else // if ( crossover_ratio * n_A <= m_A )
    {
        FLA_Obj TQ, R;
        FLA_Obj AT,
                AB;

        // Perform a QR factorization on A.
        FLA_QR_UT_create_T( A, &TQ );
        FLA_QR_UT( A, TQ );

        // Set the lower triangle of R to zero and then copy the upper
        // triangle of A to R.
        FLA_Part_2x1( A,   &AT,
                           &AB,   n_A, FLA_TOP );
        FLA_Obj_create( dt, n_A, n_A, 0, 0, &R );
        FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, R );
        FLA_Copyr( FLA_UPPER_TRIANGULAR, AT, R );

        // Form U; if necessary overwrite on A.
        if ( u_is_formed == FALSE )
        {
            switch ( jobu )
            {
            case FLA_SVD_VECTORS_MIN_OVERWRITE:
                U = A;
            case FLA_SVD_VECTORS_ALL:
            case FLA_SVD_VECTORS_MIN_COPY:
                FLA_QR_UT_form_Q( A, TQ, U );
                u_is_formed = TRUE;
                break;
            case FLA_SVD_VECTORS_NONE:
                // Do nothing
                break;
            }
        }
        FLA_Obj_free( &TQ );

        // Reduce the matrix to bidiagonal form.
        // Apply scalars to rotate elements on the superdiagonal to the real domain.
        // Extract the diagonal and superdiagonal from A.
        FLA_Bidiag_UT( R, T, S );
        if ( FLA_Obj_is_complex( R ) )
            FLA_Bidiag_UT_realify( R, rL, rR );
        FLA_Bidiag_UT_extract_real_diagonals( R, d, e );

        if ( v_is_formed == FALSE )
        {
            if ( jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
            {
                FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, R, S, FLA_CONJ_TRANSPOSE, AT );
                v_is_formed = TRUE; /* and */
                V = AT; // This V is actually V^H.

                // V^H -> V
                FLA_Obj_flip_base( &V );
                FLA_Obj_flip_view( &V );
                if ( FLA_Obj_is_complex( A ) )
                    FLA_Conjugate( V );
            }
            else if ( jobv != FLA_SVD_VECTORS_NONE )
            {
                FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, R, S, FLA_NO_TRANSPOSE, V );
                v_is_formed = TRUE;
            }
        }

        // Apply householder vectors U in R.
        FLA_Bidiag_UT_form_U_ext( FLA_UPPER_TRIANGULAR, R, T, FLA_NO_TRANSPOSE, R );

        // Apply the realifying scalars in rL and rR to U and V, respectively.
        if ( FLA_Obj_is_complex( A ) && jobu != FLA_SVD_VECTORS_NONE )
        {
            FLA_Obj RL, RR;
            FLA_Part_1x2( R,   &RL, &RR,   min_m_n, FLA_LEFT );
            FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE,    rL, RL );
        }
        if ( FLA_Obj_is_complex( A ) && jobv != FLA_SVD_VECTORS_NONE )
        {
            FLA_Obj VL, VR;
            FLA_Part_1x2( V,   &VL, &VR,   min_m_n, FLA_LEFT );
            FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );
        }

        // Perform a singular value decomposition on the bidiagonal matrix.
        r_val = FLA_Bsvd_ext_opt_var1( n_iter_max,
                                       d, e, G, H,
                                       jobu, R, jobv, V,
                                       FALSE, C,
                                       b_alg );

        // Multiply R into U, storing the result in A and then copying back
        // to U.
        if ( jobu != FLA_SVD_VECTORS_NONE )
        {
            FLA_Obj UL, UR;
            FLA_Part_1x2( U,   &UL, &UR,   min_m_n, FLA_LEFT );

            if ( jobu == FLA_SVD_VECTORS_MIN_OVERWRITE || 
                 jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
            {
                FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, UL, &C );
                FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                          FLA_ONE, UL, R, FLA_ZERO, C );
                FLA_Copy( C, UL );
                FLA_Obj_free( &C );
            }
            else
            {
                FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                          FLA_ONE, UL, R, FLA_ZERO, A );
                FLA_Copy( A, UL );
            }
        }
        FLA_Obj_free( &R );
    }

    // Copy the converged eigenvalues to the output vector.
    FLA_Copy( d, s );

    // No sort is required as it is applied on FLA_Bsvd.

    if ( apply_scale ) 
      FLA_Scal( apply_scale < 0 ? FLA_SAFE_MIN : FLA_SAFE_INV_MIN, s ); 

    // When V is overwritten, flip it again.
    if ( jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
    {
        // Always apply conjugation first wrt dimensions used; then, flip base.
        if ( FLA_Obj_is_complex( V ) )
            FLA_Conjugate( V );
        FLA_Obj_flip_base( &V );
    }

    FLA_Obj_free( &scale );
    FLA_Obj_free( &T );
    FLA_Obj_free( &S );

    if ( FLA_Obj_is_complex( A ) )
    {
        FLA_Obj_free( &rL );
        FLA_Obj_free( &rR );
    }

    FLA_Obj_free( &d );
    FLA_Obj_free( &e );
    FLA_Obj_free( &G );
    FLA_Obj_free( &H );

    return r_val;
}
Exemple #6
0
int main( int argc, char** argv ) {
  FLA_Datatype datatype = TESTTYPE;
  FLA_Datatype realtype = REALTYPE;
  FLA_Obj      
    A, TU, TV, 
    A_copy, A_recovered,
    U, V, Vb, B, Be, d, e, 
    DU, DV;

  FLA_Obj     
    ATL, ATR,
    ABL, ABR, Ae;

  FLA_Uplo     uplo;
  dim_t        m, n, min_m_n;
  FLA_Error    init_result; 

  double       residual_A = 0.0;

  if ( argc == 3 ) {
    m = atoi(argv[1]);
    n = atoi(argv[2]);
    min_m_n = min(m,n);
  } else {
    fprintf(stderr, "       \n");
    fprintf(stderr, "Usage: %s m n\n", argv[0]);
    fprintf(stderr, "       m : matrix length\n");
    fprintf(stderr, "       n : matrix width\n");
    fprintf(stderr, "       \n");
    return -1;
  }
  if ( m == 0 || n == 0 )
    return 0;

  FLA_Init_safe( &init_result );          

  // FLAME Bidiag setup
  FLA_Obj_create( datatype, m, n, 0, 0, &A );
  FLA_Bidiag_UT_create_T( A, &TU, &TV );

  // Rand A and create A_copy.
  FLA_Random_matrix( A ); 
  {
    scomplex *buff_A = FLA_Obj_buffer_at_view( A );
    buff_A[0].real = 4.4011e-01; buff_A[0].imag = -4.0150e-09; buff_A[2].real = -2.2385e-01; buff_A[2].imag = -1.5546e-01; buff_A[4].real = -6.3461e-02; buff_A[4].imag = 2.7892e-01; buff_A[6].real = -1.3197e-01; buff_A[6].imag = 5.0888e-01;  
    buff_A[1].real = 3.3352e-01; buff_A[1].imag = -6.6346e-02; buff_A[3].real = -1.9307e-01; buff_A[3].imag = -8.4066e-02; buff_A[5].real = -6.0446e-03; buff_A[5].imag = 2.2094e-01; buff_A[7].real = -2.3299e-02; buff_A[7].imag = 4.0553e-01;
  }

  //FLA_Set_to_identity( A );
  //FLA_Scal( FLA_MINUS_ONE, A );

  if ( m >= n ) {
    uplo = FLA_UPPER_TRIANGULAR;
    FLA_Part_2x2( A, &ATL, &ATR,
                     &ABL, &ABR, min_m_n - 1, 1, FLA_TL );
    Ae = ATR; 
  } else {
    uplo = FLA_LOWER_TRIANGULAR;
    FLA_Part_2x2( A, &ATL, &ATR,
                     &ABL, &ABR, 1, min_m_n - 1, FLA_TL );
    Ae = ABL;
  }

  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_copy );
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_recovered );

  // Bidiag test
  {
    FLA_Obj      norm;
    FLA_Bool     apply_scale;

    FLA_Obj_create( realtype, 1,1, 0,0, &norm );

    FLA_Max_abs_value( A, norm );
    apply_scale = FLA_Obj_gt( norm, FLA_OVERFLOW_SQUARE_THRES ); 

    if ( apply_scale ) FLA_Scal( FLA_SAFE_MIN, A );
    FLA_Bidiag_UT( A, TU, TV );
    if ( apply_scale ) FLA_Bidiag_UT_scale_diagonals( FLA_SAFE_INV_MIN, A ); 

    FLA_Obj_free( &norm );
  }


  // Orthonomal basis U, V. 
  FLA_Obj_create( datatype, m, min_m_n, 0, 0, &U ); FLA_Set( FLA_ZERO, U );
  FLA_Obj_create( datatype, min_m_n, n, 0, 0, &V ); FLA_Set( FLA_ZERO, V );

  FLA_Bidiag_UT_form_U_ext( uplo, A, TU, FLA_NO_TRANSPOSE,   U );
  FLA_Bidiag_UT_form_V_ext( uplo, A, TV, FLA_CONJ_TRANSPOSE, V ); 

  if ( FLA_Obj_is_complex( A ) ){
    FLA_Obj rL, rR;
    
    FLA_Obj_create( datatype, min_m_n, 1, 0, 0, &rL );
    FLA_Obj_create( datatype, min_m_n, 1, 0, 0, &rR );

    FLA_Obj_fshow( stdout, " - Factor no realified - ", A, "% 6.4e", "------");
    FLA_Bidiag_UT_realify( A, rL, rR );
    FLA_Obj_fshow( stdout, " - Factor    realified - ", A, "% 6.4e", "------");

    FLA_Obj_fshow( stdout, " - rL - ", rL, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - rR - ", rR, "% 6.4e", "------");

    FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, rL, U );
    FLA_Apply_diag_matrix( FLA_LEFT,  FLA_CONJUGATE, rR, V );

    FLA_Obj_free( &rL );
    FLA_Obj_free( &rR );
  }

  // U^H U
  FLA_Obj_create( datatype, min_m_n, min_m_n, 0, 0, &DU );
  FLA_Gemm_external( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE, 
                     FLA_ONE, U, U, FLA_ZERO, DU );

  // V^H V
  FLA_Obj_create( datatype, min_m_n, min_m_n, 0, 0, &DV );
  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, 
                     FLA_ONE, V, V, FLA_ZERO, DV );
  
  // Recover the matrix
  FLA_Obj_create( datatype, min_m_n, min_m_n, 0, 0, &B );
  FLA_Set( FLA_ZERO, B );

  // Set B
  FLA_Obj_create( datatype, min_m_n, 1, 0, 0, &d );  
  FLA_Set_diagonal_vector( A, d );
  FLA_Set_diagonal_matrix( d, B );
  FLA_Obj_free( &d );

  if ( min_m_n > 1 ) {
    FLA_Obj_create( datatype, min_m_n - 1 , 1, 0, 0, &e );  
    FLA_Set_diagonal_vector( Ae, e );
    if ( uplo == FLA_UPPER_TRIANGULAR ) {
      FLA_Part_2x2( B, &ATL, &ATR,
                    &ABL, &ABR, min_m_n - 1, 1, FLA_TL );
      Be = ATR;
    } else {
      FLA_Part_2x2( B, &ATL, &ATR,
                    &ABL, &ABR, 1, min_m_n - 1, FLA_TL );
      Be = ABL;
    }
    FLA_Set_diagonal_matrix( e, Be );
    FLA_Obj_free( &e );
  }

  // Vb := B (V^H)
  FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, V, &Vb );
  FLA_Trmm_external( FLA_LEFT, uplo, FLA_NO_TRANSPOSE,
                     FLA_NONUNIT_DIAG, FLA_ONE, B, Vb );

  // A := U Vb
  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                     FLA_ONE, U, Vb, FLA_ZERO, A_recovered );

  residual_A    = FLA_Max_elemwise_diff( A_copy, A_recovered );

  if (1) {
    FLA_Obj_fshow( stdout, " - Given - ", A_copy, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - Factor - ", A, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - TU - ", TU, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - TV - ", TV, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - B - ", B, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - U - ", U, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - V - ", V, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - Vb - ", Vb, "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - U'U - ", DU,  "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - VV' - ", DV,  "% 6.4e", "------");
    FLA_Obj_fshow( stdout, " - Recovered A - ", A_recovered, "% 6.4e", "------");
    fprintf( stdout, "lapack2flame: %lu x %lu: ", m, n);
    fprintf( stdout, "recovery A = %12.10e\n\n", residual_A ) ;
  }
  
  FLA_Obj_free( &A );
  FLA_Obj_free( &TU );
  FLA_Obj_free( &TV );

  FLA_Obj_free( &B );

  FLA_Obj_free( &U );
  FLA_Obj_free( &V );
  FLA_Obj_free( &Vb );

  FLA_Obj_free( &DU );
  FLA_Obj_free( &DV );

  FLA_Obj_free( &A_copy );
  FLA_Obj_free( &A_recovered );


  FLA_Finalize_safe( init_result );     
}