Exemple #1
0
/*
 * LU_blk_var4 - blocked LU factorization sweep (variant 4) over A.
 *
 * A is partitioned into quadrants, starting from a 0 x 0 top-left quadrant.
 * Each iteration exposes a diagonal block A11 of at most nb_alg rows taken
 * from the bottom-right quadrant, updates and factors it with LU_unb_var4(),
 * and then updates the panel to its right (A12) and the panel below it (A21).
 * No pivoting is performed by this routine.
 *
 * Parameters:
 *   A      - matrix object to factor. The updates are applied to its
 *            sub-blocks (presumably views into A, so A is overwritten in
 *            place -- confirm against the libflame partitioning routines).
 *   nb_alg - requested algorithmic block size. A resulting step of 0 is
 *            bumped to 1, because a zero-sized step would never advance the
 *            partitioning and the loop would not terminate.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error LU_blk_var4( FLA_Obj A, int nb_alg )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  dim_t b;

  /* Initial partitioning: the top-left quadrant is 0 x 0. */
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  /* Iterate until the top-left quadrant has as many rows as A. */
  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){

    /* Step size: the requested block size, capped by what is left in ABR. */
    b = min( FLA_Obj_length( ABR ), nb_alg );

    /* Guard against a zero step (nb_alg == 0), which would loop forever. */
    if ( b == 0 ) b = 1;

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );

    /*------------------------------------------------------------*/

    /* A11 = A11 - A10 * A01; */
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_MINUS_ONE, A10, A01, FLA_ONE, A11 );

    /* A11 = LU( A11 ); */
    LU_unb_var4( A11 );

    /* A12 = A12 - A10 * A02; */
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_MINUS_ONE, A10, A02, FLA_ONE, A12 );

    /* A12 = inv( trilu( A11 ) ) * A12;   (unit lower triangle of A11) */
    FLA_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
              FLA_ONE, A11, A12 );

    /* A21 = A21 - A20 * A01; */
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_MINUS_ONE, A20, A01, FLA_ONE, A21 );

    /* A21 = A21 * inv( triu( A11 ) );    (non-unit upper triangle of A11) */
    FLA_Trsm( FLA_RIGHT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
              FLA_ONE, A11, A21 );

    /*------------------------------------------------------------*/

    /* Fold the processed blocks into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
Exemple #2
0
/*
 * LU_blk_var3 - blocked LU factorization sweep (variant 3) over A.
 *
 * A is partitioned into quadrants, starting from a 0 x 0 top-left quadrant.
 * Each iteration exposes a column panel (A01 / A11 / A21) of at most nb_alg
 * columns, solves A01 against the unit lower triangle of A00, updates and
 * factors the diagonal block A11 with LU_unb_var3(), and finally updates A21.
 * A02, A12 and A22 are not touched inside an iteration. No pivoting is
 * performed by this routine.
 *
 * Parameters:
 *   A      - matrix object to factor. The updates are applied to its
 *            sub-blocks (presumably views into A, so A is overwritten in
 *            place -- confirm against the libflame partitioning routines).
 *   nb_alg - requested algorithmic block size. A resulting step of 0 or less
 *            is bumped to 1, because a non-positive step would never advance
 *            the partitioning and the loop would not terminate.
 *
 * Returns FLA_SUCCESS.
 */
int LU_blk_var3( FLA_Obj A, int nb_alg )
{
  FLA_Obj ATL,   ATR,      A00, A01, A02,
          ABL,   ABR,      A10, A11, A12,
                           A20, A21, A22;

  int b;

  /* Initial partitioning: the top-left quadrant is 0 x 0. */
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  /* Iterate until the top-left quadrant has as many rows as A. */
  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){

    /* Step size: the requested block size, capped by what is left in ABR. */
    b = min( FLA_Obj_length( ABR ), nb_alg );

    /* Guard against a non-positive step, which would loop forever. */
    if ( b <= 0 ) b = 1;

    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );

    /*------------------------------------------------------------*/

    // A01 := inv( trilu( A00 ) ) * A01   (unit lower triangle of A00)
    FLA_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR,
              FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
              FLA_ONE, A00, A01 );

    // A11 := LU( A11 - A10 * A01 )
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_MINUS_ONE, A10, A01, FLA_ONE, A11 );
    LU_unb_var3( A11 );

    // A21 := ( A21 - A20 * A01 ) * inv( triu( A11 ) )   (non-unit upper triangle)
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
              FLA_MINUS_ONE, A20, A01, FLA_ONE, A21 );
    FLA_Trsm( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
              FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
              FLA_ONE, A11, A21 );

    /*------------------------------------------------------------*/

    /* Fold the processed blocks into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );

  }

  return FLA_SUCCESS;
}
Exemple #3
0
// Run one eig_gest test experiment and report its performance and residual.
//
// The routine builds m x m test matrices A, Y and B, times
// libfla_test_eig_gest_impl() over n_repeats runs (keeping the fastest),
// then applies triangular solves/multiplies with B to the result so it can
// be compared against the saved copy of the original A.
//
// Parameters:
//   params    - test parameters; only b_flash and b_alg_flat are read here.
//   var       - variant number, forwarded to the control-tree setup.
//   sc_str    - storage characters; sc_str[0], [1], [2] are used when
//               creating A, Y and B respectively.
//   datatype  - datatype of the test matrices.
//   p_cur     - current problem size; with m_input fixed at -1, m == p_cur.
//   pci       - index into pc_str; character 0 selects the inverse flag and
//               character 1 selects uplo.
//   n_repeats - number of timed repetitions.
//   impl      - which implementation/front-end is being tested.
//   perf      - out: m^3 / time_min / FLOPS_PER_UNIT_PERF, multiplied by 4
//               when A is complex; set to 0.0 for skipped combinations.
//   residual  - out: FLA_Norm1 of ( checked A - saved A ); set to 0.0 for
//               skipped combinations.
void libfla_test_eig_gest_experiment( test_params_t params,
                                      unsigned int  var,
                                      char*         sc_str,
                                      FLA_Datatype  datatype,
                                      unsigned int  p_cur,
                                      unsigned int  pci,
                                      unsigned int  n_repeats,
                                      signed int    impl,
                                      double*       perf,
                                      double*       residual )
{
	dim_t        b_flash    = params.b_flash;
	dim_t        b_alg_flat = params.b_alg_flat;
	double       time_min   = 1e9;
	double       time;
	unsigned int i;
	unsigned int m;
	signed int   m_input    = -1;
	FLA_Inv      inv;       // inverse flag, not an uplo value
	FLA_Uplo     uplo;
	FLA_Trans    trans_left, trans_right;
	FLA_Obj      A, B, Y, norm;
	FLA_Obj      A_save, B_save;
	FLA_Obj      A_test, B_test, Y_test;

	// Determine the dimensions.
	if ( m_input < 0 ) m = p_cur / abs(m_input);
	else               m = p_cur;

	// Translate parameter characters to libflame constants.
	FLA_Param_map_char_to_flame_inv( &pc_str[pci][0], &inv );
	FLA_Param_map_char_to_flame_uplo( &pc_str[pci][1], &uplo );

	// Variant 3 of the flat implementations is skipped for the no-inverse
	// case: report zeros and leave before anything is allocated.
	if ( inv == FLA_NO_INVERSE &&
	     ( ( impl == FLA_TEST_FLAT_UNB_VAR && var == 3 ) ||
	       ( impl == FLA_TEST_FLAT_OPT_VAR && var == 3 ) ||
	       ( impl == FLA_TEST_FLAT_BLK_VAR && var == 3 ) )
	   )
	{
		*perf     = 0.0;
		*residual = 0.0;
		return;
	}

	// Create the matrices for the current operation.
	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[0], m, m, &A );
	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[1], m, m, &Y );
	libfla_test_obj_create( datatype, FLA_NO_TRANSPOSE, sc_str[2], m, m, &B );

	// Initialize the test matrices.
	FLA_Random_spd_matrix( uplo, A );
	FLA_Scalr( uplo, FLA_TWO, A );
	FLA_Hermitianize( uplo, A );

	FLA_Random_spd_matrix( uplo, B );
	FLA_Scalr( uplo, FLA_TWO, B );
	FLA_Chol( uplo, B );

	// Save the original object contents in a temporary object.
	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, A, &A_save );
	FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &B_save );

	// Create a real scalar object to hold the norm of A.
	FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm );

	// Use hierarchical matrices if we're testing the FLASH front-end.
	if ( impl == FLA_TEST_HIER_FRONT_END )
	{
		FLASH_Obj_create_hier_copy_of_flat( A, 1, &b_flash, &A_test );
		FLASH_Obj_create_hier_copy_of_flat( Y, 1, &b_flash, &Y_test );
		FLASH_Obj_create_hier_copy_of_flat( B, 1, &b_flash, &B_test );
	}
	else
	{
		A_test = A;
		Y_test = Y;
		B_test = B;
	}

	// Create a control tree for the individual variants.
	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
	     impl == FLA_TEST_FLAT_OPT_VAR ||
	     impl == FLA_TEST_FLAT_BLK_VAR )
		libfla_test_eig_gest_cntl_create( var, b_alg_flat );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore the inputs so every repetition starts from the same data.
		if ( impl == FLA_TEST_HIER_FRONT_END )
		{
			FLASH_Obj_hierarchify( A_save, A_test );
			FLASH_Obj_hierarchify( B_save, B_test );
		}
		else
		{
			FLA_Copy_external( A_save, A_test );
			FLA_Copy_external( B_save, B_test );
		}

		time = FLA_Clock();

		libfla_test_eig_gest_impl( impl, inv, uplo, A_test, Y_test, B_test );

		time = FLA_Clock() - time;
		time_min = min( time_min, time );
	}

	// Check our solution.
	// The side on which B is applied conjugate-transposed depends on the
	// inv/uplo combination; it is the same for the hierarchical and the flat
	// check, so it is selected once here.
	if ( ( inv == FLA_NO_INVERSE && uplo == FLA_LOWER_TRIANGULAR ) ||
	     ( inv == FLA_INVERSE    && uplo == FLA_UPPER_TRIANGULAR ) )
	{
		trans_left  = FLA_CONJ_TRANSPOSE;
		trans_right = FLA_NO_TRANSPOSE;
	}
	else
	{
		trans_left  = FLA_NO_TRANSPOSE;
		trans_right = FLA_CONJ_TRANSPOSE;
	}

	if ( impl == FLA_TEST_HIER_FRONT_END )
	{
		FLASH_Hermitianize( uplo, A_test );

		if ( inv == FLA_NO_INVERSE )
		{
			FLASH_Trsm( FLA_LEFT, uplo, trans_left, FLA_NONUNIT_DIAG,
			            FLA_ONE, B_test, A_test );
			FLASH_Trsm( FLA_RIGHT, uplo, trans_right, FLA_NONUNIT_DIAG,
			            FLA_ONE, B_test, A_test );
		}
		else // if ( inv == FLA_INVERSE )
		{
			FLASH_Trmm( FLA_LEFT, uplo, trans_left, FLA_NONUNIT_DIAG,
			            FLA_ONE, B_test, A_test );
			FLASH_Trmm( FLA_RIGHT, uplo, trans_right, FLA_NONUNIT_DIAG,
			            FLA_ONE, B_test, A_test );
		}

		// Bring the hierarchical result back into the flat A for the
		// residual computation below.
		FLASH_Obj_flatten( A_test, A );
	}
	else
	{
		// A_test aliases A here, so the result is already in A.
		FLA_Hermitianize( uplo, A_test );

		if ( inv == FLA_NO_INVERSE )
		{
			FLA_Trsm( FLA_LEFT, uplo, trans_left, FLA_NONUNIT_DIAG,
			          FLA_ONE, B_test, A_test );
			FLA_Trsm( FLA_RIGHT, uplo, trans_right, FLA_NONUNIT_DIAG,
			          FLA_ONE, B_test, A_test );
		}
		else // if ( inv == FLA_INVERSE )
		{
			FLA_Trmm( FLA_LEFT, uplo, trans_left, FLA_NONUNIT_DIAG,
			          FLA_ONE, B_test, A_test );
			FLA_Trmm( FLA_RIGHT, uplo, trans_right, FLA_NONUNIT_DIAG,
			          FLA_ONE, B_test, A_test );
		}
	}

	// Free the hierarchical matrices if we're testing the FLASH front-end.
	if ( impl == FLA_TEST_HIER_FRONT_END )
	{
		FLASH_Obj_free( &A_test );
		FLASH_Obj_free( &Y_test );
		FLASH_Obj_free( &B_test );
	}

	// Free the control trees if we're testing the variants.
	if ( impl == FLA_TEST_FLAT_UNB_VAR ||
	     impl == FLA_TEST_FLAT_OPT_VAR ||
	     impl == FLA_TEST_FLAT_BLK_VAR )
		libfla_test_eig_gest_cntl_free();

	// Compute the performance of the best experiment repeat.
	*perf = 1.0 * m * m * m / time_min / FLOPS_PER_UNIT_PERF;
	if ( FLA_Obj_is_complex( A ) ) *perf *= 4.0;

	// Compute the residual.
	FLA_Axpy_external( FLA_MINUS_ONE, A_save, A );
	FLA_Norm1( A, norm );
	FLA_Obj_extract_real_scalar( norm, residual );

	// Free the supporting flat objects.
	FLA_Obj_free( &norm );
	FLA_Obj_free( &A_save );
	FLA_Obj_free( &B_save );

	// Free the flat test matrices.
	FLA_Obj_free( &A );
	FLA_Obj_free( &Y );
	FLA_Obj_free( &B );
}