// FLASH_QR_UT_inc_opt1
//
// Front end that runs FLA_QR_UT_inc_blk_var2() on A and TW inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the
// flash_qrutinc_cntl control tree.
//
// A, TW are forwarded unchanged to the parameter check and to
// FLA_QR_UT_inc_blk_var2(). A temporary object is created from A with
// FLASH_Obj_create_diag_panel() for the duration of the call and released
// before returning.
//
// Returns whatever FLA_QR_UT_inc_blk_var2() returned.
FLA_Error FLASH_QR_UT_inc_opt1( FLA_Obj A, FLA_Obj TW )
{
  FLA_Obj   diag_copy;
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_QR_UT_inc_check( A, TW );
  }

  // Scratch object that will hold copies of the blocks on the diagonal
  // of A.
  FLASH_Obj_create_diag_panel( A, &diag_copy );

  // Parallel region: the factorization is issued between begin and end.
  FLASH_Queue_begin();
  status = FLA_QR_UT_inc_blk_var2( A, TW, diag_copy, flash_qrutinc_cntl );
  FLASH_Queue_end();

  // The scratch object is no longer needed once the region has ended.
  FLASH_Obj_free( &diag_copy );

  return status;
}
// FLASH_CAQR_UT_inc_noopt
//
// Front end for the incremental CAQR factorization. Inside one
// FLASH_Queue_begin()/FLASH_Queue_end() region it
//   1. factorizes the panels of A (with ATW),
//   2. copies the triangles of A into R,
//   3. runs FLA_CAQR_UT_inc_blk_var1() on R and RTW with the
//      flash_caqrutinc_cntl control tree.
//
// p is the number of partitions; it is used, together with A, to derive the
// number of blocks per partition.
//
// Always returns FLA_SUCCESS; the results of the internal calls are not
// inspected.
FLA_Error FLASH_CAQR_UT_inc_noopt( dim_t p, FLA_Obj A, FLA_Obj ATW, FLA_Obj R, FLA_Obj RTW )
{
  dim_t blocks_per_part;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_CAQR_UT_inc_check( p, A, ATW, R, RTW );
  }

  // Translate the partition count into a partition length.
  blocks_per_part = FLA_CAQR_UT_inc_compute_blocks_per_part( p, A );

  FLASH_Queue_begin();

  // Incremental QR on each of the p partitions of A.
  FLA_CAQR_UT_inc_factorize_panels( blocks_per_part, A, ATW );

  // Move the triangles produced in A over to R.
  FLA_CAQR_UT_inc_copy_triangles( blocks_per_part, A, R );

  // Incremental CAQR on the upper triangular factors now held in R.
  FLA_CAQR_UT_inc_blk_var1( R, RTW, flash_caqrutinc_cntl );

  FLASH_Queue_end();

  return FLA_SUCCESS;
}
Beispiel #3
0
// FLASH_LU_piv
//
// Front end that runs FLA_LU_piv_internal() on A and p inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the flash_lu_piv_cntl
// control tree.
//
// Aborts (after printing a message) when FLASH_Obj_depth( A ) is not 1.
//
// Returns the result of FLASH_LU_find_zero_on_diagonal( A ) when the current
// error-checking level is at least FLA_MIN_ERROR_CHECKING, and FLA_SUCCESS
// otherwise. The value returned by FLA_LU_piv_internal() is not inspected.
FLA_Error FLASH_LU_piv( FLA_Obj A, FLA_Obj p )
{
  // Validate the arguments only when error checking is enabled at the
  // minimum level or above.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_LU_piv_check( A, p );
  }

  // The LU_piv algorithm currently implemented assumes a hierarchical depth
  // of exactly 1. The restriction is enforced explicitly so that it is not
  // forgotten when a more general algorithm replaces this one.
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_LU_piv() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Parallel region: run FLA_LU_piv_internal() with the large control tree.
  FLASH_Queue_begin();
  FLA_LU_piv_internal( A, p, flash_lu_piv_cntl );
  FLASH_Queue_end();

  // Singularity check, performed only when error checking is enabled.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    return FLASH_LU_find_zero_on_diagonal( A );
  }

  return FLA_SUCCESS;
}
// FLASH_Apply_CAQ_UT_inc
//
// Front end that applies the Q factors of an incremental CAQR factorization
// to B. Inside one FLASH_Queue_begin()/FLASH_Queue_end() region it
//   1. applies the per-panel Q's via FLA_Apply_CAQ_UT_inc_apply_panels()
//      using A, ATW, W and B,
//   2. partitions W with FLA_Part_2x1() and passes the top piece, together
//      with R and RTW, to FLA_Apply_CAQ_UT_inc_internal() with the
//      flash_apcaqutinc_cntl control tree.
//
// p is the number of partitions; it is combined with R to derive the number
// of blocks per partition. side, trans, direct and storev are forwarded
// unchanged to the check routine and to the internal routine.
//
// Returns whatever FLA_Apply_CAQ_UT_inc_internal() returned.
FLA_Error FLASH_Apply_CAQ_UT_inc( dim_t p,
                                  FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev,
                                  FLA_Obj A, FLA_Obj ATW, FLA_Obj R, FLA_Obj RTW, FLA_Obj W, FLA_Obj B )
{
  FLA_Obj   W_top, W_bottom;
  dim_t     blocks_per_part;
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Apply_CAQ_UT_inc_check( side, trans, direct, storev, A, ATW, R, RTW, W, B );
  }

  // Translate the partition count into a partition length (based on R).
  blocks_per_part = FLA_CAQR_UT_inc_compute_blocks_per_part( p, R );

  FLASH_Queue_begin();

  // First stage: the Q's from the individual incremental QR factorizations.
  FLA_Apply_CAQ_UT_inc_apply_panels( blocks_per_part, A, ATW, W, B );

  // Only the top piece of W is handed to the second stage; the bottom piece
  // is produced by the partitioning call but not used afterwards.
  FLA_Part_2x1( W,   &W_top,
                     &W_bottom,    1, FLA_TOP );

  // Second stage: the Q from the factorization of the upper triangular R's.
  status = FLA_Apply_CAQ_UT_inc_internal( side, trans, direct, storev,
                                          R, RTW, W_top, B, flash_apcaqutinc_cntl );

  FLASH_Queue_end();

  return status;
}
Beispiel #5
0
// FLASH_Her2k
//
// Front end that runs FLA_Her2k_internal() inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the
// flash_her2k_cntl_mm control tree. All arguments are forwarded unchanged to
// the check routine and to the internal routine.
//
// Returns whatever FLA_Her2k_internal() returned.
FLA_Error FLASH_Her2k( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );
  }

  // Parallel region: tasks are enqueued through a SuperMatrix-aware
  // control tree.
  FLASH_Queue_begin();
  status = FLA_Her2k_internal( uplo, trans, alpha, A, B, beta, C, flash_her2k_cntl_mm );
  FLASH_Queue_end();

  return status;
}
Beispiel #6
0
// FLASH_Lyap
//
// Front end that runs FLA_Lyap_internal() inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the flash_lyap_cntl
// control tree. All arguments are forwarded unchanged to the check routine
// and to the internal routine.
//
// Returns whatever FLA_Lyap_internal() returned.
FLA_Error FLASH_Lyap( FLA_Trans trans, FLA_Obj isgn, FLA_Obj A, FLA_Obj C, FLA_Obj scale )
{
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Lyap_check( trans, isgn, A, C, scale );
  }

  // Parallel region: tasks are enqueued through a SuperMatrix-aware
  // control tree.
  FLASH_Queue_begin();
  status = FLA_Lyap_internal( trans, isgn, A, C, scale, flash_lyap_cntl );
  FLASH_Queue_end();

  return status;
}
Beispiel #7
0
// FLASH_Copy
//
// Front end that runs FLA_Copy_internal( A, B, ... ) inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the flash_copy_cntl
// control tree.
//
// Returns whatever FLA_Copy_internal() returned.
FLA_Error FLASH_Copy( FLA_Obj A, FLA_Obj B )
{
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Copy_check( A, B );
  }

  // Parallel region wrapping the copy.
  FLASH_Queue_begin();
  status = FLA_Copy_internal( A, B, flash_copy_cntl );
  FLASH_Queue_end();

  return status;
}
Beispiel #8
0
// FLASH_Trsm
//
// Front end that runs FLA_Trsm_internal() inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the
// flash_trsm_cntl_mm control tree. All arguments are forwarded unchanged to
// the check routine and to the internal routine.
//
// Returns whatever FLA_Trsm_internal() returned.
FLA_Error FLASH_Trsm( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Trsm_check( side, uplo, trans, diag, alpha, A, B );
  }

  // Parallel region: tasks are enqueued through a SuperMatrix-aware
  // control tree.
  FLASH_Queue_begin();
  status = FLA_Trsm_internal( side, uplo, trans, diag, alpha, A, B, flash_trsm_cntl_mm );
  FLASH_Queue_end();

  return status;
}
Beispiel #9
0
// FLASH_Chol
//
// Front end that runs FLA_Chol_internal( uplo, A, ... ) inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the flash_chol_cntl
// control tree.
//
// Returns whatever FLA_Chol_internal() returned.
FLA_Error FLASH_Chol( FLA_Uplo uplo, FLA_Obj A )
{
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Chol_check( uplo, A );
  }

  // Parallel region: tasks are enqueued through a SuperMatrix-aware
  // control tree.
  FLASH_Queue_begin();
  status = FLA_Chol_internal( uplo, A, flash_chol_cntl );
  FLASH_Queue_end();

  return status;
}
Beispiel #10
0
// FLASH_Apply_Q2_UT
//
// Front end that runs FLA_Apply_Q2_UT_internal() inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the flash_apq2ut_cntl
// control tree. All arguments are forwarded unchanged to the check routine
// and to the internal routine.
//
// Returns whatever FLA_Apply_Q2_UT_internal() returned.
FLA_Error FLASH_Apply_Q2_UT( FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev,
                             FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C,
                                                              FLA_Obj E )
{
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Apply_Q2_UT_check( side, trans, direct, storev, D, T, W, C, E );
  }

  // Parallel region: run the internal routine with the standard control
  // tree.
  FLASH_Queue_begin();
  status = FLA_Apply_Q2_UT_internal( side, trans, direct, storev,
                                     D, T, W, C, E, flash_apq2ut_cntl );
  FLASH_Queue_end();

  return status;
}
Beispiel #11
0
// FLASH_Apply_Q_UT
//
// Front end that runs FLA_Apply_Q_UT_internal() inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the
// flash_apqut_cntl_blas control tree.
//
// Before the parallel region is opened, the blocksize is read from T via
// FLASH_Obj_scalar_length_tl(). The routine aborts (after printing a
// message) when that value differs from FLASH_Obj_scalar_width_tl( T ).
// If the fla_apqut_cntl_leaf control node has a blocksize object, all four
// of its values are set to the blocksize read from T.
//
// Returns whatever FLA_Apply_Q_UT_internal() returned.
FLA_Error FLASH_Apply_Q_UT( FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B )
{
  dim_t     inner_bsize;
  FLA_Error status;

  // Validate the arguments only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_Apply_Q_UT_check( side, trans, direct, storev, A, T, W, B );
  }

  // The length of the top-left block of T gives the blocksize that the
  // QR/LQ factorization used; it becomes the inner blocksize here.
  inner_bsize = FLASH_Obj_scalar_length_tl( T );

  // The traditional (non-incremental) algorithm-by-blocks only works when
  // the algorithmic blocksize equals the storage blocksize.
  if ( FLASH_Obj_scalar_width_tl( T ) != inner_bsize )
  {
    FLA_Print_message( "FLASH_Apply_Q_UT() requires that b_alg == b_store",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Propagate the blocksize to the control tree node that handles the flat
  // subproblem, provided that node carries a blocksize object.
  if ( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ) != NULL )
  {
    FLA_Blocksize_set( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ),
                       inner_bsize, inner_bsize, inner_bsize, inner_bsize );
  }

  // Parallel region: run the internal routine with the standard control
  // tree.
  FLASH_Queue_begin();
  status = FLA_Apply_Q_UT_internal( side, trans, direct, storev, A, T, W, B,
                                    flash_apqut_cntl_blas );
  FLASH_Queue_end();

  return status;
}
Beispiel #12
0
// FLASH_LU_nopiv
//
// Front end that runs FLA_LU_nopiv_internal( A, ... ) inside a
// FLASH_Queue_begin()/FLASH_Queue_end() region, using the
// flash_lu_nopiv_cntl control tree.
//
// Returns the result of FLASH_LU_find_zero_on_diagonal( A ) when the current
// error-checking level is at least FLA_MIN_ERROR_CHECKING; otherwise returns
// whatever FLA_LU_nopiv_internal() returned.
FLA_Error FLASH_LU_nopiv( FLA_Obj A )
{
  FLA_Error status;

  // Validate the argument only when the current error-checking level is
  // at least FLA_MIN_ERROR_CHECKING.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    FLA_LU_nopiv_check( A );
  }

  // Parallel region: tasks are enqueued through a SuperMatrix-aware
  // control tree.
  FLASH_Queue_begin();
  status = FLA_LU_nopiv_internal( A, flash_lu_nopiv_cntl );
  FLASH_Queue_end();

  // Singularity check; when it runs, its result replaces the value obtained
  // from the internal routine.
  if ( FLA_MIN_ERROR_CHECKING <= FLA_Check_error_level() )
  {
    status = FLASH_LU_find_zero_on_diagonal( A );
  }

  return status;
}