コード例 #1
0
ファイル: FLASH_LU_piv.c プロジェクト: anaptyxis/libflame
/*
 * FLASH_LU_piv
 *
 * Front end for the LU factorization with pivoting on a hierarchical
 * object. The work is handed to FLA_LU_piv_internal() between a
 * FLASH_Queue_begin() / FLASH_Queue_end() pair.
 *
 * A  - matrix object passed to FLA_LU_piv_internal(); must have a
 *      hierarchical depth of exactly 1, otherwise the routine aborts.
 * p  - second operand forwarded unchanged to FLA_LU_piv_internal()
 *      (presumably the pivot vector -- confirm against FLA_LU_piv_check()).
 *
 * Returns FLA_SUCCESS when error checking is below the minimum level;
 * otherwise returns whatever FLASH_LU_find_zero_on_diagonal( A ) reports.
 */
FLA_Error FLASH_LU_piv( FLA_Obj A, FLA_Obj p )
{
  FLA_Error status = FLA_SUCCESS;

  // Validate the operands when error checking is switched on.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_LU_piv_check( A, p );
  }

  // Only a hierarchy of depth 1 is handled by the present algorithm. The
  // explicit test is a reminder to lift this restriction once a more
  // general algorithm is in place.
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_LU_piv() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Parallel region: run the factorization with the large control tree.
  FLASH_Queue_begin();
  FLA_LU_piv_internal( A, p, flash_lu_piv_cntl );
  FLASH_Queue_end();

  // Singularity check: only performed when error checking is enabled.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    status = FLASH_LU_find_zero_on_diagonal( A );
  }

  return status;
}
コード例 #2
0
ファイル: FLA_Check.c プロジェクト: anaptyxis/libflame
/*
 * FLA_Check_error_code_helper
 *
 * Reports a non-success error code and aborts.
 *
 * code - error code to inspect; FLA_SUCCESS is returned immediately.
 * file - source file name forwarded to FLA_Print_message().
 * line - source line number forwarded to FLA_Print_message().
 *
 * For any other code, the message printed is the string for `code` when it
 * lies in [FLA_ERROR_CODE_MAX, FLA_ERROR_CODE_MIN], and the string for
 * FLA_UNDEFINED_ERROR_CODE otherwise; FLA_Abort() is then called.
 * NOTE(review): the bounds are ordered MAX <= code <= MIN, which presumably
 * reflects negative-valued error codes -- confirm against their definitions.
 *
 * Returns `code` (only reached for FLA_SUCCESS, or if FLA_Abort() returns).
 */
FLA_Error FLA_Check_error_code_helper( int code, char* file, int line )
{
  int reported_code;

  // Nothing to report on success.
  if ( code == FLA_SUCCESS )
  {
    return code;
  }

  // Placeholder condition: fatal error checking is always enabled for now.
  if ( TRUE )
  {
    // Recognized codes are reported as-is; anything else is reported as
    // an undefined error code.
    reported_code = ( FLA_ERROR_CODE_MAX <= code && code <= FLA_ERROR_CODE_MIN )
                    ? code
                    : FLA_UNDEFINED_ERROR_CODE;

    FLA_Print_message( FLA_Error_string_for_code( reported_code ),
                       file, line );
    FLA_Abort();
  }

  return code;
}
コード例 #3
0
ファイル: FLASH_Apply_Q_UT.c プロジェクト: anaptyxis/libflame
/*
 * FLASH_Apply_Q_UT
 *
 * Front end that forwards its operands to FLA_Apply_Q_UT_internal() inside
 * a FLASH_Queue_begin() / FLASH_Queue_end() pair.
 *
 * side, trans, direct, storev, A, T, W, B are passed through unchanged to
 * FLA_Apply_Q_UT_check() (when error checking is enabled) and to
 * FLA_Apply_Q_UT_internal().
 *
 * The routine aborts unless the scalar length and scalar width of the
 * top-left block of T are equal. That common value is also written into
 * the blocksize of fla_apqut_cntl_leaf when that blocksize is non-NULL.
 *
 * Returns the value produced by FLA_Apply_Q_UT_internal().
 */
FLA_Error FLASH_Apply_Q_UT( FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B )
{
  FLA_Error status;
  dim_t     inner_b;

  // Validate the operands when error checking is switched on.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Apply_Q_UT_check( side, trans, direct, storev, A, T, W, B );
  }

  // The length of the top-left block of T is the blocksize that the QR/LQ
  // factorization used; it serves as the inner blocksize here.
  inner_b = FLASH_Obj_scalar_length_tl( T );

  // The traditional (non-incremental) algorithm-by-blocks needs the
  // algorithmic blocksize to match the storage blocksize.
  if ( FLASH_Obj_scalar_width_tl( T ) != inner_b )
  {
    FLA_Print_message( "FLASH_Apply_Q_UT() requires that b_alg == b_store",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Propagate the blocksize to the leaf control tree node that handles the
  // flat subproblem, provided that node carries a blocksize object.
  if ( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ) != NULL )
  {
    FLA_Blocksize_set( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ),
                       inner_b, inner_b, inner_b, inner_b );
  }

  // Parallel region: run the internal routine with the
  // flash_apqut_cntl_blas control tree.
  FLASH_Queue_begin();
  status = FLA_Apply_Q_UT_internal( side, trans, direct, storev, A, T, W, B,
                                    flash_apqut_cntl_blas );
  FLASH_Queue_end();

  return status;
}
コード例 #4
0
ファイル: FLA_task_partitioning.c プロジェクト: pgawron/tlash
/*
 * FLA_task_determine_matrix_size
 *
 * Returns the size of A along the dimension selected by `from`:
 *   FLA_TOP / FLA_BOTTOM          -> FLA_Obj_length( A )
 *   FLA_LEFT / FLA_RIGHT          -> FLA_Obj_width( A )
 *   FLA_TL / FLA_TR / FLA_BL / BR -> FLA_Obj_min_dim( A )
 *
 * Any other value of `from` prints a message and calls FLA_Abort(); should
 * that call return, the result is 0.
 */
int FLA_task_determine_matrix_size( FLA_Obj A, FLA_Quadrant from )
{
	// Moving along the rows: the relevant dimension is the length.
	if ( from == FLA_TOP || from == FLA_BOTTOM )
		return FLA_Obj_length( A );

	// Moving along the columns: the relevant dimension is the width.
	if ( from == FLA_LEFT || from == FLA_RIGHT )
		return FLA_Obj_width( A );

	// Moving diagonally. A full object may be rectangular, so the smaller
	// dimension is the one that matters; for a processed partition, which
	// is very probably square, taking the minimum does no harm.
	if ( from == FLA_TL || from == FLA_TR || from == FLA_BL || from == FLA_BR )
		return FLA_Obj_min_dim( A );

	// Unrecognized direction.
	FLA_Print_message( "Unexpected default in switch statement!", __FILE__, __LINE__ );
	FLA_Abort();

	return 0;
}
コード例 #5
0
ファイル: FLA_task_partitioning.c プロジェクト: pgawron/tlash
/*
 * FLA_Task_compute_blocksize
 *
 * Computes a blocksize for partitioning A, given the already processed
 * partition A_proc and the direction `from` in which the partitioning moves.
 *
 * tag    - forwarded, together with the thread count from
 *          FLA_Queue_get_num_threads(), to FLA_task_get_num_partitions().
 * A      - object whose size along `from` is measured.
 * A_proc - processed partition whose size along `from` is measured.
 * from   - direction passed to FLA_task_determine_matrix_size().
 *
 * The sign of the value returned by FLA_task_get_num_partitions() selects
 * the strategy:
 *   > 0 : FLA_task_determine_absolute_blocksize() with that value
 *   < 0 : FLA_task_determine_relative_blocksize() with its absolute value
 *   = 0 : error; a message is printed and FLA_Abort() is called
 *
 * Returns the computed blocksize. On the error path the result is 0 should
 * FLA_Abort() ever return.
 */
int FLA_Task_compute_blocksize( int tag, FLA_Obj A, FLA_Obj A_proc, FLA_Quadrant from )
{
	int n_threads = FLA_Queue_get_num_threads();
	int A_size, A_proc_size;
	int n_part;
	// Start from a defined value: the error branch below does not assign b,
	// and nothing visible here guarantees that FLA_Abort() never returns.
	int b = 0;

	// Determine the sizes of the matrix partitions.
	A_size      = FLA_task_determine_matrix_size( A, from );
	A_proc_size = FLA_task_determine_matrix_size( A_proc, from );

	// Determine the raw blocksize value.
	n_part      = FLA_task_get_num_partitions( n_threads, tag );

	// Determine the blocksize based on the sign of the value from
	// _get_num_partitions().
	if ( n_part > 0 )
	{
		b = FLA_task_determine_absolute_blocksize( A_size,
		                                           A_proc_size,
		                                           n_part );
	}
	else if ( n_part < 0 )
	{
		b = FLA_task_determine_relative_blocksize( A_size,
		                                           A_proc_size,
		                                           abs( n_part ) );
	}
	else
	{
		FLA_Print_message( "Detected blocksize of 0!", __FILE__, __LINE__ );
		FLA_Abort();
	}

	return b;
}