// FLASH_LU_piv()
//
// Front end for the LU factorization with pivoting on a hierarchical
// matrix A with pivot object p. The work is issued through
// FLA_LU_piv_internal() between FLASH_Queue_begin() and FLASH_Queue_end().
//
// Returns FLA_SUCCESS when error checking is below FLA_MIN_ERROR_CHECKING;
// otherwise returns whatever FLASH_LU_find_zero_on_diagonal( A ) reports.
// Note that the value returned by FLA_LU_piv_internal() is not used.
FLA_Error FLASH_LU_piv( FLA_Obj A, FLA_Obj p )
{
  // Validate the arguments when error checking is enabled.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_LU_piv_check( A, p );
  }

  // The LU_piv algorithm currently implemented assumes a hierarchical
  // depth of exactly 1. The restriction is enforced explicitly so that it
  // is not forgotten once a more general algorithm replaces this one.
  if ( FLASH_Obj_depth( A ) != 1 )
  {
    FLA_Print_message( "FLASH_LU_piv() currently only supports matrices of depth 1",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // Parallel region: run FLA_LU_piv_internal() with the large control
  // tree between the queue begin/end calls.
  FLASH_Queue_begin();
  FLA_LU_piv_internal( A, p, flash_lu_piv_cntl );
  FLASH_Queue_end();

  // With error checking disabled there is no singularity scan to perform.
  if ( FLA_Check_error_level() < FLA_MIN_ERROR_CHECKING )
  {
    return FLA_SUCCESS;
  }

  // Check for singularity by looking for a zero on the diagonal.
  return FLASH_LU_find_zero_on_diagonal( A );
}
FLA_Error FLA_Check_error_code_helper( int code, char* file, int line ) { if ( code == FLA_SUCCESS ) return code; //if ( /* fatal error checking enabled */ ) if ( TRUE ) { if ( FLA_ERROR_CODE_MAX <= code && code <= FLA_ERROR_CODE_MIN ) { FLA_Print_message( FLA_Error_string_for_code( code ), file, line ); FLA_Abort(); } else { FLA_Print_message( FLA_Error_string_for_code( FLA_UNDEFINED_ERROR_CODE ), file, line ); FLA_Abort(); } } return code; }
// FLASH_Apply_Q_UT()
//
// Front end that applies Q (described by A and T, with workspace W) to B
// by calling FLA_Apply_Q_UT_internal() inside a FLASH queue region.
// The side, trans, direct and storev arguments are forwarded unchanged to
// the checker and to the internal routine.
//
// Returns the value produced by FLA_Apply_Q_UT_internal().
FLA_Error FLASH_Apply_Q_UT( FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev,
                            FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B )
{
  FLA_Error status;
  dim_t     inner_b;

  // Validate the arguments when error checking is enabled.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Apply_Q_UT_check( side, trans, direct, storev, A, T, W, B );
  }

  // The scalar length of the top-left block of T gives the blocksize used
  // by the QR/LQ factorization; it serves as the inner blocksize here.
  inner_b = FLASH_Obj_scalar_length_tl( T );

  // The traditional (non-incremental) algorithm-by-blocks requires the
  // algorithmic blocksize to equal the storage blocksize.
  if ( FLASH_Obj_scalar_width_tl( T ) != inner_b )
  {
    FLA_Print_message( "FLASH_Apply_Q_UT() requires that b_alg == b_store",
                       __FILE__, __LINE__ );
    FLA_Abort();
  }

  // If the leaf control tree node carries a blocksize object, set all four
  // of its values to the inner blocksize for the flat subproblem.
  if ( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ) != NULL )
  {
    FLA_Blocksize_set( FLA_Cntl_blocksize( fla_apqut_cntl_leaf ),
                       inner_b, inner_b, inner_b, inner_b );
  }

  // Parallel region: run the internal routine with flash_apqut_cntl_blas.
  FLASH_Queue_begin();
  status = FLA_Apply_Q_UT_internal( side, trans, direct, storev,
                                    A, T, W, B, flash_apqut_cntl_blas );
  FLASH_Queue_end();

  return status;
}
// FLA_task_determine_matrix_size()
//
// Returns the size of A along the dimension being traversed, selected by
// `from`:
//   FLA_TOP / FLA_BOTTOM                 -> FLA_Obj_length( A )
//   FLA_LEFT / FLA_RIGHT                 -> FLA_Obj_width( A )
//   FLA_TL / FLA_TR / FLA_BL / FLA_BR    -> FLA_Obj_min_dim( A )
// Any other value prints a message and calls FLA_Abort(); 0 is returned
// if FLA_Abort() hands control back.
int FLA_task_determine_matrix_size( FLA_Obj A, FLA_Quadrant from )
{
  switch ( from )
  {
    case FLA_TOP:
    case FLA_BOTTOM:
      return FLA_Obj_length( A );

    case FLA_LEFT:
    case FLA_RIGHT:
      return FLA_Obj_width( A );

    case FLA_TL:
    case FLA_TR:
    case FLA_BL:
    case FLA_BR:
      // When A is the full object it may be rectangular, so the minimum
      // dimension is the right measure. When A is the processed
      // partition it is very probably square, and min_dim() is harmless.
      return FLA_Obj_min_dim( A );

    default:
      FLA_Print_message( "Unexpected default in switch statement!",
                         __FILE__, __LINE__ );
      FLA_Abort();
  }

  // Fallback value for the unexpected-direction path.
  return 0;
}
// FLA_Task_compute_blocksize()
//
// Computes the blocksize for the next partitioning step of a task.
//
//   tag     Passed to FLA_task_get_num_partitions() together with the
//           current thread count to obtain the partition count.
//   A       The object being partitioned.
//   A_proc  The already-processed partition of A.
//   from    Direction/quadrant that selects which dimension is measured
//           (see FLA_task_determine_matrix_size()).
//
// The sign of the partition count picks the strategy: a positive count
// uses FLA_task_determine_absolute_blocksize(), a negative count uses
// FLA_task_determine_relative_blocksize() with its absolute value, and a
// count of zero prints a message and calls FLA_Abort().
//
// Returns the computed blocksize, or 0 on the zero-count path should
// FLA_Abort() return.
int FLA_Task_compute_blocksize( int tag, FLA_Obj A, FLA_Obj A_proc, FLA_Quadrant from )
{
  int n_threads = FLA_Queue_get_num_threads();
  int A_size, A_proc_size;
  int n_part;
  // Initialized so that the error path below never returns an
  // indeterminate value, even if FLA_Abort() were to return.
  int b = 0;

  // Determine the sizes of the matrix partitions.
  A_size      = FLA_task_determine_matrix_size( A, from );
  A_proc_size = FLA_task_determine_matrix_size( A_proc, from );

  // Determine the raw blocksize value.
  n_part = FLA_task_get_num_partitions( n_threads, tag );

  // Determine the blocksize based on the sign of the value from
  // _get_num_partitions().
  if ( n_part > 0 )
  {
    b = FLA_task_determine_absolute_blocksize( A_size, A_proc_size, n_part );
  }
  else if ( n_part < 0 )
  {
    b = FLA_task_determine_relative_blocksize( A_size, A_proc_size, abs( n_part ) );
  }
  else
  {
    FLA_Print_message( "Detected blocksize of 0!", __FILE__, __LINE__ );
    FLA_Abort();
  }

  return b;
}