FLA_Error FLA_Trsm_internal( FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t* cntl ) { FLA_Error r_val = FLA_SUCCESS; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trsm_internal_check( side, uplo, transa, diag, alpha, A, B, cntl ); if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_MATRIX && FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) { // Recurse r_val = FLA_Trsm_internal( side, uplo, transa, diag, alpha, *FLASH_OBJ_PTR_AT( A ), *FLASH_OBJ_PTR_AT( B ), flash_trsm_cntl_mm ); } else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && FLASH_Queue_get_enabled( ) ) { // Enqueue ENQUEUE_FLASH_Trsm( side, uplo, transa, diag, alpha, A, B, cntl ); } else { if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && !FLASH_Queue_get_enabled( ) ) { // Execute leaf cntl = flash_trsm_cntl_blas; } // Parameter combinations if ( side == FLA_LEFT ) { if ( uplo == FLA_LOWER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_lln( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) r_val = FLA_Trsm_llt( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_llc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_llh( diag, alpha, A, B, cntl ); } else if ( uplo == FLA_UPPER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_lun( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) r_val = FLA_Trsm_lut( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_luc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_luh( diag, alpha, A, B, cntl ); } } else if ( side == FLA_RIGHT ) { if ( uplo == FLA_LOWER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_rln( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) 
r_val = FLA_Trsm_rlt( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_rlc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_rlh( diag, alpha, A, B, cntl ); } else if ( uplo == FLA_UPPER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_run( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) r_val = FLA_Trsm_rut( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_ruc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_ruh( diag, alpha, A, B, cntl ); } } } return r_val; }
/*
 * FLASH_Axpy_hierarchy
 *
 * Recursively walks the hierarchical object *H together with the flat
 * object F, and performs an axpy between matching pieces once a level of
 * H whose elements are scalars is reached.
 *
 * Parameters
 *   direction   FLA_FLAT_TO_HIER: calls FLA_Axpy_external( alpha, F, *H )
 *               FLA_HIER_TO_FLAT: calls FLA_Axpy_external( alpha, *H, F )
 *               Any other value performs no axpy at the leaf.
 *   alpha       forwarded unchanged to FLA_Axpy_external
 *   F           flat matrix (or the current view into it)
 *   H           hierarchical matrix (or the current sub-hierarchy)
 *
 * Returns
 *   Always FLA_SUCCESS.  The return values of FLA_Axpy_external and of the
 *   recursive calls are not inspected.
 */
FLA_Error FLASH_Axpy_hierarchy( int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj* H )
{
    // Base case: the elements of *H are scalars, so this is a leaf block.
    if ( FLA_Obj_elemtype( *H ) == FLA_SCALAR )
    {
        // The direction chosen by the top-level caller decides which
        // operand is the source: flat into the hierarchical leaf, or the
        // hierarchical leaf into flat.
        if ( direction == FLA_FLAT_TO_HIER )
        {
#ifdef FLA_ENABLE_SCC
            if ( FLA_is_owner() )
#endif
            {
                FLA_Axpy_external( alpha, F, *H );
            }
        }
        else if ( direction == FLA_HIER_TO_FLAT )
        {
#ifdef FLA_ENABLE_SCC
            if ( FLA_is_owner() )
#endif
            {
                FLA_Axpy_external( alpha, *H, F );
            }
        }

        return FLA_SUCCESS;
    }

    // Column partitions of H and F.
    FLA_Obj HL, HR, H0, H1, H2;
    FLA_Obj FL, FR, F0, F1, F2;

    // Row partitions of the current column panels H1 and F1.
    FLA_Obj H1T, H1B, H01, H11, H21;
    FLA_Obj F1T, F1B, F01, F11, F21;

    dim_t f11_length;   // scalar length of H11, used as the length of F11
    dim_t f1_width;     // scalar width of H1, used as the width of F1

    FLA_Part_1x2( *H, &HL, &HR, 0, FLA_LEFT );
    FLA_Part_1x2( F,  &FL, &FR, 0, FLA_LEFT );

    // Sweep left to right, one column of H at a time.
    while ( FLA_Obj_width( HL ) < FLA_Obj_width( *H ) )
    {
        FLA_Repart_1x2_to_1x3( HL, HR, &H0, &H1, &H2,
                               1, FLA_RIGHT );

        // F1 has to be as wide as H1 is when measured in scalars.
        f1_width = FLASH_Obj_scalar_width( H1 );

        FLA_Repart_1x2_to_1x3( FL, FR, &F0, &F1, &F2,
                               f1_width, FLA_RIGHT );

        FLA_Part_2x1( H1, &H1T, &H1B, 0, FLA_TOP );
        FLA_Part_2x1( F1, &F1T, &F1B, 0, FLA_TOP );

        // Sweep top to bottom, one element of the H1 column at a time.
        while ( FLA_Obj_length( H1T ) < FLA_Obj_length( H1 ) )
        {
            FLA_Repart_2x1_to_3x1( H1T, &H01,
                                        &H11,
                                   H1B, &H21,
                                   1, FLA_BOTTOM );

            // F11 has to be as long as H11 is when measured in scalars.
            f11_length = FLASH_Obj_scalar_length( H11 );

            FLA_Repart_2x1_to_3x1( F1T, &F01,
                                        &F11,
                                   F1B, &F21,
                                   f11_length, FLA_BOTTOM );

            // Descend one level: pair F11 with the object stored at H11.
            FLASH_Axpy_hierarchy( direction, alpha, F11, FLASH_OBJ_PTR_AT( H11 ) );

            FLA_Cont_with_3x1_to_2x1( &H1T, H01,
                                            H11,
                                      &H1B, H21,
                                      FLA_TOP );
            FLA_Cont_with_3x1_to_2x1( &F1T, F01,
                                            F11,
                                      &F1B, F21,
                                      FLA_TOP );
        }

        FLA_Cont_with_1x3_to_1x2( &HL, &HR, H0, H1, H2,
                                  FLA_LEFT );
        FLA_Cont_with_1x3_to_1x2( &FL, &FR, F0, F1, F2,
                                  FLA_LEFT );
    }

    return FLA_SUCCESS;
}