FLA_Error FLA_Symm_internal( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_symm_t* cntl ) { FLA_Error r_val = FLA_SUCCESS; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Symm_internal_check( side, uplo, alpha, A, B, beta, C, cntl ); if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_MATRIX && FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) { // Recurse r_val = FLA_Symm_internal( side, uplo, alpha, *FLASH_OBJ_PTR_AT( A ), *FLASH_OBJ_PTR_AT( B ), beta, *FLASH_OBJ_PTR_AT( C ), flash_symm_cntl_mm ); } else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && FLASH_Queue_get_enabled( ) ) { // Enqueue ENQUEUE_FLASH_Symm( side, uplo, alpha, A, B, beta, C, cntl ); } else { if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && !FLASH_Queue_get_enabled( ) ) { // Execute leaf cntl = flash_symm_cntl_blas; } // Parameter combinations if ( side == FLA_LEFT ) { if ( uplo == FLA_LOWER_TRIANGULAR ) r_val = FLA_Symm_ll( alpha, A, B, beta, C, cntl ); else if ( uplo == FLA_UPPER_TRIANGULAR ) r_val = FLA_Symm_lu( alpha, A, B, beta, C, cntl ); } else if ( side == FLA_RIGHT ) { if ( uplo == FLA_LOWER_TRIANGULAR ) r_val = FLA_Symm_rl( alpha, A, B, beta, C, cntl ); else if ( uplo == FLA_UPPER_TRIANGULAR ) r_val = FLA_Symm_ru( alpha, A, B, beta, C, cntl ); } } return r_val; }
// Internal dispatcher for the Copyt operation.
//
// After optional full-level parameter checking:
//   - FLA_HIER cntl, FLA_MATRIX elements in A, FLA_SUBPROBLEM variant:
//     recurse on the objects FLASH_OBJ_PTR_AT yields for A and B, using
//     flash_copyt_cntl.
//   - FLA_HIER cntl, FLA_SCALAR elements in A, queue enabled: enqueue the
//     operation and return FLA_SUCCESS.
//   - otherwise: call FLA_Copyt_n / _t / _c / _h according to trans. For a
//     FLA_HIER cntl with FLA_SCALAR elements (queue disabled) cntl is first
//     replaced by flash_copyt_cntl_blas.
//
// Returns the value of the routine that was called, or FLA_SUCCESS when the
// operation was enqueued or trans matched none of the four values.
FLA_Error FLA_Copyt_internal( FLA_Trans trans, FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl )
{
    int is_hier;

    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Copyt_internal_check( trans, A, B, cntl );

    is_hier = ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER );

    // Recurse into the next level of the hierarchy.
    if ( is_hier &&
         FLA_Obj_elemtype( A ) == FLA_MATRIX &&
         FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
    {
        return FLA_Copyt_internal( trans,
                                   *FLASH_OBJ_PTR_AT( A ),
                                   *FLASH_OBJ_PTR_AT( B ),
                                   flash_copyt_cntl );
    }

    if ( is_hier && FLA_Obj_elemtype( A ) == FLA_SCALAR )
    {
        if ( FLASH_Queue_get_enabled( ) )
        {
            // Enqueue; nothing is executed here.
            ENQUEUE_FLASH_Copyt( trans, A, B, cntl );
            return FLA_SUCCESS;
        }

        // Queue disabled: execute the leaf with the BLAS control tree.
        cntl = flash_copyt_cntl_blas;
    }

    // Parameter combinations.
    switch ( trans )
    {
    case FLA_NO_TRANSPOSE:      return FLA_Copyt_n( A, B, cntl );
    case FLA_TRANSPOSE:         return FLA_Copyt_t( A, B, cntl );
    case FLA_CONJ_NO_TRANSPOSE: return FLA_Copyt_c( A, B, cntl );
    case FLA_CONJ_TRANSPOSE:    return FLA_Copyt_h( A, B, cntl );
    default:                    break;
    }

    return FLA_SUCCESS;
}
// Internal dispatcher for the LQ_UT operation on A and T.
//
// After optional full-level parameter checking:
//   - FLA_HIER cntl with the FLA_SUBPROBLEM variant: enqueue the macro task
//     when the queue is enabled, otherwise run FLA_LQ_UT_macro_task.
//   - otherwise: call the unblocked, optimized-unblocked or blocked variant
//     selected by FLA_Cntl_variant( cntl ).
//
// Returns the value of the routine that was called, FLA_SUCCESS when the
// macro task was enqueued, or the value of
// FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ) for an unsupported variant.
FLA_Error FLA_LQ_UT_internal( FLA_Obj A, FLA_Obj T, fla_lqut_t* cntl )
{
    FLA_Error r_val = FLA_SUCCESS;

    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_LQ_UT_internal_check( A, T, cntl );

    if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
         FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
    {
        if ( FLASH_Queue_get_enabled( ) )
        {
            // Enqueue
            ENQUEUE_FLASH_LQ_UT_macro( A, T, cntl );
        }
        else
        {
            // Execute
            r_val = FLA_LQ_UT_macro_task( A, T, cntl );
        }
    }
    else
    {
        if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
        {
            r_val = FLA_LQ_UT_unb_var1( A, T );
        }
        else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
        {
            r_val = FLA_LQ_UT_opt_var1( A, T );
        }
        else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
        {
            r_val = FLA_LQ_UT_blk_var1( A, T, cntl );
        }
        else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
        {
            r_val = FLA_LQ_UT_unb_var2( A, T );
        }
        else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
        {
            r_val = FLA_LQ_UT_opt_var2( A, T );
        }
        else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
        {
            r_val = FLA_LQ_UT_blk_var2( A, T, cntl );
        }
        else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
        {
            r_val = FLA_LQ_UT_blk_var3( A, T, cntl );
        }
        else
        {
            // Keep the error code so that an unsupported variant is not
            // reported to the caller as FLA_SUCCESS.
            r_val = FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
        }
    }

    return r_val;
}
// Internal dispatcher for the Apply_pivots operation.
//
//   - FLA_HIER cntl with the FLA_SUBPROBLEM variant: the macro task is
//     enqueued (queue enabled) or executed directly (queue disabled). In
//     both cases the pivot argument is the object FLASH_OBJ_PTR_AT yields
//     for p.
//   - otherwise: call FLA_Apply_pivots_ln / _rn / _lt / _rt according to
//     trans (FLA_NO_TRANSPOSE or FLA_TRANSPOSE) and side.
//
// Returns the value of the routine that was called, or FLA_SUCCESS when the
// task was enqueued or no trans/side combination matched.
FLA_Error FLA_Apply_pivots_internal( FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A, fla_appiv_t* cntl )
{
    if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
         FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
    {
        if ( !FLASH_Queue_get_enabled( ) )
        {
            // Execute leaf
            return FLA_Apply_pivots_macro_task( side, trans,
                                                *FLASH_OBJ_PTR_AT( p ),
                                                A, cntl );
        }

        // Enqueue
        ENQUEUE_FLASH_Apply_pivots_macro( side, trans,
                                          *FLASH_OBJ_PTR_AT( p ),
                                          A, cntl );
        return FLA_SUCCESS;
    }

    // Parameter combinations.
    switch ( trans )
    {
    case FLA_NO_TRANSPOSE:
        switch ( side )
        {
        case FLA_LEFT:  return FLA_Apply_pivots_ln( p, A, cntl );
        case FLA_RIGHT: return FLA_Apply_pivots_rn( p, A, cntl );
        default:        break;
        }
        break;

    case FLA_TRANSPOSE:
        switch ( side )
        {
        case FLA_LEFT:  return FLA_Apply_pivots_lt( p, A, cntl );
        case FLA_RIGHT: return FLA_Apply_pivots_rt( p, A, cntl );
        default:        break;
        }
        break;

    default:
        break;
    }

    return FLA_SUCCESS;
}
// FLASH front end for Gemv.
//
// Checks the parameters (when the error level is at least
// FLA_MIN_ERROR_CHECKING), then calls FLA_Gemv_internal with
// flash_gemv_cntl_fm_rp while the FLASH queue is disabled. The queue is
// re-enabled afterwards only if it was enabled on entry.
//
// Returns the value of FLA_Gemv_internal.
FLA_Error FLASH_Gemv( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
    FLA_Bool  queue_was_enabled;
    FLA_Error status;

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_Gemv_check( transa, alpha, A, x, beta, y );

    // Remember the queue state, then switch it off for this call.
    queue_was_enabled = FLASH_Queue_get_enabled();
    FLASH_Queue_disable();

    status = FLA_Gemv_internal( transa, alpha, A, x, beta, y,
                                flash_gemv_cntl_fm_rp );

    // Put the queue back the way the caller had it.
    if ( queue_was_enabled )
        FLASH_Queue_enable();

    return status;
}
// FLASH front end for Axpy.
//
// Checks the parameters (when the error level is at least
// FLA_MIN_ERROR_CHECKING), then calls FLA_Axpy_internal with
// flash_axpy_cntl while the FLASH queue is disabled. The queue is
// re-enabled afterwards only if it was enabled on entry.
//
// Returns the value of FLA_Axpy_internal.
FLA_Error FLASH_Axpy( FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
    FLA_Bool  queue_was_enabled;
    FLA_Error status;

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_Axpy_check( alpha, A, B );

    // Remember the queue state, then switch it off for this call.
    queue_was_enabled = FLASH_Queue_get_enabled();
    FLASH_Queue_disable();

    status = FLA_Axpy_internal( alpha, A, B, flash_axpy_cntl );

    // Put the queue back the way the caller had it.
    if ( queue_was_enabled )
        FLASH_Queue_enable();

    return status;
}
// FLASH front end for Trsv.
//
// Checks the parameters (when the error level is at least
// FLA_MIN_ERROR_CHECKING), then calls FLA_Trsv_internal with
// flash_trsv_cntl while the FLASH queue is disabled. The queue is
// re-enabled afterwards only if it was enabled on entry.
//
// Returns the value of FLA_Trsv_internal.
FLA_Error FLASH_Trsv( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x )
{
    FLA_Bool  queue_was_enabled;
    FLA_Error status;

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_Trsv_check( uplo, trans, diag, A, x );

    // Remember the queue state, then switch it off for this call.
    queue_was_enabled = FLASH_Queue_get_enabled();
    FLASH_Queue_disable();

    status = FLA_Trsv_internal( uplo, trans, diag, A, x, flash_trsv_cntl );

    // Put the queue back the way the caller had it.
    if ( queue_was_enabled )
        FLASH_Queue_enable();

    return status;
}
// Internal dispatcher for the Trinv operation.
//
// After optional full-level parameter checking:
//   - FLA_HIER cntl, FLA_MATRIX elements in A, FLA_SUBPROBLEM variant:
//     recurse on the object FLASH_OBJ_PTR_AT yields for A, using
//     flash_trinv_cntl.
//   - FLA_HIER cntl, FLA_SCALAR elements in A, queue enabled: enqueue the
//     operation and return FLA_SUCCESS.
//   - otherwise: call FLA_Trinv_ln / _lu / _un / _uu according to uplo and
//     diag. For a FLA_HIER cntl with FLA_SCALAR elements (queue disabled)
//     cntl is first replaced by fla_trinv_cntl_leaf.
//
// Returns the value of the routine that was called, or FLA_SUCCESS when the
// operation was enqueued or no uplo/diag combination matched.
FLA_Error FLA_Trinv_internal( FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A, fla_trinv_t* cntl )
{
    int is_hier;

    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Trinv_internal_check( uplo, diag, A, cntl );

    is_hier = ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER );

    // Recurse into the next level of the hierarchy.
    if ( is_hier &&
         FLA_Obj_elemtype( A ) == FLA_MATRIX &&
         FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
    {
        return FLA_Trinv_internal( uplo,
                                   diag,
                                   *FLASH_OBJ_PTR_AT( A ),
                                   flash_trinv_cntl );
    }

    if ( is_hier && FLA_Obj_elemtype( A ) == FLA_SCALAR )
    {
        if ( FLASH_Queue_get_enabled( ) )
        {
            // Enqueue; nothing is executed here.
            ENQUEUE_FLASH_Trinv( uplo, diag, A, cntl );
            return FLA_SUCCESS;
        }

        // Queue disabled: execute the leaf with the leaf control tree.
        cntl = fla_trinv_cntl_leaf;
    }

    // Parameter combinations.
    switch ( uplo )
    {
    case FLA_LOWER_TRIANGULAR:
        switch ( diag )
        {
        case FLA_NONUNIT_DIAG: return FLA_Trinv_ln( A, cntl );
        case FLA_UNIT_DIAG:    return FLA_Trinv_lu( A, cntl );
        default:               break;
        }
        break;

    case FLA_UPPER_TRIANGULAR:
        switch ( diag )
        {
        case FLA_NONUNIT_DIAG: return FLA_Trinv_un( A, cntl );
        case FLA_UNIT_DIAG:    return FLA_Trinv_uu( A, cntl );
        default:               break;
        }
        break;

    default:
        break;
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Gemm_internal( FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl ) { FLA_Error r_val = FLA_SUCCESS; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemm_internal_check( transa, transb, alpha, A, B, beta, C, cntl ); if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && (FLA_Obj_elemtype( A ) == FLA_TENSOR || FLA_Obj_elemtype( A ) == FLA_MATRIX) && FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) { // Recurse r_val = FLA_Gemm_internal( transa, transb, alpha, *FLASH_OBJ_PTR_AT( A ), *FLASH_OBJ_PTR_AT( B ), beta, *FLASH_OBJ_PTR_AT( C ), flash_gemm_cntl_mm_op ); } else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && FLASH_Queue_get_enabled( ) ) { // Enqueue ENQUEUE_FLASH_Gemm( transa, transb, alpha, A, B, beta, C, cntl ); } else { if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && !FLASH_Queue_get_enabled( ) ) { // Execute leaf cntl = flash_gemm_cntl_blas; } // Parameter combinations if ( transa == FLA_NO_TRANSPOSE ) { if ( transb == FLA_NO_TRANSPOSE ) r_val = FLA_Gemm_nn( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_TRANSPOSE ) r_val = FLA_Gemm_nt( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Gemm_nc( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_TRANSPOSE ) r_val = FLA_Gemm_nh( alpha, A, B, beta, C, cntl ); } else if ( transa == FLA_TRANSPOSE ) { if ( transb == FLA_NO_TRANSPOSE ) r_val = FLA_Gemm_tn( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_TRANSPOSE ) r_val = FLA_Gemm_tt( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Gemm_tc( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_TRANSPOSE ) r_val = FLA_Gemm_th( alpha, A, B, beta, C, cntl ); } else if ( transa == FLA_CONJ_NO_TRANSPOSE ) { if ( transb == FLA_NO_TRANSPOSE ) r_val = FLA_Gemm_cn( alpha, A, B, beta, C, cntl ); else if ( transb == 
FLA_TRANSPOSE ) r_val = FLA_Gemm_ct( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Gemm_cc( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_TRANSPOSE ) r_val = FLA_Gemm_ch( alpha, A, B, beta, C, cntl ); } else if ( transa == FLA_CONJ_TRANSPOSE ) { if ( transb == FLA_NO_TRANSPOSE ) r_val = FLA_Gemm_hn( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_TRANSPOSE ) r_val = FLA_Gemm_ht( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Gemm_hc( alpha, A, B, beta, C, cntl ); else if ( transb == FLA_CONJ_TRANSPOSE ) r_val = FLA_Gemm_hh( alpha, A, B, beta, C, cntl ); } } return r_val; }
FLA_Error FLASH_SA_LU( FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t* cntl ) { FLA_Obj DT, D0, DB, D1, D2; FLA_Obj ET, E0, EB, E1, E2; FLA_Obj pT, p0, pB, p1, p2; FLA_Obj LT, L0, LB, L1, L2; FLA_Part_2x1( D, &DT, &DB, 0, FLA_TOP ); FLA_Part_2x1( E, &ET, &EB, 0, FLA_TOP ); FLA_Part_2x1( p, &pT, &pB, 0, FLA_TOP ); FLA_Part_2x1( L, <, &LB, 0, FLA_TOP ); while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) ) { FLA_Repart_2x1_to_3x1( DT, &D0, /* ** */ /* ** */ &D1, DB, &D2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( ET, &E0, /* ** */ /* ** */ &E1, EB, &E2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( pT, &p0, /* ** */ /* ** */ &p1, pB, &p2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( LT, &L0, /* ** */ /* ** */ &L1, LB, &L2, 1, FLA_BOTTOM ); /*------------------------------------------------------------*/ if ( FLASH_Queue_get_enabled( ) ) { // Enqueue ENQUEUE_FLASH_SA_LU( *FLASH_OBJ_PTR_AT( B ), *FLASH_OBJ_PTR_AT( D1 ), *FLASH_OBJ_PTR_AT( p1 ), *FLASH_OBJ_PTR_AT( L1 ), nb_alg, FLA_Cntl_sub_lu( cntl ) ); } else { // Execute leaf FLA_SA_LU_task( *FLASH_OBJ_PTR_AT( B ), *FLASH_OBJ_PTR_AT( D1 ), *FLASH_OBJ_PTR_AT( p1 ), *FLASH_OBJ_PTR_AT( L1 ), nb_alg, FLA_Cntl_sub_lu( cntl ) ); } FLASH_SA_FS( L1, D1, p1, C, E1, nb_alg, FLA_Cntl_sub_gemm1( cntl ) ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &DT, D0, D1, /* ** */ /* ** */ &DB, D2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &ET, E0, E1, /* ** */ /* ** */ &EB, E2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &pT, p0, p1, /* ** */ /* ** */ &pB, p2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( <, L0, L1, /* ** */ /* ** */ &LB, L2, FLA_TOP ); } return FLA_SUCCESS; }
FLA_Error FLA_Apply_Q_UT_internal( FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl ) { FLA_Error r_val = FLA_SUCCESS; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Apply_Q_UT_internal_check( side, trans, direct, storev, A, T, W, B, cntl ); if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_MATRIX && FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) { // Recurse r_val = FLA_Apply_Q_UT_internal( side, trans, direct, storev, *FLASH_OBJ_PTR_AT( A ), *FLASH_OBJ_PTR_AT( T ), *FLASH_OBJ_PTR_AT( W ), *FLASH_OBJ_PTR_AT( B ), flash_apqut_cntl ); } else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && FLASH_Queue_get_enabled( ) ) { // Enqueue ENQUEUE_FLASH_Apply_Q_UT( side, trans, direct, storev, A, T, W, B, cntl ); } else { if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && !FLASH_Queue_get_enabled( ) ) { // Execute leaf. 
cntl = fla_apqut_cntl_leaf; } if ( side == FLA_LEFT ) { if ( trans == FLA_NO_TRANSPOSE ) { if ( direct == FLA_FORWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_lnfc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_lnfr( A, T, W, B, cntl ); } else if ( direct == FLA_BACKWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_lnbc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_lnbr( A, T, W, B, cntl ); } } else if ( trans == FLA_TRANSPOSE || trans == FLA_CONJ_TRANSPOSE ) { if ( direct == FLA_FORWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_lhfc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_lhfr( A, T, W, B, cntl ); } else if ( direct == FLA_BACKWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_lhbc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_lhbr( A, T, W, B, cntl ); } } } else if ( side == FLA_RIGHT ) { if ( trans == FLA_NO_TRANSPOSE ) { if ( direct == FLA_FORWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_rnfc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_rnfr( A, T, W, B, cntl ); } else if ( direct == FLA_BACKWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_rnbc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_rnbr( A, T, W, B, cntl ); } } else if ( trans == FLA_TRANSPOSE || trans == FLA_CONJ_TRANSPOSE ) { if ( direct == FLA_FORWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_rhfc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_rhfr( A, T, W, B, cntl ); } else if ( direct == FLA_BACKWARD ) { if ( storev == FLA_COLUMNWISE ) r_val = FLA_Apply_Q_UT_rhbc( A, T, W, B, cntl ); else if ( storev == FLA_ROWWISE ) r_val = FLA_Apply_Q_UT_rhbr( A, T, W, B, cntl ); } } } } return r_val; }
// Internal dispatcher for the Copy operation.
//
// After optional full-level parameter checking:
//   - FLA_HIER cntl, FLA_MATRIX elements in A, FLA_SUBPROBLEM variant:
//     recurse on the objects FLASH_OBJ_PTR_AT yields for A and B, using
//     flash_copy_cntl.
//   - FLA_HIER cntl, FLA_SCALAR elements in A, queue enabled: enqueue the
//     operation and return FLA_SUCCESS.
//   - otherwise: select an implementation from FLA_Cntl_variant( cntl ):
//     FLA_Copy_task for FLA_SUBPROBLEM, or one of the blocked variants
//     (variants 2 and 4 only when FLA_ENABLE_NON_CRITICAL_CODE is defined).
//     For a FLA_HIER cntl with FLA_SCALAR elements (queue disabled) cntl is
//     first replaced by flash_copy_cntl_blas.
//
// Returns the value of the routine that was called, FLA_SUCCESS when the
// operation was enqueued, or the value of
// FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ) for an unsupported variant.
FLA_Error FLA_Copy_internal( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl )
{
    FLA_Error status = FLA_SUCCESS;
    int       is_hier;

    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Copy_internal_check( A, B, cntl );

    is_hier = ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER );

    // Recurse into the next level of the hierarchy.
    if ( is_hier &&
         FLA_Obj_elemtype( A ) == FLA_MATRIX &&
         FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
    {
        return FLA_Copy_internal( *FLASH_OBJ_PTR_AT( A ),
                                  *FLASH_OBJ_PTR_AT( B ),
                                  flash_copy_cntl );
    }

    if ( is_hier && FLA_Obj_elemtype( A ) == FLA_SCALAR )
    {
        if ( FLASH_Queue_get_enabled( ) )
        {
            // Enqueue; nothing is executed here.
            ENQUEUE_FLASH_Copy( A, B, cntl );
            return FLA_SUCCESS;
        }

        // Queue disabled: execute the leaf with the BLAS control tree.
        cntl = flash_copy_cntl_blas;
    }

    // Variant selection.
    switch ( FLA_Cntl_variant( cntl ) )
    {
    case FLA_SUBPROBLEM:
        status = FLA_Copy_task( A, B, cntl );
        break;

    case FLA_BLOCKED_VARIANT1:
        status = FLA_Copy_blk_var1( A, B, cntl );
        break;

#ifdef FLA_ENABLE_NON_CRITICAL_CODE
    case FLA_BLOCKED_VARIANT2:
        status = FLA_Copy_blk_var2( A, B, cntl );
        break;
#endif

    case FLA_BLOCKED_VARIANT3:
        status = FLA_Copy_blk_var3( A, B, cntl );
        break;

#ifdef FLA_ENABLE_NON_CRITICAL_CODE
    case FLA_BLOCKED_VARIANT4:
        status = FLA_Copy_blk_var4( A, B, cntl );
        break;
#endif

    default:
        status = FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
        break;
    }

    return status;
}
FLA_Error FLA_Trsm_internal( FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t* cntl ) { FLA_Error r_val = FLA_SUCCESS; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trsm_internal_check( side, uplo, transa, diag, alpha, A, B, cntl ); if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_MATRIX && FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM ) { // Recurse r_val = FLA_Trsm_internal( side, uplo, transa, diag, alpha, *FLASH_OBJ_PTR_AT( A ), *FLASH_OBJ_PTR_AT( B ), flash_trsm_cntl_mm ); } else if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && FLASH_Queue_get_enabled( ) ) { // Enqueue ENQUEUE_FLASH_Trsm( side, uplo, transa, diag, alpha, A, B, cntl ); } else { if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER && FLA_Obj_elemtype( A ) == FLA_SCALAR && !FLASH_Queue_get_enabled( ) ) { // Execute leaf cntl = flash_trsm_cntl_blas; } // Parameter combinations if ( side == FLA_LEFT ) { if ( uplo == FLA_LOWER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_lln( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) r_val = FLA_Trsm_llt( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_llc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_llh( diag, alpha, A, B, cntl ); } else if ( uplo == FLA_UPPER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_lun( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) r_val = FLA_Trsm_lut( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_luc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_luh( diag, alpha, A, B, cntl ); } } else if ( side == FLA_RIGHT ) { if ( uplo == FLA_LOWER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_rln( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) 
r_val = FLA_Trsm_rlt( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_rlc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_rlh( diag, alpha, A, B, cntl ); } else if ( uplo == FLA_UPPER_TRIANGULAR ) { if ( transa == FLA_NO_TRANSPOSE ) r_val = FLA_Trsm_run( diag, alpha, A, B, cntl ); else if ( transa == FLA_TRANSPOSE ) r_val = FLA_Trsm_rut( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_NO_TRANSPOSE ) r_val = FLA_Trsm_ruc( diag, alpha, A, B, cntl ); else if ( transa == FLA_CONJ_TRANSPOSE ) r_val = FLA_Trsm_ruh( diag, alpha, A, B, cntl ); } } } return r_val; }