/*
 * Unblocked Trmm, variant 4, "ruh" case.
 *
 * B is scaled by alpha, after which each row of B is overwritten in place
 * with that row times triu( A )' (one FLA_Trmv_external call per row).
 * Rows are visited from the bottom of B towards the top.
 *
 * diagA is forwarded unchanged to FLA_Trmv_external.
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_ruh_unb_var4( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Scal_external( alpha, B );

  /* Zero rows are assigned to the bottom part to begin with. */
  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_BOTTOM );

  while ( FLA_Obj_length( B ) > FLA_Obj_length( B_bot ) )
  {
    /* Split one row (b1t) off the top part. */
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_TOP );

    /* b1t = b1t * triu( A )' */
    FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                       diagA, A, b1t );

    /* Hand the finished row over to the bottom part. */
    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Trsm, variant 3, "rlh" case.
 *
 * B is scaled by alpha, after which each row of B is overwritten in place
 * by a triangular solve against the lower triangle of A (one
 * FLA_Trsv_external call per row).  Rows are visited from the top of B
 * towards the bottom.
 *
 * diagA is forwarded unchanged to FLA_Trsv_external.
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_rlh_unb_var3( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Scal_external( alpha, B );

  /* Zero rows are assigned to the top part to begin with. */
  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_TOP );

  while ( FLA_Obj_length( B ) > FLA_Obj_length( B_top ) )
  {
    /* Split one row (b1t) off the bottom part. */
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_BOTTOM );

    /* b1t = b1t / tril( A )' */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                       diagA, A, b1t );

    /* Hand the finished row over to the top part. */
    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Trsm, variant 3, "luh" case.
 *
 * B is scaled by alpha, after which each column of B is overwritten in
 * place by a triangular solve against the upper triangle of A, applied
 * with FLA_CONJ_TRANSPOSE (one FLA_Trsv_external call per column).
 * Columns are visited from left to right.
 *
 * diagA is forwarded unchanged to FLA_Trsv_external.
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_luh_unb_var3( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Scal_external( alpha, B );

  /* Zero columns are assigned to the left part to begin with. */
  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_LEFT );

  while ( FLA_Obj_width( B ) > FLA_Obj_width( B_left ) )
  {
    /* Split one column (b1) off the right part. */
    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_RIGHT );

    /* b1 = triu( A )' \ b1 */
    FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                       diagA, A, b1 );

    /* Hand the finished column over to the left part. */
    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Trmm, variant 4, "llt" case.
 *
 * B is scaled by alpha, after which each column of B is overwritten in
 * place with tril( A )' times that column (one FLA_Trmv_external call per
 * column, using FLA_TRANSPOSE).  Columns are visited from right to left.
 *
 * diagA is forwarded unchanged to FLA_Trmv_external.
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_llt_unb_var4( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Scal_external( alpha, B );

  /* Zero columns are assigned to the right part to begin with. */
  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_RIGHT );

  while ( FLA_Obj_width( B ) > FLA_Obj_width( B_right ) )
  {
    /* Split one column (b1) off the left part. */
    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_LEFT );

    /* b1 = tril( A )' * b1 */
    FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                       diagA, A, b1 );

    /* Hand the finished column over to the right part. */
    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Trsm, variant 1, "luc" case.
 *
 * B is scaled by alpha.  A (2x2 partitioning) and B (2x1 partitioning) are
 * then swept in lock step starting from the bottom-right corner of A and
 * the bottom row of B.  Each pass updates a single row of B:
 *
 *   b1t = b1t - a12t * B2     (a12t is passed with FLA_CONJUGATE)
 *   b1t = b1t / alpha11       (alpha11 is passed with FLA_CONJUGATE;
 *                              skipped when diagA is FLA_UNIT_DIAG)
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_luc_unb_var1( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj A_tl, A_tr,    A00,  a01,     A02,
          A_bl, A_br,    a10t, alpha11, a12t,
                         A20,  a21,     A22;

  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_2x2( A,    &A_tl, &A_tr,
                      &A_bl, &A_br,    0, 0, FLA_BR );

  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_BOTTOM );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_br ) )
  {
    FLA_Repart_2x2_to_3x3( A_tl, A_tr,    &A00,  &a01,     &A02,
                                          &a10t, &alpha11, &a12t,
                           A_bl, A_br,    &A20,  &a21,     &A22,
                           1, 1, FLA_TL );

    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_TOP );

    /* b1t = b1t - a12t * B2 */
    FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
                        FLA_MINUS_ONE, B2, a12t, FLA_ONE, b1t );

    /* b1t = b1t / alpha11 (only when the diagonal is not implicitly unit) */
    if ( diagA != FLA_UNIT_DIAG )
    {
      FLA_Inv_scalc_external( FLA_CONJUGATE, alpha11, b1t );
    }

    FLA_Cont_with_3x3_to_2x2( &A_tl, &A_tr,    A00,  a01,     A02,
                                               a10t, alpha11, a12t,
                              &A_bl, &A_br,    A20,  a21,     A22,
                              FLA_BR );

    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 1, "nh" case.
 *
 * C is scaled by beta.  A and C are then walked row by row from top to
 * bottom; each row of C accumulates
 *
 *   c1t = a1t * B' + c1t
 *
 * through one FLA_Gemv_external call that applies B with
 * FLA_CONJ_NO_TRANSPOSE and scales by alpha.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nh_unb_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_top,    A0,
          A_bot,    a1t,
                    A2;

  FLA_Obj C_top,    C0,
          C_bot,    c1t,
                    C2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x1( A,    &A_top,
                      &A_bot,    0, FLA_TOP );
  FLA_Part_2x1( C,    &C_top,
                      &C_bot,    0, FLA_TOP );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_top ) )
  {
    FLA_Repart_2x1_to_3x1( A_top,    &A0,
                                     &a1t,
                           A_bot,    &A2,    1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( C_top,    &C0,
                                     &c1t,
                           C_bot,    &C2,    1, FLA_BOTTOM );

    /* c1t = a1t * B' + c1t, evaluated as a matrix-vector product with B */
    FLA_Gemv_external( FLA_CONJ_NO_TRANSPOSE, alpha, B, a1t, FLA_ONE, c1t );

    FLA_Cont_with_3x1_to_2x1( &A_top,    A0,
                                         a1t,
                              &A_bot,    A2,    FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &C_top,    C0,
                                         c1t,
                              &C_bot,    C2,    FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 5, "tn" case.
 *
 * C is scaled by beta.  A and B are then walked row by row from top to
 * bottom; each pair of rows contributes a rank-1 update to all of C:
 *
 *   C = a1t' * b1t + C      (scaled by alpha, via FLA_Ger_external)
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_tn_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_top,    A0,
          A_bot,    a1t,
                    A2;

  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x1( A,    &A_top,
                      &A_bot,    0, FLA_TOP );
  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_TOP );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_top ) )
  {
    FLA_Repart_2x1_to_3x1( A_top,    &A0,
                                     &a1t,
                           A_bot,    &A2,    1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_BOTTOM );

    /* C = a1t' * b1t + C */
    FLA_Ger_external( alpha, a1t, b1t, C );

    FLA_Cont_with_3x1_to_2x1( &A_top,    A0,
                                         a1t,
                              &A_bot,    A2,    FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Trmm, variant 1, "ruh" case.
 *
 * B is scaled by alpha.  A (2x2 partitioning) and B (1x2 partitioning) are
 * then swept in lock step from the top-left corner of A and the leftmost
 * column of B.  Each pass updates a single column of B:
 *
 *   b1 = b1 * alpha11         (alpha11 is passed with FLA_CONJUGATE;
 *                              skipped when diagA is FLA_UNIT_DIAG)
 *   b1 = b1 + B2 * a12t'
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_ruh_unb_var1( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj A_tl, A_tr,    A00,  a01,     A02,
          A_bl, A_br,    a10t, alpha11, a12t,
                         A20,  a21,     A22;

  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_2x2( A,    &A_tl, &A_tr,
                      &A_bl, &A_br,    0, 0, FLA_TL );

  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_LEFT );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_tl ) )
  {
    FLA_Repart_2x2_to_3x3( A_tl, A_tr,    &A00,  &a01,     &A02,
                                          &a10t, &alpha11, &a12t,
                           A_bl, A_br,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_RIGHT );

    /* b1 = b1 * alpha11 (only when the diagonal is not implicitly unit) */
    if ( diagA != FLA_UNIT_DIAG )
    {
      FLA_Scalc_external( FLA_CONJUGATE, alpha11, b1 );
    }

    /* b1 = b1 + B2 * a12t' */
    FLA_Gemvc_external( FLA_NO_TRANSPOSE, FLA_CONJUGATE,
                        FLA_ONE, B2, a12t, FLA_ONE, b1 );

    FLA_Cont_with_3x3_to_2x2( &A_tl, &A_tr,    A00,  a01,     A02,
                                               a10t, alpha11, a12t,
                              &A_bl, &A_br,    A20,  a21,     A22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Trsm, variant 2, "ruh" case.
 *
 * B is scaled by alpha.  A (2x2 partitioning) and B (1x2 partitioning) are
 * then swept in lock step from the bottom-right corner of A and the
 * rightmost column of B.  Each pass does:
 *
 *   b1 = b1 / alpha11'        (skipped when diagA is FLA_UNIT_DIAG)
 *   B0 = B0 - b1 * a01'
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_ruh_unb_var2( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj A_tl, A_tr,    A00,  a01,     A02,
          A_bl, A_br,    a10t, alpha11, a12t,
                         A20,  a21,     A22;

  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_2x2( A,    &A_tl, &A_tr,
                      &A_bl, &A_br,    0, 0, FLA_BR );

  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_RIGHT );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_br ) )
  {
    FLA_Repart_2x2_to_3x3( A_tl, A_tr,    &A00,  &a01,     &A02,
                                          &a10t, &alpha11, &a12t,
                           A_bl, A_br,    &A20,  &a21,     &A22,
                           1, 1, FLA_TL );

    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_LEFT );

    /* b1 = b1 / alpha11' (only when the diagonal is not implicitly unit) */
    if ( diagA != FLA_UNIT_DIAG )
    {
      FLA_Inv_scalc_external( FLA_CONJUGATE, alpha11, b1 );
    }

    /* B0 = B0 - b1 * a01' */
    FLA_Gerc_external( FLA_NO_CONJUGATE, FLA_CONJUGATE,
                       FLA_MINUS_ONE, b1, a01, B0 );

    FLA_Cont_with_3x3_to_2x2( &A_tl, &A_tr,    A00,  a01,     A02,
                                               a10t, alpha11, a12t,
                              &A_bl, &A_br,    A20,  a21,     A22,
                              FLA_BR );

    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 3, "nt" case.
 *
 * C is scaled by beta.  Rows of B (top to bottom) are then paired with
 * columns of C (left to right); each column of C accumulates
 *
 *   c1 = A * b1t + c1       (scaled by alpha, via FLA_Gemv_external)
 *
 * where the row b1t is handed to FLA_Gemv_external as the vector operand.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nt_unb_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Obj C_left, C_right;
  FLA_Obj C0, c1, C2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_TOP );
  FLA_Part_1x2( C,    &C_left, &C_right,    0, FLA_LEFT );

  while ( FLA_Obj_length( B ) > FLA_Obj_length( B_top ) )
  {
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( C_left, C_right,    &C0, &c1, &C2,
                           1, FLA_RIGHT );

    /* c1 = A * b1t + c1 */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A, b1t, FLA_ONE, c1 );

    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &C_left, &C_right,    C0, c1, C2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 1, "hh" case.
 *
 * C is scaled by beta.  Columns of A (left to right) are then paired with
 * rows of C (top to bottom); each row of C accumulates
 *
 *   c1t = a1' * B' + c1t
 *
 * through one FLA_Gemvc_external call that applies B with
 * FLA_CONJ_NO_TRANSPOSE, passes a1 with FLA_CONJUGATE and scales by alpha.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_hh_unb_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_left, A_right;
  FLA_Obj A0, a1, A2;

  FLA_Obj C_top,    C0,
          C_bot,    c1t,
                    C2;

  FLA_Scal_external( beta, C );

  FLA_Part_1x2( A,    &A_left, &A_right,    0, FLA_LEFT );
  FLA_Part_2x1( C,    &C_top,
                      &C_bot,    0, FLA_TOP );

  while ( FLA_Obj_width( A ) > FLA_Obj_width( A_left ) )
  {
    FLA_Repart_1x2_to_1x3( A_left, A_right,    &A0, &a1, &A2,
                           1, FLA_RIGHT );
    FLA_Repart_2x1_to_3x1( C_top,    &C0,
                                     &c1t,
                           C_bot,    &C2,    1, FLA_BOTTOM );

    /* c1t = a1' * B' + c1t, evaluated as a matrix-vector product with B */
    FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE,
                        alpha, B, a1, FLA_ONE, c1t );

    FLA_Cont_with_1x3_to_1x2( &A_left, &A_right,    A0, a1, A2,
                              FLA_LEFT );
    FLA_Cont_with_3x1_to_2x1( &C_top,    C0,
                                         c1t,
                              &C_bot,    C2,    FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 5, "tt" case.
 *
 * C is scaled by beta.  Rows of A (top to bottom) are then paired with
 * columns of B (left to right); each pair contributes a rank-1 update to
 * all of C:
 *
 *   C = a1t' * b1' + C      (scaled by alpha, via FLA_Ger_external)
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_tt_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_top,    A0,
          A_bot,    a1t,
                    A2;

  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x1( A,    &A_top,
                      &A_bot,    0, FLA_TOP );
  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_LEFT );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_top ) )
  {
    FLA_Repart_2x1_to_3x1( A_top,    &A0,
                                     &a1t,
                           A_bot,    &A2,    1, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_RIGHT );

    /* C = a1t' * b1' + C */
    FLA_Ger_external( alpha, a1t, b1, C );

    FLA_Cont_with_3x1_to_2x1( &A_top,    A0,
                                         a1t,
                              &A_bot,    A2,    FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 5, "nc" case.
 *
 * C is scaled by beta.  Columns of A (left to right) are then paired with
 * rows of B (top to bottom); each pair contributes a rank-1 update to all
 * of C:
 *
 *   C = a1 * b1t + C        (scaled by alpha; b1t is passed to
 *                            FLA_Gerc_external with FLA_CONJUGATE)
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nc_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_left, A_right;
  FLA_Obj A0, a1, A2;

  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Scal_external( beta, C );

  FLA_Part_1x2( A,    &A_left, &A_right,    0, FLA_LEFT );
  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_TOP );

  while ( FLA_Obj_width( A ) > FLA_Obj_width( A_left ) )
  {
    FLA_Repart_1x2_to_1x3( A_left, A_right,    &A0, &a1, &A2,
                           1, FLA_RIGHT );
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_BOTTOM );

    /* C = a1 * b1t + C, with b1t conjugated */
    FLA_Gerc_external( FLA_NO_CONJUGATE, FLA_CONJUGATE, alpha, a1, b1t, C );

    FLA_Cont_with_1x3_to_1x2( &A_left, &A_right,    A0, a1, A2,
                              FLA_LEFT );
    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked triangular inversion, "ln" case, variant 3.
 *
 * A is swept in place from its top-left corner to its bottom-right corner.
 * Each pass performs, in this order:
 *
 *   a21     = -a21 / alpha11
 *   A20     = a21 * a10t + A20     (uses the a21 just computed and the
 *                                   a10t from before its own update)
 *   a10t    = a10t / alpha11
 *   alpha11 = 1.0 / alpha11
 *
 * The order matters: alpha11 is inverted last because the earlier steps
 * divide by its original value.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trinv_ln_unb_var3( FLA_Obj A )
{
  FLA_Obj ATL, ATR,    A00,  a01,     A02,
          ABL, ABR,    a10t, alpha11, a12t,
                       A20,  a21,     A22;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,    0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /*------------------------------------------------------------*/

    /* a21 = -a21 / alpha11; */
    FLA_Scal_external( FLA_MINUS_ONE, a21 );
    FLA_Inv_scal_external( alpha11, a21 );

    /* A20 = a21 * a10t + A20; */
    FLA_Ger_external( FLA_ONE, a21, a10t, A20 );

    /* a10t = a10t / alpha11; */
    FLA_Inv_scal_external( alpha11, a10t );

    /* alpha11 = 1.0 / alpha11; */
    FLA_Invert( FLA_NO_CONJUGATE, alpha11 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked triangular inversion, "lu" case, variant 4.  Both triangular
 * calls use FLA_UNIT_DIAG and alpha11 is never modified.
 *
 * A is swept in place from its top-left corner to its bottom-right corner.
 * Each pass performs, in this order:
 *
 *   a21   = -trilu( A22 ) \ a21
 *   A20   = -a21 * a10t + A20      (uses the a21 just computed and the
 *                                   a10t from before its own update)
 *   a10t' = trilu( A00' ) * a10t'
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trinv_lu_unb_var4( FLA_Obj A )
{
  FLA_Obj ATL, ATR,    A00,  a01,     A02,
          ABL, ABR,    a10t, alpha11, a12t,
                       A20,  a21,     A22;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,    0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /*------------------------------------------------------------*/

    /* a21 = -trilu( A22 ) \ a21; */
    FLA_Scal_external( FLA_MINUS_ONE, a21 );
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_UNIT_DIAG, A22, a21 );

    /* A20 = -a21 * a10t + A20; */
    FLA_Ger_external( FLA_MINUS_ONE, a21, a10t, A20 );

    /* a10t' = trilu( A00' ) * a10t'; */
    FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                       FLA_UNIT_DIAG, A00, a10t );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked triangular inversion, "ln" case, variant 2.
 *
 * A is swept in place from its top-left corner to its bottom-right corner.
 * Each pass performs, in this order:
 *
 *   a21     = tril( A22 ) \ a21    (A22 has not been modified yet at this
 *                                   point of the sweep)
 *   a21     = -a21 / alpha11
 *   alpha11 = 1.0 / alpha11
 *
 * alpha11 is inverted last because the preceding step divides by its
 * original value.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trinv_ln_unb_var2( FLA_Obj A )
{
  FLA_Obj ATL, ATR,    A00,  a01,     A02,
          ABL, ABR,    a10t, alpha11, a12t,
                       A20,  a21,     A22;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,    0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /*------------------------------------------------------------*/

    /* a21 = tril( A22 ) \ a21; */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, A22, a21 );

    /* a21 = -a21 / alpha11; */
    FLA_Scal_external( FLA_MINUS_ONE, a21 );
    FLA_Inv_scal_external( alpha11, a21 );

    /* alpha11 = 1.0 / alpha11; */
    FLA_Invert( FLA_NO_CONJUGATE, alpha11 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * Computes y = beta * y + alpha * (triangular product of A with x) without
 * modifying x.
 *
 * The triangular product is formed by FLA_Trmv_external on a temporary
 * copy of x (created conformal to x and freed before returning); uplo,
 * transa and diag are forwarded to that call unchanged.
 *
 * When full error checking is enabled the arguments are validated first
 * by FLA_Trmvsx_check.  If A has a zero dimension the routine returns
 * immediately and y is left untouched.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmvsx_external( FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Obj Ax;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trmvsx_check( uplo, transa, diag, alpha, A, x, beta, y );
  }

  if ( FLA_Obj_has_zero_dim( A ) )
  {
    return FLA_SUCCESS;
  }

  /* Work on a scratch copy so that x itself is preserved. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, x, &Ax );
  FLA_Copy_external( x, Ax );

  /* Ax now holds the triangular matrix-vector product. */
  FLA_Trmv_external( uplo, transa, diag, A, Ax );

  /* y = beta * y + alpha * Ax */
  FLA_Scal_external( beta, y );
  FLA_Axpy_external( alpha, Ax, y );

  FLA_Obj_free( &Ax );

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 3, "nn" case.
 *
 * C is scaled by beta.  B and C are then walked column by column from
 * left to right; each column of C accumulates
 *
 *   c1 = A * b1 + c1        (scaled by alpha, via FLA_Gemv_external)
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nn_unb_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Obj C_left, C_right;
  FLA_Obj C0, c1, C2;

  FLA_Scal_external( beta, C );

  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_LEFT );
  FLA_Part_1x2( C,    &C_left, &C_right,    0, FLA_LEFT );

  while ( FLA_Obj_width( B ) > FLA_Obj_width( B_left ) )
  {
    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( C_left, C_right,    &C0, &c1, &C2,
                           1, FLA_RIGHT );

    /* c1 = A * b1 + c1 */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A, b1, FLA_ONE, c1 );

    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &C_left, &C_right,    C0, c1, C2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Gemm, variant 5, "ct" case.
 *
 * C is scaled by beta.  A and B are then walked column by column from
 * left to right; each pair of columns contributes a rank-1 update to all
 * of C:
 *
 *   C = a1 * b1' + C        (scaled by alpha; a1 is passed to
 *                            FLA_Gerc_external with FLA_CONJUGATE and b1
 *                            with FLA_NO_CONJUGATE)
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_ct_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_left, A_right;
  FLA_Obj A0, a1, A2;

  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Scal_external( beta, C );

  FLA_Part_1x2( A,    &A_left, &A_right,    0, FLA_LEFT );
  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_LEFT );

  while ( FLA_Obj_width( A ) > FLA_Obj_width( A_left ) )
  {
    FLA_Repart_1x2_to_1x3( A_left, A_right,    &A0, &a1, &A2,
                           1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_RIGHT );

    /* C = a1 * b1' + C, with a1 conjugated */
    FLA_Gerc_external( FLA_CONJUGATE, FLA_NO_CONJUGATE, alpha, a1, b1, C );

    FLA_Cont_with_1x3_to_1x2( &A_left, &A_right,    A0, a1, A2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Hemm, variant 5, "ll" case.
 *
 * C is scaled by beta.  A (2x2 partitioning), B and C (2x1 partitionings)
 * are then swept in lock step from the bottom-right corner of A and the
 * bottom rows of B and C.  Only alpha11 and a21 of A are read.  Each pass
 * accumulates, all scaled by alpha:
 *
 *   c1t = c1t + alpha11 * b1t
 *   c1t = c1t + a21' * B2
 *   C2  = C2  + a21 * b1t
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hemm_ll_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_tl, A_tr,    A00,  a01,     A02,
          A_bl, A_br,    a10t, alpha11, a12t,
                         A20,  a21,     A22;

  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Obj C_top,    C0,
          C_bot,    c1t,
                    C2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x2( A,    &A_tl, &A_tr,
                      &A_bl, &A_br,    0, 0, FLA_BR );
  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_BOTTOM );
  FLA_Part_2x1( C,    &C_top,
                      &C_bot,    0, FLA_BOTTOM );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_br ) )
  {
    FLA_Repart_2x2_to_3x3( A_tl, A_tr,    &A00,  &a01,     &A02,
                                          &a10t, &alpha11, &a12t,
                           A_bl, A_br,    &A20,  &a21,     &A22,
                           1, 1, FLA_TL );
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_TOP );
    FLA_Repart_2x1_to_3x1( C_top,    &C0,
                                     &c1t,
                           C_bot,    &C2,    1, FLA_TOP );

    /* c1t = c1t + alpha11 * b1t */
    FLA_Axpys_external( alpha, alpha11, b1t, FLA_ONE, c1t );

    /* c1t = c1t + a21' * B2, evaluated as a matrix-vector product with B2 */
    FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
                        alpha, B2, a21, FLA_ONE, c1t );

    /* C2 = C2 + a21 * b1t */
    FLA_Ger_external( alpha, a21, b1t, C2 );

    FLA_Cont_with_3x3_to_2x2( &A_tl, &A_tr,    A00,  a01,     A02,
                                               a10t, alpha11, a12t,
                              &A_bl, &A_br,    A20,  a21,     A22,
                              FLA_BR );
    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_BOTTOM );
    FLA_Cont_with_3x1_to_2x1( &C_top,    C0,
                                         c1t,
                              &C_bot,    C2,    FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Symm, variant 1, "rl" case.
 *
 * C is scaled by beta.  A (2x2 partitioning), B and C (1x2 partitionings)
 * are then swept in lock step from the top-left corner of A and the
 * leftmost columns of B and C.  Only a10t and alpha11 of A are read.  Each
 * pass accumulates, all scaled by alpha:
 *
 *   C0 = C0 + b1 * a10t
 *   c1 = c1 + B0 * a10t'
 *   c1 = c1 + b1 * alpha11
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Symm_rl_unb_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_tl, A_tr,    A00,  a01,     A02,
          A_bl, A_br,    a10t, alpha11, a12t,
                         A20,  a21,     A22;

  FLA_Obj B_left, B_right;
  FLA_Obj B0, b1, B2;

  FLA_Obj C_left, C_right;
  FLA_Obj C0, c1, C2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x2( A,    &A_tl, &A_tr,
                      &A_bl, &A_br,    0, 0, FLA_TL );
  FLA_Part_1x2( B,    &B_left, &B_right,    0, FLA_LEFT );
  FLA_Part_1x2( C,    &C_left, &C_right,    0, FLA_LEFT );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_tl ) )
  {
    FLA_Repart_2x2_to_3x3( A_tl, A_tr,    &A00,  &a01,     &A02,
                                          &a10t, &alpha11, &a12t,
                           A_bl, A_br,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_1x2_to_1x3( B_left, B_right,    &B0, &b1, &B2,
                           1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( C_left, C_right,    &C0, &c1, &C2,
                           1, FLA_RIGHT );

    /* C0 = C0 + b1 * a10t */
    FLA_Ger_external( alpha, b1, a10t, C0 );

    /* c1 = c1 + B0 * a10t' */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, B0, a10t, FLA_ONE, c1 );

    /* c1 = c1 + b1 * alpha11 */
    FLA_Axpys_external( alpha, alpha11, b1, FLA_ONE, c1 );

    FLA_Cont_with_3x3_to_2x2( &A_tl, &A_tr,    A00,  a01,     A02,
                                               a10t, alpha11, a12t,
                              &A_bl, &A_br,    A20,  a21,     A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &B_left, &B_right,    B0, b1, B2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &C_left, &C_right,    C0, c1, C2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * Hermitian rank-2k update wrapper: dispatches on the datatype of A to the
 * matching BLIS-style kernel (bli_ssyr2k, bli_dsyr2k, bli_cher2k or
 * bli_zher2k).
 *
 * - With full error checking enabled, FLA_Her2k_check validates the
 *   arguments first.
 * - If C has a zero dimension nothing is done.
 * - If A or B has a zero dimension, C is only scaled by beta.
 * - The inner dimension k is the width of A when trans is
 *   FLA_NO_TRANSPOSE and the length of A otherwise.
 * - For the two complex datatypes beta is read as a real scalar
 *   (float / double), while alpha is read as a complex scalar.
 *
 * A datatype not covered by the switch falls through without any update.
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Her2k_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Datatype dt;
  int          k;
  int          rows_A, cols_A;
  int          rows_C;
  int          rs_A, cs_A;
  int          rs_B, cs_B;
  int          rs_C, cs_C;
  uplo_t       uplo_blis;
  trans_t      trans_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );
  }

  if ( FLA_Obj_has_zero_dim( C ) )
  {
    return FLA_SUCCESS;
  }

  if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
  {
    /* No product to accumulate: only the beta scaling remains. */
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( A );

  rows_A = FLA_Obj_length( A );
  cols_A = FLA_Obj_width( A );
  rs_A   = FLA_Obj_row_stride( A );
  cs_A   = FLA_Obj_col_stride( A );

  rs_B   = FLA_Obj_row_stride( B );
  cs_B   = FLA_Obj_col_stride( B );

  rows_C = FLA_Obj_length( C );
  rs_C   = FLA_Obj_row_stride( C );
  cs_C   = FLA_Obj_col_stride( C );

  k = ( trans == FLA_NO_TRANSPOSE ) ? cols_A : rows_A;

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      bli_ssyr2k( uplo_blis,
                  trans_blis,
                  rows_C,
                  k,
                  ( float * ) FLA_FLOAT_PTR( alpha ),
                  ( float * ) FLA_FLOAT_PTR( A ), rs_A, cs_A,
                  ( float * ) FLA_FLOAT_PTR( B ), rs_B, cs_B,
                  ( float * ) FLA_FLOAT_PTR( beta ),
                  ( float * ) FLA_FLOAT_PTR( C ), rs_C, cs_C );
      break;
    }

    case FLA_DOUBLE:
    {
      bli_dsyr2k( uplo_blis,
                  trans_blis,
                  rows_C,
                  k,
                  ( double * ) FLA_DOUBLE_PTR( alpha ),
                  ( double * ) FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                  ( double * ) FLA_DOUBLE_PTR( B ), rs_B, cs_B,
                  ( double * ) FLA_DOUBLE_PTR( beta ),
                  ( double * ) FLA_DOUBLE_PTR( C ), rs_C, cs_C );
      break;
    }

    case FLA_COMPLEX:
    {
      /* beta is accessed as a real (float) scalar here. */
      bli_cher2k( uplo_blis,
                  trans_blis,
                  rows_C,
                  k,
                  ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
                  ( scomplex * ) FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                  ( scomplex * ) FLA_COMPLEX_PTR( B ), rs_B, cs_B,
                  ( float * ) FLA_FLOAT_PTR( beta ),
                  ( scomplex * ) FLA_COMPLEX_PTR( C ), rs_C, cs_C );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      /* beta is accessed as a real (double) scalar here. */
      bli_zher2k( uplo_blis,
                  trans_blis,
                  rows_C,
                  k,
                  ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
                  ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                  ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ), rs_B, cs_B,
                  ( double * ) FLA_DOUBLE_PTR( beta ),
                  ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ), rs_C, cs_C );
      break;
    }
  }

  return FLA_SUCCESS;
}
/*
 * General matrix-vector multiply wrapper: dispatches on the datatype of A
 * to bli_sgemv, bli_dgemv, bli_cgemv or bli_zgemv, passing alpha, A, x,
 * beta and y through together with their strides / increments.
 *
 * - With full error checking enabled, FLA_Gemv_check validates the
 *   arguments first.
 * - If A has a zero dimension, y is only scaled by beta.
 * - transa is mapped to its BLIS counterpart; x is always passed with the
 *   mapping of FLA_NO_CONJUGATE.
 *
 * A datatype not covered by the switch falls through without any update.
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Datatype dt;
  int          rows_A, cols_A;
  int          rs_A, cs_A;
  int          inc_x, inc_y;
  trans_t      transa_blis;
  conj_t       conjx_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Gemv_check( transa, alpha, A, x, beta, y );
  }

  if ( FLA_Obj_has_zero_dim( A ) )
  {
    /* No product to accumulate: only the beta scaling remains. */
    FLA_Scal_external( beta, y );
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( A );

  rows_A = FLA_Obj_length( A );
  cols_A = FLA_Obj_width( A );
  rs_A   = FLA_Obj_row_stride( A );
  cs_A   = FLA_Obj_col_stride( A );

  inc_x  = FLA_Obj_vector_inc( x );
  inc_y  = FLA_Obj_vector_inc( y );

  FLA_Param_map_flame_to_blis_trans( transa, &transa_blis );
  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &conjx_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      bli_sgemv( transa_blis,
                 conjx_blis,
                 rows_A,
                 cols_A,
                 ( float * ) FLA_FLOAT_PTR( alpha ),
                 ( float * ) FLA_FLOAT_PTR( A ), rs_A, cs_A,
                 ( float * ) FLA_FLOAT_PTR( x ), inc_x,
                 ( float * ) FLA_FLOAT_PTR( beta ),
                 ( float * ) FLA_FLOAT_PTR( y ), inc_y );
      break;
    }

    case FLA_DOUBLE:
    {
      bli_dgemv( transa_blis,
                 conjx_blis,
                 rows_A,
                 cols_A,
                 ( double * ) FLA_DOUBLE_PTR( alpha ),
                 ( double * ) FLA_DOUBLE_PTR( A ), rs_A, cs_A,
                 ( double * ) FLA_DOUBLE_PTR( x ), inc_x,
                 ( double * ) FLA_DOUBLE_PTR( beta ),
                 ( double * ) FLA_DOUBLE_PTR( y ), inc_y );
      break;
    }

    case FLA_COMPLEX:
    {
      bli_cgemv( transa_blis,
                 conjx_blis,
                 rows_A,
                 cols_A,
                 ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
                 ( scomplex * ) FLA_COMPLEX_PTR( A ), rs_A, cs_A,
                 ( scomplex * ) FLA_COMPLEX_PTR( x ), inc_x,
                 ( scomplex * ) FLA_COMPLEX_PTR( beta ),
                 ( scomplex * ) FLA_COMPLEX_PTR( y ), inc_y );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      bli_zgemv( transa_blis,
                 conjx_blis,
                 rows_A,
                 cols_A,
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), rs_A, cs_A,
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ), inc_x,
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ),
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ), inc_y );
      break;
    }
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Symm, variant 1, "lu" case.
 *
 * C is scaled by beta.  A (2x2 partitioning), B and C (2x1 partitionings)
 * are then swept in lock step from the top-left corner of A and the top
 * rows of B and C.  Only a01 and alpha11 of A are read.  Each pass
 * accumulates, all scaled by alpha:
 *
 *   C0  = C0  + a01 * b1t
 *   c1t = c1t + a01' * B0
 *   c1t = c1t + alpha11 * b1t
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Symm_lu_unb_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj A_tl, A_tr,    A00,  a01,     A02,
          A_bl, A_br,    a10t, alpha11, a12t,
                         A20,  a21,     A22;

  FLA_Obj B_top,    B0,
          B_bot,    b1t,
                    B2;

  FLA_Obj C_top,    C0,
          C_bot,    c1t,
                    C2;

  FLA_Scal_external( beta, C );

  FLA_Part_2x2( A,    &A_tl, &A_tr,
                      &A_bl, &A_br,    0, 0, FLA_TL );
  FLA_Part_2x1( B,    &B_top,
                      &B_bot,    0, FLA_TOP );
  FLA_Part_2x1( C,    &C_top,
                      &C_bot,    0, FLA_TOP );

  while ( FLA_Obj_length( A ) > FLA_Obj_length( A_tl ) )
  {
    FLA_Repart_2x2_to_3x3( A_tl, A_tr,    &A00,  &a01,     &A02,
                                          &a10t, &alpha11, &a12t,
                           A_bl, A_br,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_2x1_to_3x1( B_top,    &B0,
                                     &b1t,
                           B_bot,    &B2,    1, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( C_top,    &C0,
                                     &c1t,
                           C_bot,    &C2,    1, FLA_BOTTOM );

    /* C0 = C0 + a01 * b1t */
    FLA_Ger_external( alpha, a01, b1t, C0 );

    /* c1t = c1t + a01' * B0, evaluated as a matrix-vector product with B0 */
    FLA_Gemv_external( FLA_TRANSPOSE, alpha, B0, a01, FLA_ONE, c1t );

    /* c1t = c1t + alpha11 * b1t */
    FLA_Axpys_external( alpha, alpha11, b1t, FLA_ONE, c1t );

    FLA_Cont_with_3x3_to_2x2( &A_tl, &A_tr,    A00,  a01,     A02,
                                               a10t, alpha11, a12t,
                              &A_bl, &A_br,    A20,  a21,     A22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &B_top,    B0,
                                         b1t,
                              &B_bot,    B2,    FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &C_top,    C0,
                                         c1t,
                              &C_bot,    C2,    FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Eig_gest, "il" case, variant 5.
 *
 * A is updated in place using B, sweeping both from their top-left corners
 * to their bottom-right corners in lock step.  Y is used only as scratch
 * space: its top-left 1x1 element (psi11) is overwritten on every pass.
 *
 * Each pass performs, in this order:
 *
 *   alpha11 = inv(beta11) * alpha11 * inv(beta11)
 *   psi11   = -1/2 * alpha11
 *   a21     = a21 * inv(beta11)
 *   a21     = a21 - 1/2 * alpha11 * b21
 *   A22     = A22 - a21 * b21' - b21 * a21'
 *   a21     = a21 - 1/2 * alpha11 * b21
 *   a21     = inv( tril( B22 ) ) * a21
 *
 * The statement order is significant: the rank-2 update of A22 must see
 * a21 after exactly one of the two half-corrections.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Eig_gest_il_unb_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
{
  FLA_Obj ATL, ATR,    A00,  a01,     A02,
          ABL, ABR,    a10t, alpha11, a12t,
                       A20,  a21,     A22;

  FLA_Obj BTL, BTR,    B00,  b01,    B02,
          BBL, BBR,    b10t, beta11, b12t,
                       B20,  b21,    B22;

  /* Only psi11 is used; the other three receive the rest of Y. */
  FLA_Obj psi11, y12t,
          y21,   Y22;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,    0, 0, FLA_TL );

  FLA_Part_2x2( B,    &BTL, &BTR,
                      &BBL, &BBR,    0, 0, FLA_TL );

  FLA_Part_2x2( Y,    &psi11, &y12t,
                      &y21,   &Y22,    1, 1, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x2_to_3x3( BTL, BTR,    &B00,  &b01,    &B02,
                                        &b10t, &beta11, &b12t,
                           BBL, BBR,    &B20,  &b21,    &B22,
                           1, 1, FLA_BR );

    /*------------------------------------------------------------*/

    /* alpha11 = inv(beta11) * alpha11 * inv(conj(beta11));
               = inv(beta11) * alpha11 * inv(beta11);       */
    FLA_Inv_scal_external( beta11, alpha11 );
    FLA_Inv_scal_external( beta11, alpha11 );

    /* psi11 = - 1/2 * alpha11; */
    FLA_Copy_external( alpha11, psi11 );
    FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );

    /* a21 = a21 * inv(conj(beta11));
           = a21 * inv(beta11);       */
    FLA_Inv_scal_external( beta11, a21 );

    /* a21 = a21 - 1/2 * alpha11 * b21; */
    FLA_Axpy_external( psi11, b21, a21 );

    /* A22 = A22 - a21 * b21' - b21 * a21'; */
    FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,
                        FLA_MINUS_ONE, a21, b21, A22 );

    /* a21 = a21 - 1/2 * alpha11 * b21; */
    FLA_Axpy_external( psi11, b21, a21 );

    /* a21 = inv( tril( B22 ) ) * a21; */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, B22, a21 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );

    FLA_Cont_with_3x3_to_2x2( &BTL, &BTR,    B00,  b01,    B02,
                                             b10t, beta11, b12t,
                              &BBL, &BBR,    B20,  b21,    B22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * Unblocked Eig_gest, "nl" case, variant 4.
 *
 * A is updated in place using B, sweeping both from their top-left corners
 * to their bottom-right corners in lock step.  Y is used only as scratch
 * space: its top-left 1x1 element (psi11) is overwritten on every pass.
 *
 * Each pass performs, in this order:
 *
 *   psi11   = 1/2 * alpha11
 *   a10t    = a10t + 1/2 * alpha11 * b10t
 *   A00     = A00 + a10t' * b10t + b10t' * a10t
 *   a10t    = a10t + 1/2 * alpha11 * b10t
 *   a10t    = beta11 * a10t
 *   alpha11 = beta11 * alpha11 * beta11
 *   A20     = A20 + a21 * b10t
 *   a21     = a21 * beta11
 *
 * The statement order is significant: psi11 is taken from alpha11 before
 * alpha11 is scaled, the rank-2 update of A00 must see a10t after exactly
 * one of the two half-corrections, and A20 is updated with a21 before a21
 * is scaled.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Eig_gest_nl_unb_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
{
  FLA_Obj ATL, ATR,    A00,  a01,     A02,
          ABL, ABR,    a10t, alpha11, a12t,
                       A20,  a21,     A22;

  FLA_Obj BTL, BTR,    B00,  b01,    B02,
          BBL, BBR,    b10t, beta11, b12t,
                       B20,  b21,    B22;

  /* Only psi11 is used; the other three receive the rest of Y. */
  FLA_Obj psi11, y12t,
          y21,   Y22;

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,    0, 0, FLA_TL );

  FLA_Part_2x2( B,    &BTL, &BTR,
                      &BBL, &BBR,    0, 0, FLA_TL );

  FLA_Part_2x2( Y,    &psi11, &y12t,
                      &y21,   &Y22,    1, 1, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x2_to_3x3( BTL, BTR,    &B00,  &b01,    &B02,
                                        &b10t, &beta11, &b12t,
                           BBL, BBR,    &B20,  &b21,    &B22,
                           1, 1, FLA_BR );

    /*------------------------------------------------------------*/

    /* psi11 = 1/2 * alpha11; */
    FLA_Copy_external( alpha11, psi11 );
    FLA_Scal_external( FLA_ONE_HALF, psi11 );

    /* a10t = a10t + 1/2 * alpha11 * b10t; */
    FLA_Axpy_external( psi11, b10t, a10t );

    /* A00 = A00 + a10t' * b10t + b10t' * a10t; */
    FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_CONJUGATE,
                        FLA_ONE, a10t, b10t, A00 );

    /* a10t = a10t + 1/2 * alpha11 * b10t; */
    FLA_Axpy_external( psi11, b10t, a10t );

    /* a10t = conj(beta11) * a10t;
            = beta11 * a10t;       */
    FLA_Scal_external( beta11, a10t );

    /* alpha11 = conj(beta11) * alpha11 * beta11;
               = beta11 * alpha11 * beta11;       */
    FLA_Scal_external( beta11, alpha11 );
    FLA_Scal_external( beta11, alpha11 );

    /* A20 = A20 + a21 * b10t; */
    FLA_Ger_external( FLA_ONE, a21, b10t, A20 );

    /* a21 = a21 * beta11; */
    FLA_Scal_external( beta11, a21 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );

    FLA_Cont_with_3x3_to_2x2( &BTL, &BTR,    B00,  b01,    B02,
                                             b10t, beta11, b12t,
                              &BBL, &BBR,    B20,  b21,    B22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}