/*
 * Symm_ru_blk_var6
 *
 * Blocked update of C, one column panel at a time, walking A from its
 * bottom-right corner towards the top-left and B/C from right to left.
 * Each step performs
 *
 *     C1 = B0 * A01 + B1 * A11 + B2 * A12' + C1
 *
 * as three accumulating FLA_Gemm calls.
 *
 *   A       matrix partitioned 2x2 / 3x3 (only A01, A11, A12 are read)
 *   B, C    matrices partitioned by columns
 *   nb_alg  upper bound on the panel width used per iteration
 *
 * Returns FLA_SUCCESS.
 *
 * NOTE(review): the diagonal block A11 is passed to a general FLA_Gemm,
 * so its whole storage is read.  If A is meant to hold only its upper
 * triangle (as the use of A12' instead of A21 suggests), confirm that
 * the strictly lower part of A11 is valid.
 */
int Symm_ru_blk_var6( FLA_Obj A, FLA_Obj B, FLA_Obj C, int nb_alg )
{
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00, A01, A02;
  FLA_Obj A10, A11, A12;
  FLA_Obj A20, A21, A22;
  FLA_Obj BL, BR, B0, B1, B2;
  FLA_Obj CL, CR, C0, C1, C2;
  int bsize;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_BR );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) )
  {
    /* Never take more rows/columns than remain in ATL. */
    bsize = min( FLA_Obj_length( ATL ), nb_alg );

    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00, &A01, &A02,
                                     &A10, &A11, &A12,
                           ABL, ABR, &A20, &A21, &A22,
                           bsize, bsize, FLA_TL );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, bsize, FLA_LEFT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &C1, &C2, bsize, FLA_LEFT );

    /* C1 = B0 * A01 + B1 * A11 + B2 * A12' + C1 */
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, B0, A01, FLA_ONE, C1 );
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, B1, A11, FLA_ONE, C1 );
    FLA_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE,    FLA_ONE, B2, A12, FLA_ONE, C1 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00, A01, A02,
                                          A10, A11, A12,
                              &ABL, &ABR, A20, A21, A22,
                              FLA_BR );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_RIGHT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, C1, C2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Axpy_sync_circular
 *
 * Computes B := alpha * X + B one column panel at a time, guarding each
 * panel with the matching entry of fla_omp_lock[].  The calling thread
 * starts at column stage_width * omp_get_thread_num(), processes
 * FLA_omp_get_num_stages() panels, and wraps back to column 0 when it
 * runs off the right edge of X.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Axpy_sync_circular( FLA_Obj alpha, FLA_Obj X, FLA_Obj B )
{
  FLA_Obj XL, XR, X0, X1, X2;
  FLA_Obj BL, BR, B0, B1, B2;
  int n_stages    = FLA_omp_get_num_stages();
  int stage_width = FLA_omp_compute_stage_width( X );
  int thread_num  = omp_get_thread_num();
  int first_col   = stage_width * thread_num;
  int n_done;
  int cols, lock_idx;

  /* Thread i begins on the i-th panel of X and B. */
  FLA_Part_1x2( X, &XL, &XR, first_col, FLA_LEFT );
  FLA_Part_1x2( B, &BL, &BR, first_col, FLA_LEFT );

  for ( n_done = 0; n_done < n_stages; n_done++ )
  {
    /* The last lockable panel may be narrower than the others. */
    cols = min( FLA_Obj_width( XR ), stage_width );

    FLA_Repart_1x2_to_1x3( XL, XR, &X0, &X1, &X2, cols, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, cols, FLA_RIGHT );

    /* Index of the current panel, which is also the index of its lock. */
    lock_idx = FLA_Obj_width( XL ) / stage_width;

    /* B1 := alpha * X1 + B1, performed while holding the panel's lock. */
    omp_set_lock( &fla_omp_lock[lock_idx] );
    FLA_Axpy_external( alpha, X1, B1 );
    omp_unset_lock( &fla_omp_lock[lock_idx] );

    FLA_Cont_with_1x3_to_1x2( &XL, &XR, X0, X1, X2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_LEFT );

    /* After the last panel, restart from the first one. */
    if ( FLA_Obj_width( XL ) == FLA_Obj_width( X ) )
    {
      FLA_Part_1x2( X, &XL, &XR, 0, FLA_LEFT );
      FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
    }
  }

  return FLA_SUCCESS;
}
/*
 * Gemm_unb_var2
 *
 * Unblocked matrix-matrix multiply built from matrix-vector products:
 * B and C are traversed left to right one column at a time and each
 * column pair is updated with
 *
 *     c1 = A * b1 + c1
 *
 * Returns FLA_SUCCESS.
 */
int Gemm_unb_var2( FLA_Obj A, FLA_Obj B, FLA_Obj C )
{
  FLA_Obj BL, BR, B0, b1, B2;
  FLA_Obj CL, CR, C0, c1, C2;

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    /* Expose the next column of B and of C. */
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &c1, &C2, 1, FLA_RIGHT );

    FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A, b1, FLA_ONE, c1 );

    /* Fold the processed column into the left partitions. */
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, c1, C2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Symm_lu_unb_var10
 *
 * Unblocked symmetric multiply that walks B and C from right to left,
 * one column per iteration, and updates each column of C with a
 * symmetric matrix-vector product that references the upper triangle
 * of A:
 *
 *     c1t = beta * c1t + alpha * A * b1t
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Symm_lu_unb_var10( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj BL, BR, B0, b1t, B2;
  FLA_Obj CL, CR, C0, c1t, C2;

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );

  while ( FLA_Obj_width( BR ) < FLA_Obj_width( B ) )
  {
    /* Peel the right-most remaining column off BL and CL. */
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1t, &B2, 1, FLA_LEFT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &c1t, &C2, 1, FLA_LEFT );

    FLA_Symv_external( FLA_UPPER_TRIANGULAR, alpha, A, b1t, beta, c1t );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1t, B2, FLA_RIGHT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, c1t, C2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * Symm_ru_unb_var4
 *
 * Unblocked update of C that walks A along its diagonal from the
 * top-left corner and B/C from left to right, one column per iteration.
 * Only alpha11 and the row a12t to its right are read from A.  Each
 * step applies
 *
 *     c1 = (b1 * alpha11) + c1
 *     c1 = (B2 * a12t')   + c1
 *     C2 = (b1 * a12t)    + C2
 *
 * so the current column receives the contributions of b1 and B2, and
 * the columns still to come (C2) receive the contribution of b1.
 *
 *   A     matrix partitioned 2x2 / 3x3 along its diagonal
 *   B, C  matrices partitioned by columns
 *
 * Returns FLA_SUCCESS.
 */
int Symm_ru_unb_var4( FLA_Obj A, FLA_Obj B, FLA_Obj C )
{
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;
  FLA_Obj BL, BR, B0, b1, B2;
  FLA_Obj CL, CR, C0, c1, C2;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00,  &a01,     &A02,
                                     &a10t, &alpha11, &a12t,
                           ABL, ABR, &A20,  &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &c1, &C2, 1, FLA_RIGHT );

    /* c1 = (b1 * alpha11) + c1 */
    FLA_Axpy( alpha11, b1, c1 );

    /* c1 = (B2 * a12t') + c1 */
    FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, B2, a12t, FLA_ONE, c1 );

    /* C2 = (b1 * a12t) + C2
     * The rank-1 update must land in C2: b1 supplies one entry per row
     * of C2 and a12t one entry per column of C2.  C0 holds columns that
     * are already complete and has a different width than a12t. */
    FLA_Ger( FLA_ONE, b1, a12t, C2 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00,  a01,     A02,
                                          a10t, alpha11, a12t,
                              &ABL, &ABR, A20,  a21,     A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, c1, C2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Axpy_sync_pipeline
 *
 * Computes B := alpha * X + B one column panel at a time, from the
 * left edge of X to the right edge.  The panel width comes from
 * FLA_omp_compute_stage_width( X ), and each panel update is performed
 * while holding the matching entry of fla_omp_lock[].
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Axpy_sync_pipeline( FLA_Obj alpha, FLA_Obj X, FLA_Obj B )
{
  FLA_Obj XL, XR, X0, X1, X2;
  FLA_Obj BL, BR, B0, B1, B2;
  int cols, lock_idx, nb_alg;

  FLA_Part_1x2( X, &XL, &XR, 0, FLA_LEFT );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );

  /* Width of one lockable panel. */
  nb_alg = FLA_omp_compute_stage_width( X );

  while ( FLA_Obj_width( XL ) < FLA_Obj_width( X ) )
  {
    /* The final panel may be narrower than nb_alg. */
    cols = min( FLA_Obj_width( XR ), nb_alg );

    FLA_Repart_1x2_to_1x3( XL, XR, &X0, &X1, &X2, cols, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, cols, FLA_RIGHT );

    /* Index of the current panel, which is also the index of its lock. */
    lock_idx = FLA_Obj_width( XL ) / nb_alg;

    /* B1 := alpha * X1 + B1, performed while holding the panel's lock. */
    omp_set_lock( &fla_omp_lock[lock_idx] );
    FLA_Axpy_external( alpha, X1, B1 );
    omp_unset_lock( &fla_omp_lock[lock_idx] );

    FLA_Cont_with_1x3_to_1x2( &XL, &XR, X0, X1, X2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trsm_llt_unb_var3
 *
 * Unblocked triangular solve with multiple right-hand sides.  B is
 * first scaled by alpha, then traversed left to right; every column is
 * overwritten by a triangular solve against the transposed lower
 * triangle of A:
 *
 *     b1 = tril( A' ) \ b1
 *
 * diagA is forwarded unchanged to the vector solve.
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_llt_unb_var3( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj BL, BR, B0, b1, B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_RIGHT );

    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE, diagA, A, b1 );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Herk_un_unb_var5
 *
 * Unblocked Hermitian rank-k update on the upper triangle of C.  The
 * upper triangle is scaled by beta once, then A is traversed left to
 * right and every column contributes a Hermitian rank-1 update:
 *
 *     C := C + alpha * a1 * a1'
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Herk_un_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj AL, AR, A0, a1, A2;

  FLA_Scalr_external( FLA_UPPER_TRIANGULAR, beta, C );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &a1, &A2, 1, FLA_RIGHT );

    FLA_Her_external( FLA_UPPER_TRIANGULAR, alpha, a1, C );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, a1, A2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLASH_LQ_UT_solve
 *
 * Solves for X from B using A and T (presumably the output of an LQ
 * factorization with UT transforms -- confirm against the caller).
 *
 * Steps, in order:
 *   1. optional parameter check via FLA_LQ_UT_solve_check;
 *   2. create workspace W for applying Q;
 *   3. take AL = the left FLA_Obj_length( A ) columns of A and
 *      XT = the top FLA_Obj_length( B ) rows of X;
 *   4. XT := B, then XT := inv( tril( AL ) ) * XT;
 *   5. zero the remaining rows XB;
 *   6. apply Q (from A and T) to X from the left;
 *   7. release W.
 *
 * Returns FLA_SUCCESS.
 *
 * NOTE(review): Q is applied with FLA_NO_TRANSPOSE / FLA_FORWARD /
 * FLA_ROWWISE; confirm this matches the convention the factorization
 * uses for row-wise stored reflectors.
 */
FLA_Error FLASH_LQ_UT_solve( FLA_Obj A, FLA_Obj T, FLA_Obj B, FLA_Obj X )
{
  FLA_Obj W;
  FLA_Obj AL, AR;
  FLA_Obj XT, XB;

  /* Check parameters. */
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_LQ_UT_solve_check( A, T, B, X );
  }

  FLASH_Apply_Q_UT_create_workspace( T, X, &W );

  FLA_Part_1x2( A, &AL, &AR, FLA_Obj_length( A ), FLA_LEFT );
  FLA_Part_2x1( X, &XT,
                   &XB, FLA_Obj_length( B ), FLA_TOP );

  FLASH_Copy( B, XT );

  FLASH_Trsm( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
              FLA_NONUNIT_DIAG, FLA_ONE, AL, XT );

  FLASH_Set( FLA_ZERO, XB );

  FLASH_Apply_Q_UT( FLA_LEFT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
                    A, T, W, X );

  FLASH_Obj_free( &W );

  return FLA_SUCCESS;
}
/*
 * FLA_Trmm_lun_unb_var4
 *
 * Unblocked triangular matrix multiply.  B is scaled by alpha, then
 * traversed right to left; every column is overwritten by the product
 * with the upper triangle of A:
 *
 *     b1 = triu( A ) * b1
 *
 * diagA is forwarded unchanged to the matrix-vector routine.
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_lun_unb_var4( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj BL, BR, B0, b1, B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_width( BR ) < FLA_Obj_width( B ) )
  {
    /* Peel the right-most remaining column off BL. */
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_LEFT );

    FLA_Trmv_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, diagA, A, b1 );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trsm_lun_blk_var4
 *
 * Blocked triangular solve with multiple right-hand sides.  B is
 * traversed right to left in column panels whose width is chosen by
 * FLA_Determine_blocksize from the control tree; each panel is handed
 * to the sub-problem solver:
 *
 *     B1 = triu( A ) \ B1      (alpha is applied by the sub-call)
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_lun_blk_var4( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t* cntl )
{
  FLA_Obj BL, BR, B0, B1, B2;
  dim_t nb;

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_width( BR ) < FLA_Obj_width( B ) )
  {
    nb = FLA_Determine_blocksize( BL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_LEFT );

    FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       diagA, alpha, A, B1,
                       FLA_Cntl_sub_trsm( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Herk_ln_blk_var5
 *
 * Blocked Hermitian rank-k update on the lower triangle of C.  The
 * lower triangle is scaled by beta once, then A is traversed left to
 * right in column panels and each panel contributes
 *
 *     C = C + alpha * A1 * A1'
 *
 * through the sub-problem routine selected by the control tree.
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Herk_ln_blk_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_herk_t* cntl )
{
  FLA_Obj AL, AR, A0, A1, A2;
  dim_t nb;

  FLA_Scalr_internal( FLA_LOWER_TRIANGULAR, beta, C,
                      FLA_Cntl_sub_scalr( cntl ) );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    nb = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &A1, &A2, nb, FLA_RIGHT );

    /* beta was already applied above, so accumulate with FLA_ONE. */
    FLA_Herk_internal( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       alpha, A1, FLA_ONE, C,
                       FLA_Cntl_sub_herk( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, A1, A2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trmm_rlt_blk_var2
 *
 * Blocked triangular matrix multiply from the right with the transposed
 * lower triangle of A.  B is scaled by alpha, then A is traversed from
 * its bottom-right corner and B from right to left.  Per iteration:
 *
 *     B2 = B2 + B1 * A21'
 *     B1 = B1 * tril( A11 )'
 *
 * The order matters: the first update reads B1 before the second one
 * overwrites it.
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_rlt_blk_var2( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trmm_t* cntl )
{
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00, A01, A02;
  FLA_Obj A10, A11, A12;
  FLA_Obj A20, A21, A22;
  FLA_Obj BL, BR, B0, B1, B2;
  dim_t nb;

  FLA_Scal_internal( alpha, B, FLA_Cntl_sub_scal( cntl ) );

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_BR );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) )
  {
    nb = FLA_Determine_blocksize( ATL, FLA_TL, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00, &A01, &A02,
                                     &A10, &A11, &A12,
                           ABL, ABR, &A20, &A21, &A22,
                           nb, nb, FLA_TL );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_LEFT );

    /* B2 = B2 + B1 * A21' */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE,
                       FLA_ONE, B1, A21, FLA_ONE, B2,
                       FLA_Cntl_sub_gemm( cntl ) );

    /* B1 = B1 * tril( A11 )' */
    FLA_Trmm_internal( FLA_RIGHT, FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                       diagA, FLA_ONE, A11, B1,
                       FLA_Cntl_sub_trmm( cntl ) );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00, A01, A02,
                                          A10, A11, A12,
                              &ABL, &ABR, A20, A21, A22,
                              FLA_BR );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Sylv_nn_blk_var17( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t* cntl ) { FLA_Obj BTL, BTR, B00, B01, B02, BBL, BBR, B10, B11, B12, B20, B21, B22; FLA_Obj CL, CR, C0, C1, C2; dim_t b; FLA_Part_2x2( B, &BTL, &BTR, &BBL, &BBR, 0, 0, FLA_TL ); FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT ); while ( FLA_Obj_length( BTL ) < FLA_Obj_length( B ) ){ b = FLA_Determine_blocksize( CR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, /**/ &B01, &B02, /* ************* */ /* ******************** */ &B10, /**/ &B11, &B12, BBL, /**/ BBR, &B20, /**/ &B21, &B22, b, b, FLA_BR ); FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2, b, FLA_RIGHT ); // Loop Invariant: // CL = // CR = /*------------------------------------------------------------*/ // C1 = sylv( A, B11, C1 -/+ C0 * B01 ); FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_NEGATE( isgn ), C0, B01, FLA_ONE, C1, FLA_Cntl_sub_gemm1( cntl ) ); FLA_Sylv_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, isgn, A, B11, C1, scale, FLA_Cntl_sub_sylv1( cntl ) ); /*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, B01, /**/ B02, B10, B11, /**/ B12, /* ************** */ /* ****************** */ &BBL, /**/ &BBR, B20, B21, /**/ B22, FLA_TL ); FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2, FLA_LEFT ); } return FLA_SUCCESS; }
/*
 * FLA_Gemm_nt_blk_var5
 *
 * Blocked matrix multiply with B transposed.  C is scaled by beta once;
 * A and B are then traversed left to right in matching column panels
 * and each pair contributes
 *
 *     C = alpha * A1 * B1' + C
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nt_blk_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj AL, AR, A0, A1, A2;
  FLA_Obj BL, BR, B0, B1, B2;
  dim_t nb;

  FLA_Scal_internal( beta, C, FLA_Cntl_sub_scal( cntl ) );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    nb = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &A1, &A2, nb, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_RIGHT );

    /* beta was already applied above, so accumulate with FLA_ONE. */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_TRANSPOSE,
                       alpha, A1, B1, FLA_ONE, C,
                       FLA_Cntl_sub_gemm( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, A1, A2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_QR_UT_unb_var1
 *
 * Unblocked Householder sweep over A.  A is walked along its diagonal
 * from the top-left corner and t from left to right, one element per
 * iteration, until the trailing block ABR has no rows or no columns.
 * Each step:
 *
 *   1. computes a Householder transform H from alpha11 and a21 (the
 *      scalar goes to tau1, the vector overwrites a21) that annihilates
 *      a21 when applied from the left;
 *   2. applies H from the left to the trailing rows
 *
 *          / a12t \ = H / a12t \
 *          \ A22  /     \ A22  /
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_QR_UT_unb_var1( FLA_Obj A, FLA_Obj t )
{
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;
  FLA_Obj tLt, tRt, t0t, tau1, t2t;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_TL );
  FLA_Part_1x2( t, &tLt, &tRt, 0, FLA_LEFT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00,  &a01,     &A02,
                                     &a10t, &alpha11, &a12t,
                           ABL, ABR, &A20,  &a21,     &A22,
                           1, 1, FLA_BR );
    FLA_Repart_1x2_to_1x3( tLt, tRt, &t0t, &tau1, &t2t, 1, FLA_RIGHT );

    /* Build H from the current column ( alpha11; a21 ). */
    FLA_Househ2_UT( FLA_LEFT, alpha11, a21, tau1 );

    /* Apply H, defined by tau1 and a21, to ( a12t; A22 ). */
    FLA_Apply_H2_UT( FLA_LEFT, tau1, a21, a12t, A22 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00,  a01,     A02,
                                          a10t, alpha11, a12t,
                              &ABL, &ABR, A20,  a21,     A22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &tLt, &tRt, t0t, tau1, t2t, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Syrk_ut_blk_var1
 *
 * Blocked symmetric rank-k update on the upper triangle of C using the
 * transpose of A.  A is traversed left to right in column panels while
 * C is walked along its diagonal from the top-left corner.  Per
 * iteration:
 *
 *     C12 = beta * C12 + alpha * A1' * A2
 *     C11 = beta * C11 + alpha * A1' * A1     (upper triangle)
 *
 * beta is applied by the sub-calls, block by block.
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Syrk_ut_blk_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C, fla_syrk_t* cntl )
{
  FLA_Obj AL, AR, A0, A1, A2;
  FLA_Obj CTL, CTR, CBL, CBR;
  FLA_Obj C00, C01, C02;
  FLA_Obj C10, C11, C12;
  FLA_Obj C20, C21, C22;
  dim_t nb;

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
  FLA_Part_2x2( C, &CTL, &CTR,
                   &CBL, &CBR, 0, 0, FLA_TL );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    nb = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &A1, &A2, nb, FLA_RIGHT );
    FLA_Repart_2x2_to_3x3( CTL, CTR, &C00, &C01, &C02,
                                     &C10, &C11, &C12,
                           CBL, CBR, &C20, &C21, &C22,
                           nb, nb, FLA_BR );

    /* C12 = C12 + A1' * A2 */
    FLA_Gemm_internal( FLA_TRANSPOSE, FLA_NO_TRANSPOSE,
                       alpha, A1, A2, beta, C12,
                       FLA_Cntl_sub_gemm( cntl ) );

    /* C11 = C11 + A1' * A1 */
    FLA_Syrk_internal( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                       alpha, A1, beta, C11,
                       FLA_Cntl_sub_syrk( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, A1, A2, FLA_LEFT );
    FLA_Cont_with_3x3_to_2x2( &CTL, &CTR, C00, C01, C02,
                                          C10, C11, C12,
                              &CBL, &CBR, C20, C21, C22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Gemm_tc_blk_var4
 *
 * Blocked matrix multiply with A transposed and B conjugated (no
 * transpose).  B and C are traversed right to left in matching column
 * panels; each panel of C is updated by the sub-problem routine with
 * the caller's alpha and beta:
 *
 *     C1 = alpha * A' * B1 + beta * C1
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_tc_blk_var4( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj BL, BR, B0, B1, B2;
  FLA_Obj CL, CR, C0, C1, C2;
  dim_t nb;

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );

  while ( FLA_Obj_width( BR ) < FLA_Obj_width( B ) )
  {
    nb = FLA_Determine_blocksize( BL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_LEFT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &C1, &C2, nb, FLA_LEFT );

    FLA_Gemm_internal( FLA_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE,
                       alpha, A, B1, beta, C1,
                       FLA_Cntl_sub_gemm( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_RIGHT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, C1, C2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Hemm_lu_blk_var9
 *
 * Blocked Hermitian multiply from the left, referencing the upper
 * triangle of A.  B and C are traversed left to right in matching
 * column panels; each panel of C is updated by the sub-problem routine
 * with the caller's alpha and beta:
 *
 *     C1 = beta * C1 + alpha * A * B1
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Hemm_lu_blk_var9( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_hemm_t* cntl )
{
  FLA_Obj BL, BR, B0, B1, B2;
  FLA_Obj CL, CR, C0, C1, C2;
  dim_t nb;

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    nb = FLA_Determine_blocksize( BR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &C1, &C2, nb, FLA_RIGHT );

    FLA_Hemm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                       alpha, A, B1, beta, C1,
                       FLA_Cntl_sub_hemm( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, C1, C2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Apply_Q_UT_lhfr_blk_var2
 *
 * Applies Q' (defined by A and T) from the left to B, one column panel
 * of B at a time.  B and the workspace W are traversed left to right in
 * matching column panels, and each panel is processed by the
 * sub-problem routine with the full A and T:
 *
 *     B1 = Q' * B1
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Apply_Q_UT_lhfr_blk_var2( FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t* cntl )
{
  FLA_Obj BL, BR, B0, B1, B2;
  FLA_Obj WL, WR, W0, W1, W2;
  dim_t nb;

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
  FLA_Part_1x2( W, &WL, &WR, 0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    nb = FLA_Determine_blocksize( BR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( WL, WR, &W0, &W1, &W2, nb, FLA_RIGHT );

    /* Each panel of B gets its own slice W1 of the workspace. */
    FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE,
                             FLA_FORWARD, FLA_ROWWISE,
                             A, T, W1, B1,
                             FLA_Cntl_sub_apqut( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &WL, &WR, W0, W1, W2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Gemm_nc_blk_var6
 *
 * Blocked matrix multiply with B conjugated (no transpose).  C is
 * scaled by beta once; A is then traversed right to left in column
 * panels while B is traversed bottom to top in matching row panels.
 * Each pair contributes
 *
 *     C = alpha * A1 * B1 + C
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nc_blk_var6( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj AL, AR, A0, A1, A2;
  FLA_Obj BT, BB, B0, B1, B2;
  dim_t nb;

  FLA_Scal_internal( beta, C, FLA_Cntl_sub_scal( cntl ) );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_RIGHT );
  FLA_Part_2x1( B, &BT,
                   &BB, 0, FLA_BOTTOM );

  while ( FLA_Obj_width( AR ) < FLA_Obj_width( A ) )
  {
    nb = FLA_Determine_blocksize( AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &A1, &A2, nb, FLA_LEFT );
    FLA_Repart_2x1_to_3x1( BT, &B0,
                               &B1,
                           BB, &B2, nb, FLA_TOP );

    /* beta was already applied above, so accumulate with FLA_ONE. */
    FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_NO_TRANSPOSE,
                       alpha, A1, B1, FLA_ONE, C,
                       FLA_Cntl_sub_gemm( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, A1, A2, FLA_RIGHT );
    FLA_Cont_with_3x1_to_2x1( &BT, B0,
                                   B1,
                              &BB, B2, FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Gemm_hh_blk_var6
 *
 * Blocked matrix multiply with both A and B conjugate-transposed.  C is
 * scaled by beta once; A is then traversed bottom to top in row panels
 * while B is traversed right to left in matching column panels.  Each
 * pair contributes
 *
 *     C = alpha * A1' * B1' + C
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_hh_blk_var6( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj AT, AB, A0, A1, A2;
  FLA_Obj BL, BR, B0, B1, B2;
  dim_t nb;

  FLA_Scal_internal( beta, C, FLA_Cntl_sub_scal( cntl ) );

  FLA_Part_2x1( A, &AT,
                   &AB, 0, FLA_BOTTOM );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_length( AB ) < FLA_Obj_length( A ) )
  {
    nb = FLA_Determine_blocksize( AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT, &A0,
                               &A1,
                           AB, &A2, nb, FLA_TOP );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_LEFT );

    /* beta was already applied above, so accumulate with FLA_ONE. */
    FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                       alpha, A1, B1, FLA_ONE, C,
                       FLA_Cntl_sub_gemm( cntl ) );

    FLA_Cont_with_3x1_to_2x1( &AT, A0,
                                   A1,
                              &AB, A2, FLA_BOTTOM );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Copyt_c_blk_var3
 *
 * Blocked copy of A into B with conjugation and no transposition.  A
 * and B are traversed left to right in matching column panels, and each
 * panel pair is handed to the sub-problem copy routine with
 * FLA_CONJ_NO_TRANSPOSE.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Copyt_c_blk_var3( FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl )
{
  FLA_Obj AL, AR, A0, A1, A2;
  FLA_Obj BL, BR, B0, B1, B2;
  dim_t nb;

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    nb = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &A1, &A2, nb, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &B1, &B2, nb, FLA_RIGHT );

    FLA_Copyt_internal( FLA_CONJ_NO_TRANSPOSE, A1, B1,
                        FLA_Cntl_sub_copyt( cntl ) );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, A1, A2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, B1, B2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Syr2k_un_unb_var9
 *
 * Unblocked symmetric rank-2k update on the upper triangle of C.  The
 * upper triangle is scaled by beta once; A and B are then traversed
 * left to right one column at a time, and each column pair contributes
 * a symmetric rank-2 update:
 *
 *     C = C + alpha * ( a1t * b1t' + b1t * a1t' )
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Syr2k_un_unb_var9( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj AL, AR, A0, a1t, A2;
  FLA_Obj BL, BR, B0, b1t, B2;

  FLA_Scalr_external( FLA_UPPER_TRIANGULAR, beta, C );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &a1t, &A2, 1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1t, &B2, 1, FLA_RIGHT );

    FLA_Syr2_external( FLA_UPPER_TRIANGULAR, alpha, a1t, b1t, C );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, a1t, A2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1t, B2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trmm_run_unb_var2
 *
 * Unblocked triangular matrix multiply from the right with the upper
 * triangle of A.  B is scaled by alpha, then A is traversed from its
 * bottom-right corner and B from right to left, one column per
 * iteration:
 *
 *     B2 = B2 + b1 * a12t
 *     b1 = b1 * alpha11        (skipped when diagA is FLA_UNIT_DIAG)
 *
 * The rank-1 update must come first because it reads b1 before b1 is
 * scaled.
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Trmm_run_unb_var2( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;
  FLA_Obj BL, BR, B0, b1, B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_BR );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00,  &a01,     &A02,
                                     &a10t, &alpha11, &a12t,
                           ABL, ABR, &A20,  &a21,     &A22,
                           1, 1, FLA_TL );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_LEFT );

    /* B2 = B2 + b1 * a12t */
    FLA_Ger_external( FLA_ONE, b1, a12t, B2 );

    /* b1 = b1 * alpha11 */
    if ( diagA != FLA_UNIT_DIAG )
    {
      FLA_Scal_external( alpha11, b1 );
    }

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00,  a01,     A02,
                                          a10t, alpha11, a12t,
                              &ABL, &ABR, A20,  a21,     A22,
                              FLA_BR );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Herk_lh_unb_var1
 *
 * Unblocked Hermitian rank-k update on the lower triangle of C using
 * the conjugate transpose of A.  The lower triangle is scaled by beta
 * once; A is then traversed left to right one column at a time while C
 * is walked along its diagonal from the top-left corner.  Per
 * iteration:
 *
 *     c10t    = c10t    + alpha * A0' * a1   (a1 conjugated in the call)
 *     gamma11 = gamma11 + alpha * a1' * a1
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Herk_lh_unb_var1( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj AL, AR, A0, a1, A2;
  FLA_Obj CTL, CTR, CBL, CBR;
  FLA_Obj C00,  c01,     C02;
  FLA_Obj c10t, gamma11, c12t;
  FLA_Obj C20,  c21,     C22;

  FLA_Scalr_external( FLA_LOWER_TRIANGULAR, beta, C );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
  FLA_Part_2x2( C, &CTL, &CTR,
                   &CBL, &CBR, 0, 0, FLA_TL );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &a1, &A2, 1, FLA_RIGHT );
    FLA_Repart_2x2_to_3x3( CTL, CTR, &C00,  &c01,     &C02,
                                     &c10t, &gamma11, &c12t,
                           CBL, CBR, &C20,  &c21,     &C22,
                           1, 1, FLA_BR );

    /* c10t = c10t + A0' * a1 */
    FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE,
                        alpha, A0, a1, FLA_ONE, c10t );

    /* gamma11 = gamma11 + a1' * a1 */
    FLA_Dotcs_external( FLA_CONJUGATE, alpha, a1, a1, FLA_ONE, gamma11 );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, a1, A2, FLA_LEFT );
    FLA_Cont_with_3x3_to_2x2( &CTL, &CTR, C00,  c01,     C02,
                                          c10t, gamma11, c12t,
                              &CBL, &CBR, C20,  c21,     C22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trsm_rlc_unb_var1
 *
 * Unblocked triangular solve from the right using elements of the lower
 * triangle of A, conjugated.  B is scaled by alpha, then A is traversed
 * from its bottom-right corner and B from right to left, one column per
 * iteration:
 *
 *     b1 = b1 - B2 * a21       (a21 conjugated in the call)
 *     b1 = b1 / alpha11        (alpha11 conjugated; skipped when diagA
 *                               is FLA_UNIT_DIAG)
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_rlc_unb_var1( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj ATL, ATR, ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;
  FLA_Obj BL, BR, B0, b1, B2;

  FLA_Scal_external( alpha, B );

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_BR );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_length( ABR ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR, &A00,  &a01,     &A02,
                                     &a10t, &alpha11, &a12t,
                           ABL, ABR, &A20,  &a21,     &A22,
                           1, 1, FLA_TL );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_LEFT );

    /* b1 = b1 - B2 * a21 */
    FLA_Gemvc_external( FLA_NO_TRANSPOSE, FLA_CONJUGATE,
                        FLA_MINUS_ONE, B2, a21, FLA_ONE, b1 );

    /* b1 = b1 / alpha11 */
    if ( diagA != FLA_UNIT_DIAG )
    {
      FLA_Inv_scalc_external( FLA_CONJUGATE, alpha11, b1 );
    }

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00,  a01,     A02,
                                          a10t, alpha11, a12t,
                              &ABL, &ABR, A20,  a21,     A22,
                              FLA_BR );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Gemm_nn_unb_var3
 *
 * Unblocked matrix multiply built from matrix-vector products.  C is
 * scaled by beta once; B and C are then traversed left to right one
 * column at a time and each column of C is updated with
 *
 *     c1 = alpha * A * b1 + c1
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_nn_unb_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj BL, BR, B0, b1, B2;
  FLA_Obj CL, CR, C0, c1, C2;

  FLA_Scal_external( beta, C );

  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );
  FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT );

  while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
  {
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( CL, CR, &C0, &c1, &C2, 1, FLA_RIGHT );

    /* beta was already applied above, so accumulate with FLA_ONE. */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, alpha, A, b1, FLA_ONE, c1 );

    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &CL, &CR, C0, c1, C2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Gemm_ct_unb_var5
 *
 * Unblocked matrix multiply built from rank-1 updates.  C is scaled by
 * beta once; A and B are then traversed left to right one column at a
 * time and each column pair contributes
 *
 *     C = alpha * a1 * b1' + C
 *
 * where a1 is conjugated and b1 is not (the flags passed to the rank-1
 * routine).
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Gemm_ct_unb_var5( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj AL, AR, A0, a1, A2;
  FLA_Obj BL, BR, B0, b1, B2;

  FLA_Scal_external( beta, C );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &a1, &A2, 1, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( BL, BR, &B0, &b1, &B2, 1, FLA_RIGHT );

    FLA_Gerc_external( FLA_CONJUGATE, FLA_NO_CONJUGATE, alpha, a1, b1, C );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, a1, A2, FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR, B0, b1, B2, FLA_LEFT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Syrk_ut_unb_var3
 *
 * Unblocked symmetric rank-k update on the upper triangle of C using
 * the transpose of A.  The upper triangle is scaled by beta once; A is
 * then traversed right to left one column at a time while C is walked
 * along its diagonal from the bottom-right corner.  Per iteration:
 *
 *     c12t    = c12t    + alpha * A2' * a1
 *     gamma11 = gamma11 + alpha * a1' * a1
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Syrk_ut_unb_var3( FLA_Obj alpha, FLA_Obj A, FLA_Obj beta, FLA_Obj C )
{
  FLA_Obj AL, AR, A0, a1, A2;
  FLA_Obj CTL, CTR, CBL, CBR;
  FLA_Obj C00,  c01,     C02;
  FLA_Obj c10t, gamma11, c12t;
  FLA_Obj C20,  c21,     C22;

  FLA_Scalr_external( FLA_UPPER_TRIANGULAR, beta, C );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_RIGHT );
  FLA_Part_2x2( C, &CTL, &CTR,
                   &CBL, &CBR, 0, 0, FLA_BR );

  while ( FLA_Obj_width( AR ) < FLA_Obj_width( A ) )
  {
    FLA_Repart_1x2_to_1x3( AL, AR, &A0, &a1, &A2, 1, FLA_LEFT );
    FLA_Repart_2x2_to_3x3( CTL, CTR, &C00,  &c01,     &C02,
                                     &c10t, &gamma11, &c12t,
                           CBL, CBR, &C20,  &c21,     &C22,
                           1, 1, FLA_TL );

    /* c12t = c12t + A2' * a1 */
    FLA_Gemv_external( FLA_TRANSPOSE, alpha, A2, a1, FLA_ONE, c12t );

    /* gamma11 = gamma11 + a1' * a1 */
    FLA_Dots_external( alpha, a1, a1, FLA_ONE, gamma11 );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR, A0, a1, A2, FLA_RIGHT );
    FLA_Cont_with_3x3_to_2x2( &CTL, &CTR, C00,  c01,     C02,
                                          c10t, gamma11, c12t,
                              &CBL, &CBR, C20,  c21,     C22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}