/*
 * FLA_Trsm_rut_unb_var4
 *
 * Unblocked row sweep over B.  B is first scaled by alpha as a whole; then
 * every single row b1t of B is handed to FLA_Trsv_external() together with
 * A, which is referenced as upper triangular and without transposition
 * (the original inline note for this step reads "b1t = b1t / triu( A' )").
 *
 *   diagA  diagonal flag, forwarded unchanged to FLA_Trsv_external()
 *   alpha  scalar applied to B before the sweep
 *   A      triangular operand of the per-row solve
 *   B      right-hand sides, updated through the row views
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_rut_unb_var4( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj BT;
  FLA_Obj BB;
  FLA_Obj B0;
  FLA_Obj b1t;
  FLA_Obj B2;

  FLA_Scal_external( alpha, B );

  /* BB is created with 0 rows on the BOTTOM side; the loop runs until BB
     has as many rows as B. */
  FLA_Part_2x1( B, &BT, &BB, 0, FLA_BOTTOM );

  while ( FLA_Obj_length( BB ) < FLA_Obj_length( B ) )
  {
    /* Expose one row (b1t), taken from the TOP partition BT. */
    FLA_Repart_2x1_to_3x1( BT, &B0, &b1t,
                           BB, &B2,
                           1, FLA_TOP );

    /* Triangular solve for the exposed row. */
    FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, diagA, A, b1t );

    /* Fold the processed row into the BOTTOM partition. */
    FLA_Cont_with_3x1_to_2x1( &BT, B0,
                              b1t,
                              &BB, B2,
                              FLA_BOTTOM );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trsm_llc_unb_var4
 *
 * Unblocked column sweep over B.  B is first scaled by alpha as a whole;
 * then every single column b1 of B is handed to FLA_Trsv_external()
 * together with A, which is referenced as lower triangular with the
 * FLA_CONJ_NO_TRANSPOSE option.
 *
 *   diagA  diagonal flag, forwarded unchanged to FLA_Trsv_external()
 *   alpha  scalar applied to B before the sweep
 *   A      triangular operand of the per-column solve
 *   B      right-hand sides, updated through the column views
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_llc_unb_var4( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj BL;
  FLA_Obj BR;
  FLA_Obj B0;
  FLA_Obj b1;
  FLA_Obj B2;

  FLA_Scal_external( alpha, B );

  /* BR is created with 0 columns on the RIGHT side; the loop runs until BR
     has as many columns as B. */
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_width( BR ) < FLA_Obj_width( B ) )
  {
    /* Expose one column (b1), taken from the LEFT partition BL. */
    FLA_Repart_1x2_to_1x3( BL, BR,
                           &B0, &b1, &B2,
                           1, FLA_LEFT );

    /* Triangular solve for the exposed column. */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE, diagA, A, b1 );

    /* Fold the processed column into the RIGHT partition. */
    FLA_Cont_with_1x3_to_1x2( &BL, &BR,
                              B0, b1, B2,
                              FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trsm_rlc_unb_var3
 *
 * Unblocked row sweep over B.  B is first scaled by alpha as a whole; then
 * every single row b1t of B is handed to FLA_Trsv_external() together with
 * A, which is referenced as lower triangular with the FLA_CONJ_TRANSPOSE
 * option.
 *
 *   diagA  diagonal flag, forwarded unchanged to FLA_Trsv_external()
 *   alpha  scalar applied to B before the sweep
 *   A      triangular operand of the per-row solve
 *   B      right-hand sides, updated through the row views
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trsm_rlc_unb_var3( FLA_Diag diagA, FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Obj BT;
  FLA_Obj BB;
  FLA_Obj B0;
  FLA_Obj b1t;
  FLA_Obj B2;

  FLA_Scal_external( alpha, B );

  /* BT is created with 0 rows on the TOP side; the loop runs until BT has
     as many rows as B. */
  FLA_Part_2x1( B, &BT, &BB, 0, FLA_TOP );

  while ( FLA_Obj_length( BT ) < FLA_Obj_length( B ) )
  {
    /* Expose one row (b1t), taken from the BOTTOM partition BB. */
    FLA_Repart_2x1_to_3x1( BT, &B0, &b1t,
                           BB, &B2,
                           1, FLA_BOTTOM );

    /* Triangular solve for the exposed row. */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, diagA, A, b1t );

    /* Fold the processed row into the TOP partition. */
    FLA_Cont_with_3x1_to_2x1( &BT, B0,
                              b1t,
                              &BB, B2,
                              FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_LU_nopiv_unb_var3
 *
 * Unblocked LU sweep without pivoting, operating on A through 3x3
 * repartitionings.  One diagonal element (alpha11) is exposed per
 * iteration; the column above it (a01), the element itself and the column
 * below it (a21) are updated.  The loop stops as soon as ATL spans either
 * all rows or all columns of A.  If columns remain to the right of ATL
 * afterwards, they are updated with one triangular solve against ATL.
 *
 *   A  matrix updated through its sub-views
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_LU_nopiv_unb_var3( FLA_Obj A )
{
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a01 = trilu( A00 ) \ a01 */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );

    /* alpha11 = alpha11 - a10t * a01 */
    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );

    /* a21 = a21 - A20 * a01 */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );

    /* a21 = a21 / alpha11 */
    FLA_Inv_scal_external( alpha11, a21 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_TL );
  }

  /* Columns left over to the right of ATL: ATR = trilu( ATL ) \ ATR */
  if ( FLA_Obj_width( ATR ) > 0 )
  {
    FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_UNIT_DIAG, FLA_ONE, ATL, ATR );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Chol_l_unb_var1
 *
 * Unblocked lower Cholesky sweep over A.  Each iteration exposes one
 * diagonal element alpha11 and the row a10t to its left, solves for a10t
 * against the already processed block A00, downdates alpha11 with the
 * inner product of a10t with itself, and takes the square root of alpha11.
 *
 *   A  matrix updated through its sub-views
 *
 * Returns FLA_SUCCESS when every FLA_Sqrt() call succeeds.  If FLA_Sqrt()
 * reports anything else, the sweep stops immediately and the row count of
 * A00 at that point is returned instead.
 */
FLA_Error FLA_Chol_l_unb_var1( FLA_Obj A )
{
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a10t  = a10t * inv( tril( A00 )' )
       a10t' = inv( conj( tril( A00 ) ) ) * a10t' */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, A00, a10t );

    /* alpha11 = alpha11 - a10t * a10t' */
    FLA_Dotcs_external( FLA_CONJUGATE, FLA_MINUS_ONE, a10t, a10t, FLA_ONE, alpha11 );

    /* alpha11 = sqrt( alpha11 ); bail out with the current A00 row count
       if the square root is rejected. */
    if ( FLA_Sqrt( alpha11 ) != FLA_SUCCESS )
    {
      return ( FLA_Obj_length( A00 ) );
    }

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trsv
 *
 * Front end for the triangular solve with a single right-hand side.
 * Arguments are validated with FLA_Trsv_check() when the current error
 * checking level is at least FLA_MIN_ERROR_CHECKING.  The work is then
 * dispatched at compile time:
 *   - with FLA_ENABLE_BLAS2_FRONT_END_CNTL_TREES defined, to
 *     FLA_Trsv_internal() using the fla_trsv_cntl_blas control tree;
 *   - otherwise, directly to FLA_Trsv_external().
 *
 *   uplo, trans, diag  option flags, forwarded unchanged
 *   A                  triangular operand
 *   x                  vector operand
 *
 * Returns whatever the selected back end returns.
 */
FLA_Error FLA_Trsv( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x )
{
  /* Check parameters. */
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Trsv_check( uplo, trans, diag, A, x );
  }

#ifdef FLA_ENABLE_BLAS2_FRONT_END_CNTL_TREES
  /* Flat control tree that simply calls the external wrapper. */
  return FLA_Trsv_internal( uplo, trans, diag, A, x, fla_trsv_cntl_blas );
#else
  return FLA_Trsv_external( uplo, trans, diag, A, x );
#endif
}
/*
 * FLA_Trinv_ln_unb_var2
 *
 * Unblocked sweep over a lower triangular matrix with a non-unit diagonal,
 * moving from the top-left corner to the bottom-right.  Per exposed
 * diagonal element alpha11:
 *   a21     = -( tril( A22 ) \ a21 ) / alpha11
 *   alpha11 = 1 / alpha11
 * A22 is only used as the operand of the solve in the iteration that
 * exposes it.
 *
 *   A  matrix updated through its sub-views
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trinv_ln_unb_var2( FLA_Obj A )
{
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a21 = tril( A22 ) \ a21 */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, A22, a21 );

    /* a21 = -a21 / alpha11, done as a sign flip followed by a division. */
    FLA_Scal_external( FLA_MINUS_ONE, a21 );
    FLA_Inv_scal_external( alpha11, a21 );

    /* alpha11 = 1.0 / alpha11; must come after a21 used the old value. */
    FLA_Invert( FLA_NO_CONJUGATE, alpha11 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trinv_lu_unb_var4
 *
 * Unblocked sweep over a lower triangular matrix treated as having a unit
 * diagonal, moving from the top-left corner to the bottom-right.  Per
 * exposed diagonal position the following three updates are applied, in
 * this order:
 *   a21   = -trilu( A22 ) \ a21
 *   A20   = A20 - a21 * a10t
 *   a10t' = trilu( A00' ) * a10t'
 * alpha11 itself is never read or written.
 *
 *   A  matrix updated through its sub-views
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Trinv_lu_unb_var4( FLA_Obj A )
{
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a21 = -trilu( A22 ) \ a21: sign flip first, then the solve. */
    FLA_Scal_external( FLA_MINUS_ONE, a21 );
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_UNIT_DIAG, A22, a21 );

    /* A20 = -a21 * a10t + A20; uses the updated a21 and the not yet
       updated a10t, so it has to sit between the other two steps. */
    FLA_Ger_external( FLA_MINUS_ONE, a21, a10t, A20 );

    /* a10t' = trilu( A00' ) * a10t' */
    FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_TRANSPOSE,
                       FLA_UNIT_DIAG, A00, a10t );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Eig_gest_il_unb_var5
 *
 * Unblocked sweep that updates the lower triangle of A using the lower
 * triangular matrix B, one diagonal element per iteration.  A and B are
 * traversed in lock step from the top-left corner to the bottom-right.
 *
 *   A  matrix updated through its sub-views
 *   Y  scratch object; only its top-left 1x1 element (psi11) is used, to
 *      hold the scalar -1/2 * alpha11 of the current iteration
 *   B  triangular operand; its sub-views are only passed as inputs here
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_Eig_gest_il_unb_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
{
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  FLA_Obj BTL, BTR;
  FLA_Obj BBL, BBR;
  FLA_Obj B00,  b01,    B02;
  FLA_Obj b10t, beta11, b12t;
  FLA_Obj B20,  b21,    B22;

  /* Only psi11 is used below; the other three views of Y exist solely to
     satisfy the partitioning call. */
  FLA_Obj psi11, y12t;
  FLA_Obj y21,   Y22;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  FLA_Part_2x2( B, &BTL, &BTR,
                   &BBL, &BBR,
                0, 0, FLA_TL );

  FLA_Part_2x2( Y, &psi11, &y12t,
                   &y21,   &Y22,
                1, 1, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x2_to_3x3( BTL, BTR,
                           &B00,  &b01,    &B02,
                           &b10t, &beta11, &b12t,
                           BBL, BBR,
                           &B20,  &b21,    &B22,
                           1, 1, FLA_BR );

    /* alpha11 = inv(beta11) * alpha11 * inv(conj(beta11))
               = inv(beta11) * alpha11 * inv(beta11)
       hence the same division is issued twice. */
    FLA_Inv_scal_external( beta11, alpha11 );
    FLA_Inv_scal_external( beta11, alpha11 );

    /* psi11 = -1/2 * alpha11 */
    FLA_Copy_external( alpha11, psi11 );
    FLA_Scal_external( FLA_MINUS_ONE_HALF, psi11 );

    /* a21 = a21 * inv(conj(beta11))
           = a21 * inv(beta11) */
    FLA_Inv_scal_external( beta11, a21 );

    /* First half-update: a21 = a21 - 1/2 * alpha11 * b21 */
    FLA_Axpy_external( psi11, b21, a21 );

    /* A22 = A22 - a21 * b21' - b21 * a21' */
    FLA_Her2c_external( FLA_LOWER_TRIANGULAR, FLA_NO_CONJUGATE,
                        FLA_MINUS_ONE, a21, b21, A22 );

    /* Second half-update, applied after the rank-2 update of A22:
       a21 = a21 - 1/2 * alpha11 * b21 */
    FLA_Axpy_external( psi11, b21, a21 );

    /* a21 = inv( tril( B22 ) ) * a21 */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_NONUNIT_DIAG, B22, a21 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_TL );

    FLA_Cont_with_3x3_to_2x2( &BTL, &BTR,
                              B00,  b01,    B02,
                              b10t, beta11, b12t,
                              &BBL, &BBR,
                              B20,  b21,    B22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_LU_piv_unb_var3
 *
 * Unblocked LU sweep with row pivoting.  A is traversed simultaneously as
 * a 2x2 partitioning (quadrants) and as a 1x2 partitioning (whole
 * columns), and p is traversed top to bottom, so that each iteration
 * exposes one diagonal element alpha11, its full column a1 and one pivot
 * entry pi1.  The loop stops as soon as ATL spans either all rows or all
 * columns of A.  If columns remain to the right of ATL afterwards, the
 * complete pivot vector p is applied to them and they are updated with
 * one triangular solve against ATL.
 *
 *   A  matrix updated through its sub-views
 *   p  pivot vector; one entry is written per iteration by
 *      FLA_Amax_external()
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_LU_piv_unb_var3( FLA_Obj A, FLA_Obj p )
{
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  FLA_Obj AL, AR;
  FLA_Obj A0, a1, A2;

  FLA_Obj pT, pB;
  FLA_Obj p0, pi1, p2;

  FLA_Obj AB0;
  FLA_Obj aB1;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );

  FLA_Part_2x1( p, &pT, &pB, 0, FLA_TOP );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00,  &a01,     &A02,
                           &a10t, &alpha11, &a12t,
                           ABL, ABR,
                           &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_1x2_to_1x3( AL, AR,
                           &A0, &a1, &A2,
                           1, FLA_RIGHT );

    FLA_Repart_2x1_to_3x1( pT, &p0, &pi1,
                           pB, &p2,
                           1, FLA_BOTTOM );

    /* Apply the previously computed pivots (p0) to the current column. */
    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );

    /* a01 = trilu( A00 ) \ a01 */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );

    /* alpha11 = alpha11 - a10t * a01 */
    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );

    /* a21 = a21 - A20 * a01 */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );

    /* aB1 = [ alpha11 ; a21 ], i.e. alpha11 stacked on top of a21. */
    FLA_Merge_2x1( alpha11,
                   a21, &aB1 );

    /* Determine the pivot index for this column. */
    FLA_Amax_external( aB1, pi1 );

    /* Apply the new pivot to the current column. */
    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );

    /* a21 = a21 / alpha11 */
    FLA_Inv_scal_external( alpha11, a21 );

    /* AB0 = [ a10t ; A20 ], i.e. a10t stacked on top of A20. */
    FLA_Merge_2x1( a10t,
                   A20, &AB0 );

    /* Apply the new pivot to the previous columns. */
    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );

    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00,  a01,     A02,
                              a10t, alpha11, a12t,
                              &ABL, &ABR,
                              A20,  a21,     A22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &AL, &AR,
                              A0, a1, A2,
                              FLA_LEFT );

    FLA_Cont_with_3x1_to_2x1( &pT, p0,
                              pi1,
                              &pB, p2,
                              FLA_TOP );
  }

  if ( FLA_Obj_width( ATR ) > 0 )
  {
    /* Apply pivots to the untouched columns. */
    FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );

    /* ATR = trilu( ATL ) \ ATR */
    FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_UNIT_DIAG, FLA_ONE, ATL, ATR );
  }

  return FLA_SUCCESS;
}
/*
 * REF_Trsv
 *
 * Reference entry point for the triangular solve: forwards every argument
 * unchanged to FLA_Trsv_external() and discards that call's result.
 *
 * Always returns 0.
 */
FLA_Error REF_Trsv( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x )
{
  FLA_Trsv_external( uplo, trans, diag, A, x );

  return 0;
}
/*
 * time_Chol_u
 *
 * Times one upper Cholesky implementation and measures the accuracy of the
 * factor it produces.
 *
 * The routine selected by (variant, type) is run nrepeats times, each time
 * on a fresh copy of the input matrix, and the smallest elapsed time is
 * kept.  The factor left in A by the last run is then used to solve
 * A_orig * x = b via two triangular solves, and the 2-norm of the residual
 * A_orig * x - b_orig is reported.
 *
 *   variant   0 runs REF_Chol_u(); 1, 2 and 3 select the matching
 *             FLA_Chol_u_*_var<k> routine; any other value runs nothing
 *   type      FLA_ALG_UNBLOCKED, FLA_ALG_UNB_OPT or FLA_ALG_BLOCKED for
 *             variants 1-3 (anything else prints "trouble")
 *   nrepeats  number of timed repetitions
 *   n         not used by this routine
 *   nb_alg    block size used for every entry of the blocksize object
 *   A         input matrix; restored to its original contents on return
 *   b         right-hand side; restored on return
 *   b_orig    second copy of the right-hand side, used as the residual
 *             accumulator; restored on return
 *   norm      object that receives the residual norm
 *   dtime     out: best elapsed time over all repetitions
 *   diff      out: residual norm copied out of `norm`
 *   gflops    out: (1/3) * m^3 / dtime / 1e9 with m the row count of A,
 *             multiplied by 4 when A is complex
 */
void time_Chol_u( int variant, int type, int nrepeats, int n, int nb_alg,
                  FLA_Obj A, FLA_Obj b, FLA_Obj b_orig, FLA_Obj norm,
                  double *dtime, double *diff, double *gflops )
{
  int irep;

  /* Larger than any plausible measurement, so the first run replaces it. */
  double dtime_save = 1.0e9;

  FLA_Obj A_save, b_save, b_orig_save;

  fla_blocksize_t* bp;
  fla_chol_t*      cntl_chol_var;
  fla_chol_t*      cntl_chol_unb;
  fla_syrk_t*      cntl_syrk_blas;
  fla_herk_t*      cntl_herk_blas;
  fla_trsm_t*      cntl_trsm_blas;
  fla_gemm_t*      cntl_gemm_blas;

  /* Control trees used by the blocked variants. */
  bp               = FLA_Blocksize_create( nb_alg, nb_alg, nb_alg, nb_alg );
  cntl_chol_unb    = FLA_Cntl_chol_obj_create( FLA_FLAT,
                                               FLA_UNB_OPT_VARIANT2,
                                               NULL,
                                               NULL,
                                               NULL,
                                               NULL,
                                               NULL,
                                               NULL );
  cntl_syrk_blas   = FLA_Cntl_syrk_obj_create( FLA_FLAT,
                                               FLA_SUBPROBLEM,
                                               NULL,
                                               NULL,
                                               NULL );
  cntl_herk_blas   = FLA_Cntl_herk_obj_create( FLA_FLAT,
                                               FLA_SUBPROBLEM,
                                               NULL,
                                               NULL,
                                               NULL );
  cntl_trsm_blas   = FLA_Cntl_trsm_obj_create( FLA_FLAT,
                                               FLA_SUBPROBLEM,
                                               NULL,
                                               NULL,
                                               NULL );
  cntl_gemm_blas   = FLA_Cntl_gemm_obj_create( FLA_FLAT,
                                               FLA_SUBPROBLEM,
                                               NULL,
                                               NULL );
  cntl_chol_var    = FLA_Cntl_chol_obj_create( FLA_FLAT,
                                               variant,
                                               bp,
                                               cntl_chol_unb,
                                               cntl_syrk_blas,
                                               cntl_herk_blas,
                                               cntl_trsm_blas,
                                               cntl_gemm_blas );

  /* Keep pristine copies so that every repetition starts from the same
     data and the caller's objects can be restored at the end. */
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save );
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save );
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b_orig, &b_orig_save );

  FLA_Copy_external( A, A_save );
  FLA_Copy_external( b, b_save );
  FLA_Copy_external( b_orig, b_orig_save );

  for ( irep = 0 ; irep < nrepeats; irep++ )
  {
    FLA_Copy_external( A_save, A );

    *dtime = FLA_Clock();

    switch ( variant )
    {
    case 0:
      REF_Chol_u( A );
      break;

    case 1:
      /* Time variant 1 */
      switch ( type )
      {
      case FLA_ALG_UNBLOCKED:
        FLA_Chol_u_unb_var1( A );
        break;
      case FLA_ALG_UNB_OPT:
        FLA_Chol_u_opt_var1( A );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Chol_u_blk_var1( A, cntl_chol_var );
        break;
      default:
        printf("trouble\n");
      }
      break;

    case 2:
      /* Time variant 2 */
      switch ( type )
      {
      case FLA_ALG_UNBLOCKED:
        FLA_Chol_u_unb_var2( A );
        break;
      case FLA_ALG_UNB_OPT:
        FLA_Chol_u_opt_var2( A );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Chol_u_blk_var2( A, cntl_chol_var );
        break;
      default:
        printf("trouble\n");
      }
      break;

    case 3:
      /* Time variant 3 */
      switch ( type )
      {
      case FLA_ALG_UNBLOCKED:
        FLA_Chol_u_unb_var3( A );
        break;
      case FLA_ALG_UNB_OPT:
        FLA_Chol_u_opt_var3( A );
        break;
      case FLA_ALG_BLOCKED:
        FLA_Chol_u_blk_var3( A, cntl_chol_var );
        break;
      default:
        printf("trouble\n");
      }
      break;

    default:
      /* Unknown variants are timed as a no-op, as before. */
      break;
    }

    *dtime = FLA_Clock() - *dtime;
    dtime_save = min( *dtime, dtime_save );
  }

  FLA_Cntl_obj_free( cntl_chol_var );
  FLA_Cntl_obj_free( cntl_chol_unb );
  FLA_Cntl_obj_free( cntl_syrk_blas );
  FLA_Cntl_obj_free( cntl_herk_blas );
  FLA_Cntl_obj_free( cntl_trsm_blas );
  FLA_Cntl_obj_free( cntl_gemm_blas );
  FLA_Blocksize_free( bp );

  /*
   * Accuracy check, identical for the reference and the FLAME
   * implementations: with A_orig = U' * U, solve U' * y = b and then
   * U * x = y, and measure || A_orig * x - b_orig ||.
   *
   * Both solves must use FLA_NONUNIT_DIAG: the diagonal of a Cholesky
   * factor holds the computed square roots, not ones.  Treating it as a
   * unit diagonal in the first solve yields an x that does not solve the
   * system, so the reported residual would not reflect the factorization.
   */
  FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE,
                     FLA_NONUNIT_DIAG, A, b );
  FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
                     FLA_NONUNIT_DIAG, A, b );

  /* b_orig = A_orig * x - b_orig */
  FLA_Hemv_external( FLA_UPPER_TRIANGULAR, FLA_ONE, A_save, b, FLA_MINUS_ONE, b_orig );
  FLA_Nrm2_external( b_orig, norm );
  FLA_Copy_object_to_buffer( FLA_NO_TRANSPOSE, 0, 0, norm, 1, 1, diff, 1, 1 );

  *gflops = 1.0 / 3.0 *
            FLA_Obj_length( A ) *
            FLA_Obj_length( A ) *
            FLA_Obj_length( A ) /
            dtime_save / 1e9;

  if ( FLA_Obj_is_complex( A ) )
  {
    *gflops *= 4.0;
  }

  *dtime = dtime_save;

  /* Hand the caller's objects back in their original state. */
  FLA_Copy_external( A_save, A );
  FLA_Copy_external( b_save, b );
  FLA_Copy_external( b_orig_save, b_orig );

  FLA_Obj_free( &A_save );
  FLA_Obj_free( &b_save );
  FLA_Obj_free( &b_orig_save );
}