FLA_Error FLA_Scalc_check( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ) { FLA_Error e_val; e_val = FLA_Check_valid_conj( conjalpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_floating_object( A ); FLA_Check_error_code( e_val ); e_val = FLA_Check_nonconstant_object( A ); FLA_Check_error_code( e_val ); if ( FLA_Obj_is_real( A ) ) { e_val = FLA_Check_consistent_object_datatype( A, alpha ); FLA_Check_error_code( e_val ); } else { e_val = FLA_Check_identical_object_precision( A, alpha ); FLA_Check_error_code( e_val ); } e_val = FLA_Check_if_scalar( alpha ); FLA_Check_error_code( e_val ); return FLA_SUCCESS; }
void time_QR_UT( int variant, int type, int nrepeats, int m, int n, FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, FLA_Obj b, FLA_Obj b_orig, double *dtime, double *diff, double *gflops ) { int irep; double dtime_old = 1.0e9; FLA_Obj A_save, b_save, norm; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, b, &b_save ); if ( FLA_Obj_is_single_precision( A ) ) FLA_Obj_create( FLA_FLOAT, 1, 1, 0, 0, &norm ); else FLA_Obj_create( FLA_DOUBLE, 1, 1, 0, 0, &norm ); FLA_Copy_external( A, A_save ); FLA_Copy_external( b, b_save ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( A_save, A ); *dtime = FLA_Clock(); switch( variant ){ case 0:{ switch( type ){ case FLA_ALG_REFERENCE: REF_QR_UT( A, t ); break; case FLA_ALG_FRONT: FLA_QR_UT( A, T ); break; default: printf("trouble\n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } if ( type == FLA_ALG_REFERENCE ) { FLA_Obj AT, AB; FLA_Obj bT, bB; FLA_Obj y; FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y ); FLA_Copy_external( b, b_orig ); if ( FLA_Obj_is_real( A ) ) FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, t, b ); else FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, t, b ); FLA_Part_2x1( A, &AT, &AB, FLA_Obj_width( A ), FLA_TOP ); FLA_Part_2x1( b, &bT, &bB, FLA_Obj_width( A ), FLA_TOP ); FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, AT, bT ); FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, bT, FLA_ONE, b_orig ); FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y ); FLA_Nrm2_external( y, norm ); FLA_Obj_extract_real_scalar( norm, diff ); FLA_Obj_free( &y ); } else { FLA_Obj x, y; FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &y ); FLA_Obj_create( FLA_Obj_datatype( b ), n, 1, 0, 0, &x ); FLA_Copy_external( b, b_orig ); FLA_QR_UT_solve( A, T, b, x ); FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A_save, x, FLA_ONE, b_orig ); FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, b_orig, FLA_ZERO, y ); FLA_Nrm2_external( y, norm ); FLA_Obj_extract_real_scalar( norm, diff ); FLA_Obj_free( &x ); FLA_Obj_free( &y ); } *gflops = ( 2.0 * m * n * n - ( 2.0 / 3.0 ) * n * n * n ) / dtime_old / 1e9; if ( FLA_Obj_is_complex( A ) ) *gflops *= 4.0; *dtime = dtime_old; FLA_Copy_external( A_save, A ); FLA_Copy_external( b_save, b ); FLA_Obj_free( &A_save ); FLA_Obj_free( &b_save ); FLA_Obj_free( &norm ); }
FLA_Error FLA_Bidiag_apply_V_external( FLA_Side side, FLA_Trans trans, FLA_Obj A, FLA_Obj t, FLA_Obj B ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; // int m_A, n_A; int m_B, n_B; int cs_A; int cs_B; int k_t; int lwork; FLA_Obj work; char blas_side; char blas_vect = 'P'; char blas_trans; int i; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Apply_Q_check( side, trans, storev, A, t, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); // m_A = FLA_Obj_length( A ); // n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); cs_B = FLA_Obj_col_stride( B ); if ( blas_vect == 'Q' ) k_t = FLA_Obj_vector_dim( t ); else k_t = FLA_Obj_vector_dim( t ) + 1; if ( FLA_Obj_is_real( A ) && trans == FLA_CONJ_TRANSPOSE ) trans = FLA_TRANSPOSE; FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_t = ( float * ) FLA_FLOAT_PTR( t ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sormbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dormbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work ); F77_cunmbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work ); F77_zunmbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } } } FLA_Obj_free( &work ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
void time_Apply_G_rf( int variant, int type, int n_repeats, int m, int k, int n, int b_alg, FLA_Obj A, FLA_Obj A_ref, FLA_Obj G, FLA_Obj P, double *dtime, double *diff, double *gflops ) { int irep; double dtime_old = 1.0e9; FLA_Obj A_save, G_save, norm; if ( FLA_Obj_is_real( A ) ) { if ( //( variant == 1 && type == FLA_ALG_UNB_OPT ) || //( variant == 1 && type == FLA_ALG_UNB_ASM ) || //( variant == 1 && type == FLA_ALG_BLOCKED ) || //( variant == 2 && type == FLA_ALG_UNB_OPT ) || //( variant == 2 && type == FLA_ALG_UNB_ASM ) || //( variant == 2 && type == FLA_ALG_BLOCKED ) || //( variant == 3 && type == FLA_ALG_UNB_OPT ) || //( variant == 3 && type == FLA_ALG_UNB_ASM ) || //( variant == 3 && type == FLA_ALG_BLOCKED ) || //( variant == 6 && type == FLA_ALG_UNB_OPT ) || //( variant == 6 && type == FLA_ALG_UNB_ASM ) || //( variant == 6 && type == FLA_ALG_BLOCKED ) || //( variant == 9 && type == FLA_ALG_UNB_OPT ) || //( variant == 9 && type == FLA_ALG_UNB_ASM ) || //( variant == 9 && type == FLA_ALG_BLOCKED ) || ( variant == 4 ) || ( variant == 5 ) || ( variant == 7 ) || ( variant == 8 ) || FALSE ) { *gflops = 0.0; *diff = 0.0; return; } } else if ( FLA_Obj_is_complex( A ) ) { if ( //( variant == 1 && type == FLA_ALG_UNB_OPT ) || //( variant == 1 && type == FLA_ALG_UNB_ASM ) || //( variant == 1 && type == FLA_ALG_BLOCKED ) || //( variant == 2 && type == FLA_ALG_UNB_OPT ) || //( variant == 2 && type == FLA_ALG_UNB_ASM ) || //( variant == 2 && type == FLA_ALG_BLOCKED ) || //( variant == 3 && type == FLA_ALG_UNB_OPT ) || //( variant == 3 && type == FLA_ALG_UNB_ASM ) || //( variant == 3 && type == FLA_ALG_BLOCKED ) || //( variant == 6 && type == FLA_ALG_UNB_OPT ) || //( variant == 6 && type == FLA_ALG_UNB_ASM ) || //( variant == 6 && type == FLA_ALG_BLOCKED ) || //( variant == 9 && type == FLA_ALG_UNB_OPT ) || //( variant == 9 && type == FLA_ALG_UNB_ASM ) || //( variant == 9 && type == FLA_ALG_BLOCKED ) || ( variant == 4 ) || ( variant == 5 ) || ( variant == 7 ) || ( variant == 8 ) || FALSE ) { *gflops = 0.0; *diff = 0.0; return; } } FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, G, &G_save ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); //dim_t b_flash_m = b_alg; //dim_t b_flash_n = n; //FLASH_Obj_create_hier_copy_of_flat_ext( A, 1, &b_flash_m, &b_flash_n, &AH ); //printf ( "flash dims: %d x %d\n", FLA_Obj_length( AH ), FLA_Obj_width( AH ) ); FLA_Copy_external( A, A_save ); FLA_Copy_external( G, G_save ); for ( irep = 0 ; irep < n_repeats; irep++ ){ FLA_Copy_external( A_save, A ); FLA_Copy_external( G_save, G ); //FLASH_Obj_hierarchify( A_save, AH ); *dtime = FLA_Clock(); switch( variant ){ case 0: break; // Time variant 1 case 1: { switch( type ){ case FLA_ALG_UNB_OPT: FLA_Apply_G_rf_opt_var1( G, A ); break; case FLA_ALG_UNB_ASM: FLA_Apply_G_rf_asm_var1( G, A ); break; case FLA_ALG_BLOCKED: FLA_Apply_G_rf_blk_var1( G, A, b_alg ); break; } break; } // Time variant 2 case 2: { switch( type ){ case FLA_ALG_UNB_OPT: FLA_Apply_G_rf_opt_var2( G, A ); break; case FLA_ALG_UNB_ASM: FLA_Apply_G_rf_asm_var2( G, A ); break; case FLA_ALG_BLOCKED: FLA_Apply_G_rf_blk_var2( G, A, b_alg ); break; } break; } // Time variant 3 case 3: { switch( type ){ case FLA_ALG_UNB_OPT: FLA_Apply_G_rf_opt_var3( G, A ); break; case FLA_ALG_UNB_ASM: FLA_Apply_G_rf_asm_var3( G, A ); break; case FLA_ALG_BLOCKED: FLA_Apply_G_rf_blk_var3( G, A, b_alg ); break; } break; } // Time variant 6 case 6: { switch( type ){ case FLA_ALG_UNB_OPT: FLA_Apply_G_rf_opt_var6( G, A ); break; case FLA_ALG_UNB_ASM: FLA_Apply_G_rf_asm_var6( G, A ); break; case FLA_ALG_BLOCKED: FLA_Apply_G_rf_blk_var6( G, A, b_alg ); break; } break; } // Time variant 9 case 9: { switch( type ){ case FLA_ALG_UNB_OPT: FLA_Apply_G_rf_opt_var9( G, A ); break; case FLA_ALG_UNB_ASM: FLA_Apply_G_rf_asm_var9( G, A ); break; case FLA_ALG_BLOCKED: FLA_Apply_G_rf_blk_var9( G, A, b_alg ); break; } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } if ( variant == 1 && type == FLA_ALG_UNB_OPT ) { //FLA_Obj_show( "A_ref", A, "%9.2e + %9.2e ", "" ); //FLA_Obj_show( "A", A, "%9.2e ", "" ); FLA_Copy( A, A_ref ); *diff = 0.0; } else { //FLA_Obj_show( "A", A, "%9.2e + %9.2e ", "" ); //if ( variant == 7 && type == FLA_ALG_UNB_ASM ) //FLA_Obj_show( "A", A, "%9.2e", "" ); //if ( variant == 9 ) FLASH_Obj_flatten( AH, A ); FLA_Axpy( FLA_MINUS_ONE, A_ref, A ); FLA_Norm_frob( A, norm ); FLA_Obj_extract_real_scalar( norm, diff ); //*diff = FLA_Max_elemwise_diff( A_ref, A ); } *gflops = 6.0 * k * m * ( n - 1 ) / dtime_old / 1e9; if ( FLA_Obj_is_complex( A ) ) *gflops *= 2.0; *dtime = dtime_old; FLA_Copy_external( A_save, A ); FLA_Copy_external( G_save, G ); //FLASH_Obj_free( &AH ); FLA_Obj_free( &A_save ); FLA_Obj_free( &G_save ); FLA_Obj_free( &norm ); }
void time_Hess_UT( int variant, int type, int nrepeats, int m, FLA_Obj A, FLA_Obj A_ref, FLA_Obj t, FLA_Obj T, FLA_Obj W, double *dtime, double *diff, double *gflops ) { int irep; double dtime_old = 1.0e9; FLA_Obj A_save, norm; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); FLA_Copy_external( A, A_save ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( A_save, A ); *dtime = FLA_Clock(); switch( variant ){ case 0:{ switch( type ){ case FLA_ALG_REFERENCE: REF_Hess_UT( A, t ); break; case FLA_ALG_FRONT: FLA_Hess_UT( A, T ); break; default: printf("trouble\n"); } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } //if ( type == FLA_ALG_REFERENCE ) //{ // ; //} //else { FLA_Obj AT, AB; FLA_Obj Q, QT, QB; FLA_Obj E, ET, EB; FLA_Obj F; dim_t m_A, m_T; m_A = FLA_Obj_length( A ); m_T = FLA_Obj_length( T ); FLA_Obj_create( FLA_Obj_datatype( A ), m_A, m_A, 0, 0, &Q ); FLA_Set_to_identity( Q ); FLA_Part_2x1( Q, &QT, &QB, 1, FLA_TOP ); FLA_Part_2x1( A, &AT, &AB, 1, FLA_TOP ); if ( type == FLA_ALG_REFERENCE ) { if ( FLA_Obj_is_real( A ) ) FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, AB, t, QB ); else FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, AB, t, QB ); } else FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, AB, T, W, QB ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &E ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &F ); FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, A_save, Q, FLA_ZERO, E ); FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, Q, E, FLA_ZERO, F ); FLA_Copy( A, E ); FLA_Part_2x1( E, &ET, &EB, 1, FLA_TOP ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, EB ); *diff = FLA_Max_elemwise_diff( E, F ); FLA_Obj_free( &Q ); FLA_Obj_free( &E ); FLA_Obj_free( &F ); } *gflops = ( 10.0 / 3.0 * m * m * m ) / dtime_old / 1e9; if ( FLA_Obj_is_complex( A ) ) *gflops *= 4.0; *dtime = dtime_old; FLA_Copy_external( A_save, A ); FLA_Obj_free( &A_save ); FLA_Obj_free( &norm ); }
void time_Bidiag_UT( int param_combo, int type, int nrepeats, int m, int n, FLA_Obj A, FLA_Obj tu, FLA_Obj tv, FLA_Obj TU, FLA_Obj TV, double *dtime, double *diff, double *gflops ) { int irep; double dtime_old = 1.0e9; FLA_Obj A_save, norm; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &A_save ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), 1, 1, 0, 0, &norm ); FLA_Copy_external( A, A_save ); for ( irep = 0 ; irep < nrepeats; irep++ ){ FLA_Copy_external( A_save, A ); *dtime = FLA_Clock(); switch( param_combo ){ case 0: { switch( type ) { case FLA_ALG_REFERENCE: REF_Bidiag_UT( A, tu, tv ); break; case FLA_ALG_FRONT: FLA_Bidiag_UT( A, TU, TV ); break; } break; } } *dtime = FLA_Clock() - *dtime; dtime_old = min( *dtime, dtime_old ); } { FLA_Obj AL, AR; FLA_Obj ATL, ATR, ABL, ABR; FLA_Obj QU; FLA_Obj QV, QVL, QVR; FLA_Obj E, EL, ER; FLA_Obj F; FLA_Obj WU, WV, eye; FLA_Obj tvT, tvB; dim_t m_A, n_A, m_TU; //FLA_Obj_show( "A_save", A_save, "%10.3e", "" ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); m_TU = FLA_Obj_length( TU ); FLA_Obj_create( FLA_Obj_datatype( A ), m_A, m_A, 0, 0, &QU ); FLA_Obj_create( FLA_Obj_datatype( A ), n_A, n_A, 0, 0, &QV ); FLA_Obj_create( FLA_Obj_datatype( A ), m_TU, m_A, 0, 0, &WU ); FLA_Obj_create( FLA_Obj_datatype( A ), m_TU, n_A, 0, 0, &WV ); FLA_Set_to_identity( QU ); FLA_Set_to_identity( QV ); FLA_Part_1x2( QV, &QVL, &QVR, 1, FLA_LEFT ); FLA_Part_1x2( A, &AL, &AR, 1, FLA_LEFT ); FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 1, 1, FLA_BL ); FLA_Part_2x1( tv, &tvT, &tvB, 1, FLA_BOTTOM ); if ( type == FLA_ALG_REFERENCE ) { if ( FLA_Obj_is_real( A ) ) FLA_Apply_Q_blk_external( FLA_LEFT, FLA_TRANSPOSE, FLA_COLUMNWISE, A, tu, QU ); else FLA_Apply_Q_blk_external( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_COLUMNWISE, A, tu, QU ); //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_ROWWISE, AR, tv, QVR ); // // Need to apply backwards transformation, since vectors are stored columnwise. // QL? RQ? // //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_ROWWISE, ATR, tvT, QVR ); //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, FLA_ROWWISE, ATR, tvT, QVR ); //FLA_Apply_Q_blk_external( FLA_RIGHT, FLA_CONJ_TRANSPOSE, FLA_ROWWISE, AR, tvT, QVR ); } else { FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, A, TU, WU, QU ); FLA_Apply_Q_UT( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE, AR, TV, WV, QVR ); } /* FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &eye ); FLA_Set_to_identity( eye ); //FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, // FLA_ONE, QV, QV, FLA_MINUS_ONE, eye ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE, FLA_ONE, QU, QU, FLA_MINUS_ONE, eye ); FLA_Obj_show( "eye", eye, "%10.3e", "" ); FLA_Norm_frob( eye, norm ); FLA_Obj_extract_real_scalar( norm, diff ); FLA_Obj_free( &eye ); */ FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &E ); FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &F ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, A_save, QV, FLA_ZERO, E ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, QU, E, FLA_ZERO, F ); //FLA_Obj_show( "A_save", A_save, "%10.3e", "" ); FLA_Copy( A, E ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, E ); FLA_Part_1x2( E, &EL, &ER, 1, FLA_LEFT ); FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_NONUNIT_DIAG, ER ); //FLA_Obj_show( "B", E, "%10.3e", "" ); //FLA_Obj_show( "Q'AV", F, "%10.3e", "" ); //FLA_Obj_show( "B", E, "%10.3e + %10.3e ", "" ); //FLA_Obj_show( "Q'AV", F, "%10.3e + %10.3e ", "" ); *diff = FLA_Max_elemwise_diff( E, F ); FLA_Obj_free( &E ); FLA_Obj_free( &F ); FLA_Obj_free( &QU ); FLA_Obj_free( &QV ); FLA_Obj_free( &WU ); FLA_Obj_free( &WV ); } *gflops = 4.0 * n * n * ( m - n / 3.0 ) / dtime_old / 1e9; if ( FLA_Obj_is_complex( A ) ) *gflops *= 4.0; *dtime = dtime_old; FLA_Copy_external( A_save, A ); FLA_Obj_free( &A_save ); FLA_Obj_free( &norm ); }