FLA_Error FLA_Tridiag_UT_l_step_opd_var1( int m_A, int m_T, double* buff_A, int rs_A, int cs_A, double* buff_T, int rs_T, int cs_T ) { double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO ); double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO ); double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double first_elem; double beta; double inv_tau11; double minus_inv_tau11; int i; // b_alg = FLA_Obj_length( T ); int b_alg = m_T; // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z ); double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) ); int inc_z = 1; for ( i = 0; i < b_alg; ++i ) { double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A; double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A; double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A; double* t01 = buff_T + (i )*cs_T + (0 )*rs_T; double* tau11 = buff_T + (i )*cs_T + (i )*rs_T; double* z21 = buff_z + (i+1)*inc_z; double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A; double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A; int m_ahead = m_A - i - 1; int n_behind = i; /*------------------------------------------------------------*/ if ( m_ahead > 0 ) { // FLA_Househ2_UT( FLA_LEFT, // a21_t, // a21_b, tau11 ); FLA_Househ2_UT_l_opd( m_ahead - 1, a21_t, a21_b, rs_A, tau11 ); // FLA_Set( FLA_ONE, inv_tau11 ); // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 ); // FLA_Copy( inv_tau11, minus_inv_tau11 ); // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 ); bl1_ddiv3( buff_1, tau11, &inv_tau11 ); bl1_dneg2( &inv_tau11, &minus_inv_tau11 ); // FLA_Copy( a21_t, first_elem ); // FLA_Set( FLA_ONE, a21_t ); first_elem = *a21_t; *a21_t = *buff_1; // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 ); bl1_dsymv( BLIS1_LOWER_TRIANGULAR, m_ahead, buff_1, A22, rs_A, cs_A, a21, rs_A, buff_0, z21, inc_z ); // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta ); // FLA_Inv_scal( FLA_TWO, beta ); bl1_ddot( BLIS1_CONJUGATE, m_ahead, a21, rs_A, z21, inc_z, &beta ); bl1_dinvscals( buff_2, &beta ); // FLA_Scal( minus_inv_tau11, beta ); // FLA_Axpy( beta, a21, z21 ); // FLA_Scal( inv_tau11, z21 ); bl1_dscals( &minus_inv_tau11, &beta ); bl1_daxpyv( BLIS1_NO_CONJUGATE, m_ahead, &beta, a21, rs_A, z21, inc_z ); bl1_dscalv( BLIS1_NO_CONJUGATE, m_ahead, &inv_tau11, z21, inc_z ); // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 ); bl1_dsyr2( BLIS1_LOWER_TRIANGULAR, m_ahead, buff_m1, a21, rs_A, z21, inc_z, A22, rs_A, cs_A ); // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 ); bl1_dgemv( BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, m_ahead, n_behind, buff_1, A20, rs_A, cs_A, a21, rs_A, buff_0, t01, rs_T ); // FLA_Copy( first_elem, a21_t ); *a21_t = first_elem; } /*------------------------------------------------------------*/ } // FLA_Obj_free( &z ); FLA_free( buff_z ); return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_blk_var6( FLA_Obj G, FLA_Obj A, dim_t b_alg ) { FLA_Datatype datatype; int k_G, m_A, n_A; int rs_G, cs_G; int rs_A, cs_A; datatype = FLA_Obj_datatype( A ); k_G = FLA_Obj_width( G ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ) { case FLA_FLOAT: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); FLA_Apply_G_rf_bls_var6( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } case FLA_DOUBLE: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); FLA_Apply_G_rf_bld_var6( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } case FLA_COMPLEX: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); FLA_Apply_G_rf_blc_var6( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); FLA_Apply_G_rf_blz_var6( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Scale_diag( FLA_Conj conj, FLA_Obj alpha, FLA_Obj A ) { FLA_Datatype datatype_A; FLA_Datatype datatype_alpha; dim_t m_A, n_A; dim_t rs_A, cs_A; conj_t blis_conj; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Scale_diag_check( conj, alpha, A ); datatype_A = FLA_Obj_datatype( A ); datatype_alpha = FLA_Obj_datatype( alpha ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_blis_conj( conj, &blis_conj ); switch( datatype_A ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_sscalediag( blis_conj, 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_dscalediag( blis_conj, 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { if ( datatype_alpha == FLA_COMPLEX ) { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); bli_cscalediag( blis_conj, 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); } else { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_csscalediag( blis_conj, 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); } break; } case FLA_DOUBLE_COMPLEX: { if ( datatype_alpha == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); bli_zscalediag( blis_conj, 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); } else { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_zdscalediag( blis_conj, 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); } break; } } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1( FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w ) { /* Effective computation: y = beta * y + A' * u; a = a - conj(y) / tau; w = A * conj(a); */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_u, inc_a, inc_y, inc_w; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_u = FLA_Obj_vector_inc( u ); inc_a = FLA_Obj_vector_inc( a ); inc_y = FLA_Obj_vector_inc( y ); inc_w = FLA_Obj_vector_inc( w ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_u = FLA_FLOAT_PTR( u ); float* buff_a = FLA_FLOAT_PTR( a ); float* buff_y = FLA_FLOAT_PTR( y ); float* buff_w = FLA_FLOAT_PTR( w ); float* buff_tau = FLA_FLOAT_PTR( tau ); float* buff_beta = FLA_FLOAT_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_u = FLA_DOUBLE_PTR( u ); double* buff_a = FLA_DOUBLE_PTR( a ); double* buff_y = FLA_DOUBLE_PTR( y ); double* buff_w = FLA_DOUBLE_PTR( w ); double* buff_tau = FLA_DOUBLE_PTR( tau ); double* buff_beta = FLA_DOUBLE_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_u = FLA_COMPLEX_PTR( u ); scomplex* buff_a = FLA_COMPLEX_PTR( a ); scomplex* buff_y = FLA_COMPLEX_PTR( y ); scomplex* buff_w = FLA_COMPLEX_PTR( w ); scomplex* buff_tau = FLA_COMPLEX_PTR( tau ); scomplex* buff_beta = FLA_COMPLEX_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u ); dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w ); dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau ); dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Trsmsx_external( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ) { FLA_Datatype datatype; int m_B, n_B; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; side_t blis_side; uplo_t blis_uplo; trans_t blis_trans; diag_t blis_diag; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trsmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( B ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); FLA_Param_map_flame_to_blis_side( side, &blis_side ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); FLA_Param_map_flame_to_blis_diag( diag, &blis_diag ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_strsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dtrsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_ctrsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_ztrsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Lyap_h_opt_var1( FLA_Obj isgn, FLA_Obj A, FLA_Obj C ) { FLA_Datatype datatype; int m_AC; int rs_A, cs_A; int rs_W, cs_W; int rs_C, cs_C; FLA_Obj W; FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &W ); datatype = FLA_Obj_datatype( A ); m_AC = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_W = FLA_Obj_row_stride( W ); cs_W = FLA_Obj_col_stride( W ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_W = FLA_FLOAT_PTR( W ); float* buff_C = FLA_FLOAT_PTR( C ); float* buff_sgn = FLA_FLOAT_PTR( isgn ); FLA_Lyap_h_ops_var1( m_AC, buff_sgn, buff_A, rs_A, cs_A, buff_W, rs_W, cs_W, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_W = FLA_DOUBLE_PTR( W ); double* buff_C = FLA_DOUBLE_PTR( C ); double* buff_sgn = FLA_DOUBLE_PTR( isgn ); FLA_Lyap_h_opd_var1( m_AC, buff_sgn, buff_A, rs_A, cs_A, buff_W, rs_W, cs_W, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_W = FLA_COMPLEX_PTR( W ); scomplex* buff_C = FLA_COMPLEX_PTR( C ); scomplex* buff_sgn = FLA_COMPLEX_PTR( isgn ); FLA_Lyap_h_opc_var1( m_AC, buff_sgn, buff_A, rs_A, cs_A, buff_W, rs_W, cs_W, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W ); dcomplex* buff_C = FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex* buff_sgn = FLA_DOUBLE_COMPLEX_PTR( isgn ); FLA_Lyap_h_opz_var1( m_AC, buff_sgn, buff_A, rs_A, cs_A, buff_W, rs_W, cs_W, buff_C, rs_C, cs_C ); break; } } FLA_Obj_free( &W ); return FLA_SUCCESS; }
FLA_Error FLA_Hev_2x2( FLA_Obj alpha11, FLA_Obj alpha21, FLA_Obj alpha22, FLA_Obj lambda1, FLA_Obj lambda2 ) /* Compute the eigenvalues of a 2x2 symmetric matrix A: / alpha11 alpha21 \ \ alpha21 alpha22 / Upon completion, lambda1 and lambda2 are overwritten with the eigenvalues of larger and smaller absolute values, respectively. This routine is a nearly-verbatim translation of slae2() and dlae2() from the netlib distribution of LAPACK. -FGVZ */ { FLA_Datatype datatype; datatype = FLA_Obj_datatype( alpha11 ); switch ( datatype ) { case FLA_FLOAT: { float* buff_alpha11 = FLA_FLOAT_PTR( alpha11 ); float* buff_alpha21 = FLA_FLOAT_PTR( alpha21 ); float* buff_alpha22 = FLA_FLOAT_PTR( alpha22 ); float* buff_lambda1 = FLA_FLOAT_PTR( lambda1 ); float* buff_lambda2 = FLA_FLOAT_PTR( lambda2 ); FLA_Hev_2x2_ops( buff_alpha11, buff_alpha21, buff_alpha22, buff_lambda1, buff_lambda2 ); break; } case FLA_DOUBLE: { double* buff_alpha11 = FLA_DOUBLE_PTR( alpha11 ); double* buff_alpha21 = FLA_DOUBLE_PTR( alpha21 ); double* buff_alpha22 = FLA_DOUBLE_PTR( alpha22 ); double* buff_lambda1 = FLA_DOUBLE_PTR( lambda1 ); double* buff_lambda2 = FLA_DOUBLE_PTR( lambda2 ); FLA_Hev_2x2_opd( buff_alpha11, buff_alpha21, buff_alpha22, buff_lambda1, buff_lambda2 ); break; } case FLA_COMPLEX: { FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); break; } case FLA_DOUBLE_COMPLEX: { FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hemvc_external( FLA_Uplo uplo, FLA_Conj conja, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; int inc_y; uplo_t blis_uplo; conj_t blis_conja; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemvc_check( uplo, conja, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_conj( conja, &blis_conja ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_ssymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dsymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_chemv( blis_uplo, blis_conja, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_zhemv( blis_uplo, blis_conja, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Eig_gest_iu_opt_var1( FLA_Obj A, FLA_Obj Y, FLA_Obj B ) { FLA_Datatype datatype; int m_AB; int rs_A, cs_A; int rs_B, cs_B; int inc_y; FLA_Obj yL, yR; datatype = FLA_Obj_datatype( A ); m_AB = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); FLA_Part_1x2( Y, &yL, &yR, 1, FLA_LEFT ); inc_y = FLA_Obj_vector_inc( yL ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_y = FLA_FLOAT_PTR( yL ); float* buff_B = FLA_FLOAT_PTR( B ); FLA_Eig_gest_iu_ops_var1( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_y = FLA_DOUBLE_PTR( yL ); double* buff_B = FLA_DOUBLE_PTR( B ); FLA_Eig_gest_iu_opd_var1( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_y = FLA_COMPLEX_PTR( yL ); scomplex* buff_B = FLA_COMPLEX_PTR( B ); FLA_Eig_gest_iu_opc_var1( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yL ); dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B ); FLA_Eig_gest_iu_opz_var1( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tridiag_apply_Q_external( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Obj A, FLA_Obj t, FLA_Obj B ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; // int m_A, n_A; int m_B, n_B; int cs_A; int cs_B; int k_t; int lwork; char blas_side; char blas_uplo; char blas_trans; FLA_Obj work; int i; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Apply_Q_check( side, trans, storev, A, t, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); // m_A = FLA_Obj_length( A ); // n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); cs_B = FLA_Obj_col_stride( B ); k_t = FLA_Obj_vector_dim( t ); FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_t = ( float * ) FLA_FLOAT_PTR( t ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sormtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dormtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work ); F77_cunmtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work ); F77_zunmtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } } } FLA_Obj_free( &work ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Tridiag_UT_l_step_opd_var2( int m_A, int m_T, double* buff_A, int rs_A, int cs_A, double* buff_T, int rs_T, int cs_T ) { double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO ); double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO ); double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double first_elem; double beta; double inv_tau11; double minus_inv_tau11; double minus_upsilon11, minus_conj_upsilon11; double minus_zeta11, minus_conj_zeta11; int i; // b_alg = FLA_Obj_length( T ); int b_alg = m_T; // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u ); // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z ); // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w ); double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) ); double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) ); double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) ); int inc_u = 1; int inc_z = 1; int inc_w = 1; // Initialize some variables (only to prevent compiler warnings). first_elem = *buff_0; minus_inv_tau11 = *buff_0; for ( i = 0; i < b_alg; ++i ) { double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A; double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A; double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A; double* t01 = buff_T + (i )*cs_T + (0 )*rs_T; double* tau11 = buff_T + (i )*cs_T + (i )*rs_T; double* upsilon11= buff_u + (i )*inc_u; double* u21 = buff_u + (i+1)*inc_u; double* zeta11 = buff_z + (i )*inc_z; double* z21 = buff_z + (i+1)*inc_z; double* w21 = buff_w + (i+1)*inc_w; double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A; double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A; int m_ahead = m_A - i - 1; int m_behind = i; int n_behind = i; /*------------------------------------------------------------*/ if ( m_behind > 0 ) { // FLA_Copy( upsilon11, minus_upsilon11 ); // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 ); // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 ); bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 ); bl1_dcopyconj( &minus_upsilon11, &minus_conj_upsilon11 ); // FLA_Copy( zeta11, minus_zeta11 ); // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 ); // FLA_Copy( minus_zeta11, minus_conj_zeta11 ); bl1_dmult3( buff_m1, zeta11, &minus_zeta11 ); bl1_dcopyconj( &minus_zeta11, &minus_conj_zeta11 ); // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 ); // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 ); bl1_daxpyv( BLIS1_CONJUGATE, 1, &minus_upsilon11, zeta11, 1, alpha11, 1 ); bl1_daxpyv( BLIS1_CONJUGATE, 1, &minus_zeta11, upsilon11, 1, alpha11, 1 ); // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 ); // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 ); bl1_daxpyv( BLIS1_NO_CONJUGATE, m_ahead, &minus_conj_zeta11, u21, inc_u, a21, rs_A ); bl1_daxpyv( BLIS1_NO_CONJUGATE, m_ahead, &minus_conj_upsilon11, z21, inc_z, a21, rs_A ); } if ( m_ahead > 0 ) { // FLA_Househ2_UT( FLA_LEFT, // a21_t, // a21_b, tau11 ); FLA_Househ2_UT_l_opd( m_ahead - 1, a21_t, a21_b, rs_A, tau11 ); // FLA_Set( FLA_ONE, inv_tau11 ); // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 ); // FLA_Copy( inv_tau11, minus_inv_tau11 ); // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 ); bl1_ddiv3( buff_1, tau11, &inv_tau11 ); bl1_dneg2( &inv_tau11, &minus_inv_tau11 ); // FLA_Copy( a21_t, first_elem ); // FLA_Set( FLA_ONE, a21_t ); first_elem = *a21_t; *a21_t = *buff_1; } if ( m_behind > 0 ) { // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 ); bl1_dsyr2( BLIS1_LOWER_TRIANGULAR, m_ahead, buff_m1, u21, inc_u, z21, inc_z, A22, rs_A, cs_A ); } if ( m_ahead > 0 ) { // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 ); bl1_dsymv( BLIS1_LOWER_TRIANGULAR, m_ahead, buff_1, A22, rs_A, cs_A, a21, rs_A, buff_0, w21, inc_w ); // FLA_Copy( a21, u21 ); // FLA_Copy( w21, z21 ); bl1_dcopyv( BLIS1_NO_CONJUGATE, m_ahead, a21, rs_A, u21, inc_u ); bl1_dcopyv( BLIS1_NO_CONJUGATE, m_ahead, w21, inc_w, z21, inc_z ); // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta ); // FLA_Inv_scal( FLA_TWO, beta ); bl1_ddot( BLIS1_CONJUGATE, m_ahead, a21, rs_A, z21, inc_z, &beta ); bl1_dinvscals( buff_2, &beta ); // FLA_Scal( minus_inv_tau11, beta ); // FLA_Axpy( beta, a21, z21 ); // FLA_Scal( inv_tau11, z21 ); bl1_dscals( &minus_inv_tau11, &beta ); bl1_daxpyv( BLIS1_NO_CONJUGATE, m_ahead, &beta, a21, rs_A, z21, inc_z ); bl1_dscalv( BLIS1_NO_CONJUGATE, m_ahead, &inv_tau11, z21, inc_z ); // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 ); bl1_dgemv( BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, m_ahead, n_behind, buff_1, A20, rs_A, cs_A, a21, rs_A, buff_0, t01, rs_T ); // FLA_Copy( first_elem, a21_t ); *a21_t = first_elem; } if ( m_behind + 1 == b_alg && m_ahead > 0 ) { // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 ); bl1_dsyr2( BLIS1_LOWER_TRIANGULAR, m_ahead, buff_m1, u21, inc_u, z21, inc_z, A22, rs_A, cs_A ); } /*------------------------------------------------------------*/ } // FLA_Obj_free( &u ); // FLA_Obj_free( &z ); // FLA_Obj_free( &w ); FLA_free( buff_u ); FLA_free( buff_z ); FLA_free( buff_w ); return FLA_SUCCESS; }
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; trans1_t blis_transa; conj1_t blis_conjx; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemv_check( transa, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) { FLA_Scal_external( beta, y ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_trans( transa, &blis_transa ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_sgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bl1_cgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bl1_zgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Herk_external_gpu( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu ) { FLA_Datatype datatype; int k_A; int m_A, n_A; int m_C; int ldim_A; int ldim_C; char blas_uplo; char blas_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Herk_check( uplo, trans, alpha, A, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); ldim_A = FLA_Obj_length( A ); m_C = FLA_Obj_length( C ); ldim_C = FLA_Obj_length( C ); if ( trans == FLA_NO_TRANSPOSE ) k_A = n_A; else k_A = m_A; FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); switch( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); cublasSsyrk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( float * ) A_gpu, ldim_A, *buff_beta, ( float * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); cublasDsyrk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( double * ) A_gpu, ldim_A, *buff_beta, ( double * ) C_gpu, ldim_C ); break; } case FLA_COMPLEX: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); cublasCherk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( cuComplex * ) A_gpu, ldim_A, *buff_beta, ( cuComplex * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE_COMPLEX: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); cublasZherk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( cuDoubleComplex * ) A_gpu, ldim_A, *buff_beta, ( cuDoubleComplex * ) C_gpu, ldim_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Axpy_external( FLA_Obj alpha, FLA_Obj A, FLA_Obj B ) { FLA_Datatype datatype; int m_B, n_B; int rs_A, cs_A; int rs_B, cs_B; trans1_t blis_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Axpy_check( alpha, A, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); if ( FLA_Obj_is_conformal_to( FLA_NO_TRANSPOSE, A, B ) ) FLA_Param_map_flame_to_blis_trans( FLA_NO_TRANSPOSE, &blis_trans ); else // if ( FLA_Obj_is_conformal_to( FLA_TRANSPOSE, A, B ) ) FLA_Param_map_flame_to_blis_trans( FLA_TRANSPOSE, &blis_trans ); switch ( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bl1_saxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bl1_daxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } case FLA_COMPLEX: { scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bl1_caxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bl1_zaxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_LQ_UT_opt_var2( FLA_Obj A, FLA_Obj T ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int rs_T, cs_T; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_T = FLA_Obj_row_stride( T ); cs_T = FLA_Obj_col_stride( T ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_T = FLA_FLOAT_PTR( T ); FLA_LQ_UT_ops_var2( m_A, n_A, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_T = FLA_DOUBLE_PTR( T ); FLA_LQ_UT_opd_var2( m_A, n_A, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_T = FLA_COMPLEX_PTR( T ); FLA_LQ_UT_opc_var2( m_A, n_A, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T ); FLA_LQ_UT_opz_var2( m_A, n_A, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Eig_gest_iu_opd_var1( int m_AB, double* buff_A, int rs_A, int cs_A, double* buff_y, int inc_y, double* buff_B, int rs_B, int cs_B ) { double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO ); double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double* buff_m1h = FLA_DOUBLE_PTR( FLA_MINUS_ONE_HALF ); int i; for ( i = 0; i < m_AB; ++i ) { double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A; double* a01 = buff_A + (i )*cs_A + (0 )*rs_A; double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; double* y01 = buff_y + (0 )*inc_y; double* B00 = buff_B + (0 )*cs_B + (0 )*rs_B; double* b01 = buff_B + (i )*cs_B + (0 )*rs_B; double* beta11 = buff_B + (i )*cs_B + (i )*rs_B; int m_behind = i; /*------------------------------------------------------------*/ // FLA_Hemvc_external( FLA_UPPER_TRIANGULAR, FLA_NO_CONJUGATE, // FLA_ONE, A00, b01, FLA_ZERO, y01_l ); bl1_dhemv( BLIS1_UPPER_TRIANGULAR, BLIS1_NO_CONJUGATE, m_behind, buff_1, A00, rs_A, cs_A, b01, rs_B, buff_0, y01, inc_y ); // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, // B00, a01 ); bl1_dtrsv( BLIS1_UPPER_TRIANGULAR, BLIS1_CONJ_TRANSPOSE, BLIS1_NONUNIT_DIAG, m_behind, B00, rs_B, cs_B, a01, rs_A ); // FLA_Axpy_external( FLA_MINUS_ONE_HALF, y01_l, a01 ); bl1_daxpyv( BLIS1_NO_CONJUGATE, m_behind, buff_m1h, y01, inc_y, a01, rs_A ); // FLA_Dot2cs_external( FLA_CONJUGATE, FLA_MINUS_ONE, a01, b01, FLA_ONE, alpha11 ); bl1_ddot2s( BLIS1_CONJUGATE, m_behind, buff_m1, a01, rs_A, b01, rs_B, buff_1, alpha11 ); // FLA_Inv_scal_external( beta11, alpha11 ); // FLA_Inv_scal_external( beta11, alpha11 ); bl1_dinvscals( beta11, alpha11 ); bl1_dinvscals( beta11, alpha11 ); // FLA_Axpy_external( FLA_MINUS_ONE_HALF, y01_l, a01 ); bl1_daxpyv( BLIS1_NO_CONJUGATE, m_behind, buff_m1h, y01, inc_y, a01, rs_A ); // FLA_Inv_scal_external( beta11, a01 ); bl1_dinvscalv( BLIS1_NO_CONJUGATE, m_behind, beta11, a01, rs_A ); /*------------------------------------------------------------*/ } return FLA_SUCCESS; }
FLA_Error FLA_LQ_UT_opd_var2( int m_A, int n_A, double* buff_A, int rs_A, int cs_A, double* buff_T, int rs_T, int cs_T ) { double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); int min_m_n = min( m_A, n_A ); int i; for ( i = 0; i < min_m_n; ++i ) { double* a01 = buff_A + (i )*cs_A + (0 )*rs_A; double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A; double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A; double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A; double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A; double* tau11 = buff_T + (i )*cs_T + (i )*rs_T; double* t01 = buff_T + (i )*cs_T + (0 )*rs_T; int m_ahead = m_A - i - 1; int n_ahead = n_A - i - 1; int m_behind = i; /*------------------------------------------------------------*/ // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t // tau11 ); FLA_Househ2_UT_r_opd( n_ahead, alpha11, a12t, cs_A, tau11 ); // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a12t, a21, A22 ); FLA_Apply_H2_UT_r_opd_var1( m_ahead, n_ahead, tau11, a12t, cs_A, a21, rs_A, A22, rs_A, cs_A ); // FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, a01, t01 ); bl1_dcopyv( BLIS1_CONJUGATE, m_behind, a01, rs_A, t01, rs_T ); // FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_CONJUGATE, // FLA_ONE, A02, a12t, FLA_ONE, t01 ); bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE, BLIS1_NO_CONJUGATE, m_behind, n_ahead, buff_1, A02, rs_A, cs_A, a12t, cs_A, buff_1, t01, rs_T ); /*------------------------------------------------------------*/ } return FLA_SUCCESS; }
FLA_Error FLA_Setr( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; uplo1_t blis_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Setr_check( uplo, alpha, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch ( datatype ) { case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); bl1_ssetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); bl1_dsetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); bl1_csetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); bl1_zsetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Lyap_h_opd_var1( int m_AC, double* buff_sgn, double* buff_A, int rs_A, int cs_A, double* buff_W, int rs_W, int cs_W, double* buff_C, int rs_C, int cs_C ) { double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE ); int i; bl1_dscalm( BLIS1_NO_CONJUGATE, m_AC, m_AC, buff_sgn, buff_C, rs_C, cs_C ); for ( i = 0; i < m_AC; ++i ) { double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A; double* a01 = buff_A + (i )*cs_A + (0 )*rs_A; double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; double* C00 = buff_C + (0 )*cs_C + (0 )*rs_C; double* c01 = buff_C + (i )*cs_C + (0 )*rs_C; double* gamma11 = buff_C + (i )*cs_C + (i )*rs_C; double* W00 = buff_W + (0 )*cs_W + (0 )*rs_W; double omega; int m_behind = i; /*------------------------------------------------------------*/ // FLA_Hemv( FLA_UPPER_TRIANGULAR, FLA_MINUS_ONE, C00, a01, FLA_ONE, c01 ); bl1_dhemv( BLIS1_UPPER_TRIANGULAR, BLIS1_NO_CONJUGATE, m_behind, buff_m1, C00, rs_C, cs_C, a01, rs_A, buff_1, c01, rs_C ); // FLA_Copyrt( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, A00, W00 ); // FLA_Shift_diag( FLA_CONJUGATE, alpha11, W00 ); // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, W00, c01 ); bl1_dcopymrt( BLIS1_UPPER_TRIANGULAR, BLIS1_NO_TRANSPOSE, m_behind, m_behind, A00, rs_A, cs_A, W00, rs_W, cs_W ); bl1_dshiftdiag( BLIS1_CONJUGATE, 0, m_behind, m_behind, alpha11, W00, rs_W, cs_W ); bl1_dtrsv( BLIS1_UPPER_TRIANGULAR, BLIS1_CONJ_TRANSPOSE, BLIS1_NONUNIT_DIAG, m_behind, W00, rs_W, cs_W, c01, rs_C ); // FLA_Dot2cs( FLA_CONJUGATE, FLA_MINUS_ONE, a01, c01, FLA_ONE, gamma11 ); bl1_ddot2s( BLIS1_CONJUGATE, m_behind, buff_m1, a01, rs_A, c01, rs_C, buff_1, gamma11 ); // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, omega ); // FLA_Mult_add( FLA_ONE, alpha11, omega ); // FLA_Inv_scal( omega, gamma11 ); bl1_dcopyconj( alpha11, &omega ); bl1_dadd3( alpha11, &omega, &omega ); bl1_dinvscals( &omega, gamma11 ); /*------------------------------------------------------------*/ } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_asm_var2( FLA_Obj G, FLA_Obj A ) /* Apply k sets of Givens rotations to a matrix A from the right, where each set takes the form: A := A ( G(n-1,k) ... G(1,k) G(0,k) )' = A G(0,k)' G(1,k)' ... G(n-1,k)' where Gik is the ith Givens rotation formed from the kth set, stored in the (i,k) entries of of G: Gik = / gamma_ik -sigma_ik \ \ sigma_ik gamma_ik / This variant iterates in pipelined, overlapping fashion and applies rotations to two columns at a time. -FGVZ */ { FLA_Datatype datatype; int k_G, m_A, n_A; int rs_G, cs_G; int rs_A, cs_A; datatype = FLA_Obj_datatype( A ); k_G = FLA_Obj_width( G ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ) { case FLA_FLOAT: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); FLA_Apply_G_rf_ass_var2( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); FLA_Apply_G_rf_asd_var2( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); FLA_Apply_G_rf_asc_var2( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); FLA_Apply_G_rf_asz_var2( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Max_abs_value_herm( FLA_Uplo uplo, FLA_Obj A, FLA_Obj maxabs ) { FLA_Datatype datatype; dim_t n_A; dim_t rs_A, cs_A; uplo1_t blis_uplo; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Max_abs_value_herm_check( uplo, A, maxabs ); datatype = FLA_Obj_datatype( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch ( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_maxabs = ( float * ) FLA_FLOAT_PTR( maxabs ); bl1_smaxabsmr( blis_uplo, n_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_maxabs = ( double * ) FLA_DOUBLE_PTR( maxabs ); bl1_dmaxabsmr( blis_uplo, n_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float *buff_maxabs = ( float * ) FLA_FLOAT_PTR( maxabs ); bl1_cmaxabsmr( blis_uplo, n_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); double *buff_maxabs = ( double * ) FLA_DOUBLE_PTR( maxabs ); bl1_zmaxabsmr( blis_uplo, n_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_SA_LU_unb( FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L ) { FLA_Datatype datatype; int m_U, cs_U; int m_D, cs_D; int cs_L; // int rs_U; int rs_D; // int rs_L; int m_U_min_j, m_U_min_j_min_1; int j, ipiv; int* buff_p; if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( U ); m_U = FLA_Obj_length( U ); // rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); m_D = FLA_Obj_length( D ); rs_D = FLA_Obj_row_stride( D ); cs_D = FLA_Obj_col_stride( D ); // rs_L = FLA_Obj_row_stride( L ); cs_L = FLA_Obj_col_stride( L ); FLA_Copy_external( U, L ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L ); buff_p = ( int * ) FLA_INT_PTR( p ); switch ( datatype ){ case FLA_FLOAT: { float* buff_U = ( float * ) FLA_FLOAT_PTR( U ); float* buff_D = ( float * ) FLA_FLOAT_PTR( D ); float* buff_L = ( float * ) FLA_FLOAT_PTR( L ); float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE ); float L_tmp; float D_tmp; float d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bl1_samax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabsf( L_tmp ) < fabsf( D_tmp ) ) { bl1_sswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ]; bl1_sscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_sger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_scopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE: { double* buff_U = ( double * ) FLA_DOUBLE_PTR( U ); double* buff_D = ( double * ) FLA_DOUBLE_PTR( D ); double* buff_L = ( double * ) FLA_DOUBLE_PTR( L ); double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double L_tmp; double D_tmp; double d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bl1_damax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabs( L_tmp ) < fabs( D_tmp ) ) { bl1_dswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ]; bl1_dscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_dger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_dcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_COMPLEX: { scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U ); scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D ); scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L ); scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); scomplex L_tmp; scomplex D_tmp; scomplex d_inv_Ljj; scomplex Ljj; float temp; for ( j = 0; j < m_U; ++j ) { bl1_camax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabsf( L_tmp.real + L_tmp.imag ) < fabsf( D_tmp.real + D_tmp.imag ) ) { bl1_cswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0F / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bl1_cscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_cger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_ccopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D ); dcomplex* buff_L = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L ); dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE ); dcomplex L_tmp; dcomplex D_tmp; dcomplex d_inv_Ljj; dcomplex Ljj; double temp; for ( j = 0; j < m_U; ++j ) { bl1_zamax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabs( L_tmp.real + L_tmp.imag ) < fabs( D_tmp.real + D_tmp.imag ) ) { bl1_zswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0 / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bl1_zscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_zger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_zcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hemm_external( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ) { FLA_Datatype datatype; int m_C, n_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; side1_t blis_side; uplo1_t blis_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemm_check( side, uplo, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); FLA_Param_map_flame_to_blis_side( side, &blis_side ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_ssymm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dsymm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bl1_chemm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bl1_zhemm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Sylv_nn_opt_var1( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale ) { FLA_Datatype datatype; int m_C, n_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; int info; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); switch ( datatype ) { case FLA_FLOAT: { int* buff_isgn = FLA_INT_PTR( isgn ); float* buff_A = FLA_FLOAT_PTR( A ); float* buff_B = FLA_FLOAT_PTR( B ); float* buff_C = FLA_FLOAT_PTR( C ); float* buff_scale = FLA_FLOAT_PTR( scale ); float sgn = ( float ) *buff_isgn; FLA_Sylv_nn_ops_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } case FLA_DOUBLE: { int* buff_isgn = FLA_INT_PTR( isgn ); double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_B = FLA_DOUBLE_PTR( B ); double* buff_C = FLA_DOUBLE_PTR( C ); double* buff_scale = FLA_DOUBLE_PTR( scale ); double sgn = ( double ) *buff_isgn; FLA_Sylv_nn_opd_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } case FLA_COMPLEX: { int* buff_isgn = FLA_INT_PTR( isgn ); scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_B = FLA_COMPLEX_PTR( B ); scomplex* buff_C = FLA_COMPLEX_PTR( C ); scomplex* buff_scale = FLA_COMPLEX_PTR( scale ); float sgn = ( float ) *buff_isgn; FLA_Sylv_nn_opc_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } case FLA_DOUBLE_COMPLEX: { int* buff_isgn = FLA_INT_PTR( isgn ); dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex* buff_C = FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex* buff_scale = FLA_DOUBLE_COMPLEX_PTR( scale ); double sgn = ( double ) *buff_isgn; FLA_Sylv_nn_opz_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } } return FLA_SUCCESS; }