FLA_Error FLA_Tevd_eigval_v_opt_var1( FLA_Obj G, FLA_Obj d, FLA_Obj e, FLA_Obj k ) { FLA_Datatype datatype; int m_A, n_G; int rs_G, cs_G; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); n_G = FLA_Obj_width( G ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Tevd_eigval_v_ops_var1( m_A, n_G, buff_G, rs_G, cs_G, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } case FLA_DOUBLE: { dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Tevd_eigval_v_opd_var1( m_A, n_G, buff_G, rs_G, cs_G, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } } return FLA_SUCCESS; }
int FLAME_invert_ztau( FLA_Obj t ) { dim_t m = FLA_Obj_vector_dim( t ); dim_t inc = FLA_Obj_vector_inc( t ); dcomplex* buff = FLA_Obj_buffer_at_view( t ); double one = 1.0; double conjsign = one; // if conjugate -one; double zero = 0.0; double temp, s, xr_s, xi_s; dcomplex* chi; int i; for ( i = 0; i < m; ++i ) { chi = buff + i*inc; s = bl1_fmaxabs( chi->real, chi->imag ); if ( s != zero ) { xr_s = chi->real / s; xi_s = chi->imag / s; temp = xr_s * chi->real + xi_s * chi->imag; chi->real = xr_s / temp; chi->imag = conjsign * xi_s / temp; } } return 0; }
// Transform tau. int FLAME_invert_stau( FLA_Obj t ) { dim_t m = FLA_Obj_vector_dim( t ); dim_t inc = FLA_Obj_vector_inc( t ); float* buff = FLA_Obj_buffer_at_view( t ); float one = 1.0F; float zero = 0.0F; float* chi; int i; for ( i = 0; i < m; ++i ) { chi = buff + i*inc; if ( *chi != zero ) *chi = ( one / *chi ); } return 0; }
FLA_Error FLA_Fused_Gerc2_opt_var1( FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A ) { /* Effective computation: A = A + alpha * ( u * y' + z * v' ); */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_u, inc_y, inc_z, inc_v; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_u = FLA_Obj_vector_inc( u ); inc_y = FLA_Obj_vector_inc( y ); inc_z = FLA_Obj_vector_inc( z ); inc_v = FLA_Obj_vector_inc( v ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_u = FLA_FLOAT_PTR( u ); float* buff_y = FLA_FLOAT_PTR( y ); float* buff_z = FLA_FLOAT_PTR( z ); float* buff_v = FLA_FLOAT_PTR( v ); float* buff_alpha = FLA_FLOAT_PTR( alpha ); FLA_Fused_Gerc2_ops_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_u = FLA_DOUBLE_PTR( u ); double* buff_y = FLA_DOUBLE_PTR( y ); double* buff_z = FLA_DOUBLE_PTR( z ); double* buff_v = FLA_DOUBLE_PTR( v ); double* buff_alpha = FLA_DOUBLE_PTR( alpha ); FLA_Fused_Gerc2_opd_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_u = FLA_COMPLEX_PTR( u ); scomplex* buff_y = FLA_COMPLEX_PTR( y ); scomplex* buff_z = FLA_COMPLEX_PTR( z ); scomplex* buff_v = FLA_COMPLEX_PTR( v ); scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha ); FLA_Fused_Gerc2_opc_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z ); dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v ); dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha ); FLA_Fused_Gerc2_opz_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_pivots_rt_opt_var1( FLA_Obj p, FLA_Obj A ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_p; int k1_0, k2_0; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); // Swap the stride; FLA_Apply_pivots_ln_ops_var1 already consider the memory access pattern. cs_A = FLA_Obj_row_stride( A ); rs_A = FLA_Obj_col_stride( A ); // Use minus increment of the ln version. inc_p = FLA_Obj_vector_inc( p ); // Use zero-based indices. k1_0 = 0; k2_0 = ( int ) FLA_Obj_vector_dim( p ) - 1; switch ( datatype ) { case FLA_INT: { int* buff_A = FLA_INT_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opi_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_ops_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opd_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opc_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opz_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Eig_gest_nl_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B ) { FLA_Datatype datatype; int m_AB; int rs_A, cs_A; int rs_B, cs_B; int inc_y; FLA_Obj yT, yB; datatype = FLA_Obj_datatype( A ); m_AB = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); FLA_Part_2x1( Y, &yT, &yB, 1, FLA_TOP ); inc_y = FLA_Obj_vector_inc( yT ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_y = FLA_FLOAT_PTR( yT ); float* buff_B = FLA_FLOAT_PTR( B ); FLA_Eig_gest_nl_ops_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_y = FLA_DOUBLE_PTR( yT ); double* buff_B = FLA_DOUBLE_PTR( B ); FLA_Eig_gest_nl_opd_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_y = FLA_COMPLEX_PTR( yT ); scomplex* buff_B = FLA_COMPLEX_PTR( B ); FLA_Eig_gest_nl_opc_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yT ); dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B ); FLA_Eig_gest_nl_opz_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1( FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w ) { /* Effective computation: y = beta * y + A' * u; a = a - conj(y) / tau; w = A * conj(a); */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_u, inc_a, inc_y, inc_w; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_u = FLA_Obj_vector_inc( u ); inc_a = FLA_Obj_vector_inc( a ); inc_y = FLA_Obj_vector_inc( y ); inc_w = FLA_Obj_vector_inc( w ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_u = FLA_FLOAT_PTR( u ); float* buff_a = FLA_FLOAT_PTR( a ); float* buff_y = FLA_FLOAT_PTR( y ); float* buff_w = FLA_FLOAT_PTR( w ); float* buff_tau = FLA_FLOAT_PTR( tau ); float* buff_beta = FLA_FLOAT_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_u = FLA_DOUBLE_PTR( u ); double* buff_a = FLA_DOUBLE_PTR( a ); double* buff_y = FLA_DOUBLE_PTR( y ); double* buff_w = FLA_DOUBLE_PTR( w ); double* buff_tau = FLA_DOUBLE_PTR( tau ); double* buff_beta = FLA_DOUBLE_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_u = FLA_COMPLEX_PTR( u ); scomplex* buff_a = FLA_COMPLEX_PTR( a ); scomplex* buff_y = FLA_COMPLEX_PTR( y ); scomplex* buff_w = FLA_COMPLEX_PTR( w ); scomplex* buff_tau = FLA_COMPLEX_PTR( tau ); scomplex* buff_beta = FLA_COMPLEX_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u ); dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w ); dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau ); dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta ); FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_A, n_A, buff_tau, buff_beta, buff_A, rs_A, cs_A, buff_u, inc_u, buff_a, inc_a, buff_y, inc_y, buff_w, inc_w ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_LU_piv_opt_var5( FLA_Obj A, FLA_Obj p ) { FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_p; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_p = FLA_Obj_vector_inc( p ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); int* buff_p = FLA_INT_PTR( p ); r_val = FLA_LU_piv_ops_var5( m_A, n_A, buff_A, rs_A, cs_A, buff_p, inc_p ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); int* buff_p = FLA_INT_PTR( p ); r_val = FLA_LU_piv_opd_var5( m_A, n_A, buff_A, rs_A, cs_A, buff_p, inc_p ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); int* buff_p = FLA_INT_PTR( p ); r_val = FLA_LU_piv_opc_var5( m_A, n_A, buff_A, rs_A, cs_A, buff_p, inc_p ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); int* buff_p = FLA_INT_PTR( p ); r_val = FLA_LU_piv_opz_var5( m_A, n_A, buff_A, rs_A, cs_A, buff_p, inc_p ); break; } } return r_val; }
// According to the sorted order of a given vector s, // U and V are reordered in columns while C is reordered // in rows when they need to be applied. FLA_Error FLA_Sort_bsvd_ext( FLA_Direct direct, FLA_Obj s, FLA_Bool apply_U, FLA_Obj U, FLA_Bool apply_V, FLA_Obj V, FLA_Bool apply_C, FLA_Obj C ) { FLA_Datatype datatype; dim_t m_U, rs_U, cs_U; dim_t m_V, rs_V, cs_V; dim_t n_C, rs_C, cs_C; dim_t m_s, inc_s; //if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) // FLA_Sort_bsvd_check( direct, s, // apply_U, U, // apply_V, V, // apply_C, C ); // Sort singular values only; quick sort if ( apply_U == FALSE && apply_V == FALSE ) return FLA_Sort( direct, s ); // s dimensions must be provided. m_s = FLA_Obj_vector_dim( s ); inc_s = FLA_Obj_vector_inc( s ); // Datatype of U, V and C must be consistent and must be defined from one of them. FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( U, apply_U, datatype, m_U, FLA_Obj_length, rs_U, cs_U ); FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( V, apply_V, datatype, m_V, FLA_Obj_length, rs_V, cs_V ); FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( C, apply_C, datatype, n_C, FLA_Obj_width, rs_C, cs_C ); switch ( datatype ) { case FLA_FLOAT: { float* s_p = ( float* ) FLA_FLOAT_PTR( s ); float* U_p = ( apply_U == TRUE ? ( float* ) FLA_FLOAT_PTR( U ) : NULL ); float* V_p = ( apply_V == TRUE ? ( float* ) FLA_FLOAT_PTR( V ) : NULL ); float* C_p = ( apply_C == TRUE ? ( float* ) FLA_FLOAT_PTR( C ) : NULL ); if ( direct == FLA_FORWARD ) FLA_Sort_bsvd_ext_f_ops( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); else // if ( direct == FLA_BACKWARD ) FLA_Sort_bsvd_ext_b_ops( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); break; } case FLA_DOUBLE: { double* s_p = ( double* ) FLA_DOUBLE_PTR( s ); double* U_p = ( apply_U == TRUE ? ( double* ) FLA_DOUBLE_PTR( U ) : NULL ); double* V_p = ( apply_V == TRUE ? ( double* ) FLA_DOUBLE_PTR( V ) : NULL ); double* C_p = ( apply_C == TRUE ? ( double* ) FLA_DOUBLE_PTR( C ) : NULL ); if ( direct == FLA_FORWARD ) FLA_Sort_bsvd_ext_f_opd( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); else // if ( direct == FLA_BACKWARD ) FLA_Sort_bsvd_ext_b_opd( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); break; } case FLA_COMPLEX: { float* s_p = ( float* ) FLA_FLOAT_PTR( s ); scomplex* U_p = ( apply_U == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( U ) : NULL ); scomplex* V_p = ( apply_V == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( V ) : NULL ); scomplex* C_p = ( apply_C == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( C ) : NULL ); if ( direct == FLA_FORWARD ) FLA_Sort_bsvd_ext_f_opc( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); else // if ( direct == FLA_BACKWARD ) FLA_Sort_bsvd_ext_b_opc( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { double* s_p = ( double* ) FLA_DOUBLE_PTR( s ); dcomplex* U_p = ( apply_U == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( U ) : NULL ); dcomplex* V_p = ( apply_V == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( V ) : NULL ); dcomplex* C_p = ( apply_C == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C ) : NULL ); if ( direct == FLA_FORWARD ) FLA_Sort_bsvd_ext_f_opz( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); else // if ( direct == FLA_BACKWARD ) FLA_Sort_bsvd_ext_b_opz( m_s, s_p, inc_s, m_U, U_p, rs_U, cs_U, m_V, V_p, rs_V, cs_V, n_C, C_p, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bidiag_UT_u_extract_real_diagonals( FLA_Obj A, FLA_Obj d, FLA_Obj e ) { FLA_Datatype datatype; int n_A; int rs_A, cs_A; int inc_d; int inc_e; int i; datatype = FLA_Obj_datatype( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_d = FLA_Obj_vector_inc( d ); if ( n_A != 1 ) inc_e = FLA_Obj_vector_inc( e ); else inc_e = 0; switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; float* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; float* delta1 = buff_d + (i )*inc_d; float* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = *alpha11; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = *a12t_l; } break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; double* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; double* delta1 = buff_d + (i )*inc_d; double* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = *alpha11; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = *a12t_l; } break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; float* delta1 = buff_d + (i )*inc_d; float* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = alpha11->real; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = a12t_l->real; } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; double* delta1 = buff_d + (i )*inc_d; double* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = alpha11->real; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = a12t_l->real; } break; } } return FLA_SUCCESS; }
FLA_Error FLA_Ger_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; conj1_t blis_conjx; conj1_t blis_conjy; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Ger_check( alpha, x, y, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjy ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bl1_sger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bl1_dger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); bl1_cger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); bl1_zger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bidiag_UT_realify_diagonals_opt( FLA_Obj a, FLA_Obj b, FLA_Obj d, FLA_Obj e ) { FLA_Datatype datatype; int i, m, inc_a, inc_b, inc_d, inc_e; datatype = FLA_Obj_datatype( a ); m = FLA_Obj_vector_dim( a ); inc_a = FLA_Obj_vector_inc( a ); inc_b = ( m > 1 ? FLA_Obj_vector_inc( b ) : 0 ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); float* buff_1 = FLA_FLOAT_PTR( FLA_ONE ); bl1_ssetv( m, buff_1, buff_d, inc_d ); bl1_ssetv( m, buff_1, buff_e, inc_e ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE ); bl1_dsetv( m, buff_1, buff_d, inc_d ); bl1_dsetv( m, buff_1, buff_e, inc_e ); break; } case FLA_COMPLEX: { scomplex* buff_a = FLA_COMPLEX_PTR( a ); scomplex* buff_b = ( m > 1 ? FLA_COMPLEX_PTR( b ) : NULL ); scomplex* buff_d = FLA_COMPLEX_PTR( d ); scomplex* buff_e = FLA_COMPLEX_PTR( e ); scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE ); float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO ); for ( i = 0; i < m; ++i ) { scomplex* alpha1 = buff_a + (i )*inc_a; scomplex* delta1 = buff_d + (i )*inc_d; scomplex* epsilon1 = buff_e + (i )*inc_e; scomplex absv; if ( i == 0 ) { *delta1 = *buff_1; } else { scomplex* beta1 = buff_b + (i-1)*inc_b; if ( beta1->imag == 0.0F ) *delta1 = *buff_1; else { bl1_ccopys( BLIS1_CONJUGATE, beta1, delta1 ); bl1_cabsval2( beta1, &absv ); bl1_cinvscals( &absv, delta1 ); bl1_cscals( delta1, beta1 ); beta1->imag = *buff_0; bl1_cscals( delta1, alpha1 ); } } if ( alpha1->imag == 0.0F ) *epsilon1 = *buff_1; else { bl1_ccopys( BLIS1_CONJUGATE, alpha1, epsilon1 ); bl1_cabsval2( alpha1, &absv ); bl1_cinvscals( &absv, epsilon1 ); bl1_cscals( epsilon1, alpha1 ); alpha1->imag = *buff_0; } if ( i < ( m - 1 ) ) { scomplex* beta2 = buff_b + (i )*inc_b; bl1_cscals( epsilon1, beta2 ); } } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a ); dcomplex* buff_b = ( m > 1 ? FLA_DOUBLE_COMPLEX_PTR( b ) : NULL ); dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d ); dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e ); dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE ); double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO ); for ( i = 0; i < m; ++i ) { dcomplex* alpha1 = buff_a + (i )*inc_a; dcomplex* delta1 = buff_d + (i )*inc_d; dcomplex* epsilon1 = buff_e + (i )*inc_e; dcomplex absv; if ( i == 0 ) { *delta1 = *buff_1; } else { dcomplex* beta1 = buff_b + (i-1)*inc_b; bl1_zcopys( BLIS1_CONJUGATE, beta1, delta1 ); bl1_zabsval2( beta1, &absv ); bl1_zinvscals( &absv, delta1 ); bl1_zscals( delta1, beta1 ); beta1->imag = *buff_0; bl1_zscals( delta1, alpha1 ); } bl1_zcopys( BLIS1_CONJUGATE, alpha1, epsilon1 ); bl1_zabsval2( alpha1, &absv ); bl1_zinvscals( &absv, epsilon1 ); bl1_zscals( epsilon1, alpha1 ); alpha1->imag = *buff_0; if ( i < ( m - 1 ) ) { dcomplex* beta2 = buff_b + (i )*inc_b; bl1_zscals( epsilon1, beta2 ); } } break; } } return FLA_SUCCESS; }
FLA_Error FLA_Nrm2_external( FLA_Obj x, FLA_Obj norm_x ) { FLA_Datatype datatype; int num_elem; int inc_x; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Nrm2_check( x, norm_x ); if ( FLA_Obj_has_zero_dim( x ) ) { FLA_Set( FLA_ZERO, norm_x ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( x ); inc_x = FLA_Obj_vector_inc( x ); num_elem = FLA_Obj_vector_dim( x ); switch ( datatype ){ case FLA_FLOAT: { float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_norm_x = ( float * ) FLA_FLOAT_PTR( norm_x ); bli_snrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } case FLA_DOUBLE: { double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_norm_x = ( double * ) FLA_DOUBLE_PTR( norm_x ); bli_dnrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } case FLA_COMPLEX: { scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); float *buff_norm_x = ( float * ) FLA_COMPLEX_PTR( norm_x ); bli_cnrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); double *buff_norm_x = ( double * ) FLA_DOUBLE_COMPLEX_PTR( norm_x ); bli_znrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bsvd_sinval_v_opt_var1( FLA_Obj tol, FLA_Obj thresh, FLA_Obj G, FLA_Obj H, FLA_Obj d, FLA_Obj e, FLA_Obj k ) { FLA_Datatype datatype; int m_A, n_GH; int rs_G, cs_G; int rs_H, cs_H; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); n_GH = FLA_Obj_width( G ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_H = FLA_Obj_row_stride( H ); cs_H = FLA_Obj_col_stride( H ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_tol = FLA_FLOAT_PTR( tol ); float* buff_thresh = FLA_FLOAT_PTR( thresh ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Bsvd_sinval_v_ops_var1( m_A, n_GH, 9, *buff_tol, *buff_thresh, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } case FLA_DOUBLE: { double* buff_tol = FLA_DOUBLE_PTR( tol ); double* buff_thresh = FLA_DOUBLE_PTR( thresh ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Bsvd_sinval_v_opd_var1( m_A, n_GH, 9, *buff_tol, *buff_thresh, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Househ3UD_UT( FLA_Obj chi_0, FLA_Obj x1, FLA_Obj y2, FLA_Obj tau ) /* Compute an up-and-downdating UT Householder transformation / / 1 0 0 \ / 1 0 0 \ / 1 \ ( 1 u1' v2' ) \ H = | | 0 I 0 | - inv(tau) | 0 I 0 | | u1 | | \ \ 0 0 I / \ 0 0 -I / \ v2 / / by computing tau, u1, and v2 such that the following is satisfied: / chi_0 \ / alpha \ H | x1 | = | 0 | \ y2 / \ 0 / where alpha = - lambda * chi_0 / | chi_0 | lambda = sqrt( conj(chi0) chi0 + x1' x1 - y2' y2 ) / chi_0 \ x = | x1 | \ y2 / tau = ( 1 + u1' u1 - v2' v2 ) / 2 u1 = x1 / ( chi_0 - alpha ) v2 = -y2 / ( chi_0 - alpha ) Upon completion, alpha, u1, and v2 have overwritten objects chi_0, x1, and y2, respectively. -FGVZ */ { FLA_Datatype datatype; int m_x1; int m_y2; int inc_x1; int inc_y2; datatype = FLA_Obj_datatype( x1 ); m_x1 = FLA_Obj_vector_dim( x1 ); m_y2 = FLA_Obj_vector_dim( y2 ); inc_x1 = FLA_Obj_vector_inc( x1 ); inc_y2 = FLA_Obj_vector_inc( y2 ); if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Househ3UD_UT_check( chi_0, x1, y2, tau ); switch ( datatype ) { case FLA_FLOAT: { float* chi_0_p = ( float* ) FLA_FLOAT_PTR( chi_0 ); float* x1_p = ( float* ) FLA_FLOAT_PTR( x1 ); float* y2_p = ( float* ) FLA_FLOAT_PTR( y2 ); float* tau_p = ( float* ) FLA_FLOAT_PTR( tau ); FLA_Househ3UD_UT_ops( m_x1, m_y2, chi_0_p, x1_p, inc_x1, y2_p, inc_y2, tau_p ); break; } case FLA_DOUBLE: { double* chi_0_p = ( double* ) FLA_DOUBLE_PTR( chi_0 ); double* x1_p = ( double* ) FLA_DOUBLE_PTR( x1 ); double* y2_p = ( double* ) FLA_DOUBLE_PTR( y2 ); double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau ); FLA_Househ3UD_UT_opd( m_x1, m_y2, chi_0_p, x1_p, inc_x1, y2_p, inc_y2, tau_p ); break; } case FLA_COMPLEX: { scomplex* chi_0_p = ( scomplex* ) FLA_COMPLEX_PTR( chi_0 ); scomplex* x1_p = ( scomplex* ) FLA_COMPLEX_PTR( x1 ); scomplex* y2_p = ( scomplex* ) FLA_COMPLEX_PTR( y2 ); scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau ); FLA_Househ3UD_UT_opc( m_x1, m_y2, chi_0_p, x1_p, inc_x1, y2_p, inc_y2, tau_p ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* chi_0_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( chi_0 ); dcomplex* x1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( x1 ); dcomplex* y2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( y2 ); dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau ); FLA_Househ3UD_UT_opz( m_x1, m_y2, chi_0_p, x1_p, inc_x1, y2_p, inc_y2, tau_p ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; trans_t blis_transa; conj_t blis_conjx; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemv_check( transa, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) { FLA_Scal_external( beta, y ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_trans( transa, &blis_transa ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_sgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_cgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_zgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_HUD_UT_l_opt_var1( FLA_Obj tau, FLA_Obj w12t, FLA_Obj r12t, FLA_Obj u1, FLA_Obj C2, FLA_Obj v1, FLA_Obj D2 ) { FLA_Datatype datatype; int m_u1_C2; int m_v1_D2; int n_r12t; int inc_u1; int inc_v1; int inc_w12t; int inc_r12t; int rs_C2, cs_C2; int rs_D2, cs_D2; if ( FLA_Obj_has_zero_dim( r12t ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( C2 ); m_u1_C2 = FLA_Obj_length( u1 ); m_v1_D2 = FLA_Obj_length( v1 ); n_r12t = FLA_Obj_width( r12t ); inc_w12t = FLA_Obj_vector_inc( w12t ); inc_r12t = FLA_Obj_vector_inc( r12t ); inc_u1 = FLA_Obj_vector_inc( u1 ); rs_C2 = FLA_Obj_row_stride( C2 ); cs_C2 = FLA_Obj_col_stride( C2 ); inc_v1 = FLA_Obj_vector_inc( v1 ); rs_D2 = FLA_Obj_row_stride( D2 ); cs_D2 = FLA_Obj_col_stride( D2 ); switch ( datatype ) { case FLA_FLOAT: { float* tau_p = ( float* ) FLA_FLOAT_PTR( tau ); float* w12t_p = ( float* ) FLA_FLOAT_PTR( w12t ); float* r12t_p = ( float* ) FLA_FLOAT_PTR( r12t ); float* u1_p = ( float* ) FLA_FLOAT_PTR( u1 ); float* C2_p = ( float* ) FLA_FLOAT_PTR( C2 ); float* v1_p = ( float* ) FLA_FLOAT_PTR( v1 ); float* D2_p = ( float* ) FLA_FLOAT_PTR( D2 ); FLA_Apply_HUD_UT_l_ops_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } case FLA_DOUBLE: { double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau ); double* w12t_p = ( double* ) FLA_DOUBLE_PTR( w12t ); double* r12t_p = ( double* ) FLA_DOUBLE_PTR( r12t ); double* u1_p = ( double* ) FLA_DOUBLE_PTR( u1 ); double* C2_p = ( double* ) FLA_DOUBLE_PTR( C2 ); double* v1_p = ( double* ) FLA_DOUBLE_PTR( v1 ); double* D2_p = ( double* ) FLA_DOUBLE_PTR( D2 ); FLA_Apply_HUD_UT_l_opd_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } case FLA_COMPLEX: { scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau ); scomplex* w12t_p = ( scomplex* ) FLA_COMPLEX_PTR( w12t ); scomplex* r12t_p = ( scomplex* ) FLA_COMPLEX_PTR( r12t ); scomplex* u1_p = ( scomplex* ) FLA_COMPLEX_PTR( u1 ); scomplex* C2_p = ( scomplex* ) FLA_COMPLEX_PTR( C2 ); scomplex* v1_p = ( scomplex* ) FLA_COMPLEX_PTR( v1 ); scomplex* D2_p = ( scomplex* ) FLA_COMPLEX_PTR( D2 ); FLA_Apply_HUD_UT_l_opc_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau ); dcomplex* w12t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( w12t ); dcomplex* r12t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( r12t ); dcomplex* u1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u1 ); dcomplex* C2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C2 ); dcomplex* v1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( v1 ); dcomplex* D2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( D2 ); FLA_Apply_HUD_UT_l_opz_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tevd_v_opt_var2( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg ) { FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int m_A, m_U, n_G; int inc_d; int inc_e; int rs_G, cs_G; int rs_R, cs_R; int rs_U, cs_U; int rs_W, cs_W; datatype = FLA_Obj_datatype( U ); m_A = FLA_Obj_vector_dim( d ); m_U = FLA_Obj_length( U ); n_G = FLA_Obj_width( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_R = FLA_Obj_row_stride( R ); cs_R = FLA_Obj_col_stride( R ); rs_W = FLA_Obj_row_stride( W ); cs_W = FLA_Obj_col_stride( W ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_R = FLA_FLOAT_PTR( R ); float* buff_W = FLA_FLOAT_PTR( W ); float* buff_U = FLA_FLOAT_PTR( U ); r_val = FLA_Tevd_v_ops_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_R = FLA_DOUBLE_PTR( R ); double* buff_W = FLA_DOUBLE_PTR( W ); double* buff_U = FLA_DOUBLE_PTR( U ); r_val = FLA_Tevd_v_opd_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_R = FLA_FLOAT_PTR( R ); scomplex* buff_W = FLA_COMPLEX_PTR( W ); scomplex* buff_U = FLA_COMPLEX_PTR( U ); r_val = FLA_Tevd_v_opc_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_R = FLA_DOUBLE_PTR( R ); dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W ); dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U ); r_val = FLA_Tevd_v_opz_var2( m_A, m_U, n_G, n_iter_max, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_R, rs_R, cs_R, buff_W, rs_W, cs_W, buff_U, rs_U, cs_U, b_alg ); break; } } return r_val; }
FLA_Error FLA_Fused_Ahx_Ax_opt_var1( FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w ) { /* Effective computation: v = A' * x; w = A * x; */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x, inc_v, inc_w; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_v = FLA_Obj_vector_inc( v ); inc_w = FLA_Obj_vector_inc( w ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_x = FLA_FLOAT_PTR( x ); float* buff_v = FLA_FLOAT_PTR( v ); float* buff_w = FLA_FLOAT_PTR( w ); FLA_Fused_Ahx_Ax_ops_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_x = FLA_DOUBLE_PTR( x ); double* buff_v = FLA_DOUBLE_PTR( v ); double* buff_w = FLA_DOUBLE_PTR( w ); FLA_Fused_Ahx_Ax_opd_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_x = FLA_COMPLEX_PTR( x ); scomplex* buff_v = FLA_COMPLEX_PTR( v ); scomplex* buff_w = FLA_COMPLEX_PTR( w ); FLA_Fused_Ahx_Ax_opc_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v ); dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w ); FLA_Fused_Ahx_Ax_opz_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Sort( FLA_Direct direct, FLA_Obj x ) { FLA_Datatype datatype; FLA_Obj x_use; dim_t m_x; dim_t inc_x; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Sort_check( direct, x ); datatype = FLA_Obj_datatype( x ); m_x = FLA_Obj_vector_dim( x ); inc_x = FLA_Obj_vector_inc( x ); // If the vector does not have unit stride, copy it to a temporary vector // that does have unit stride. if ( inc_x != 1 ) { FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, x, &x_use ); inc_x = FLA_Obj_vector_inc( x_use ); } else { x_use = x; } switch ( datatype ) { case FLA_FLOAT: { float* x_p = ( float* ) FLA_FLOAT_PTR( x_use ); if ( direct == FLA_FORWARD ) FLA_Sort_f_ops( m_x, x_p, inc_x ); else // if ( direct == FLA_BACKWARD ) FLA_Sort_b_ops( m_x, x_p, inc_x ); break; } case FLA_DOUBLE: { double* x_p = ( double* ) FLA_DOUBLE_PTR( x_use ); if ( direct == FLA_FORWARD ) FLA_Sort_f_opd( m_x, x_p, inc_x ); else // if ( direct == FLA_BACKWARD ) FLA_Sort_b_opd( m_x, x_p, inc_x ); break; } } if ( inc_x != 1 ) { FLA_Copy( x_use, x ); FLA_Obj_free( &x_use ); } return FLA_SUCCESS; }
FLA_Error FLA_Trmv_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; uplo1_t blis_uplo; trans1_t blis_trans; diag1_t blis_diag; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trmv_check( uplo, trans, diag, A, x ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); FLA_Param_map_flame_to_blis_diag( diag, &blis_diag ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); bl1_strmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); bl1_dtrmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); bl1_ctrmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); bl1_ztrmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Herc_external( FLA_Uplo uplo, FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj A ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; uplo_t blis_uplo; conj_t blis_conj; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Herc_check( uplo, conj, alpha, x, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_conj( conj, &blis_conj ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_ssyr( blis_uplo, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_dsyr( blis_uplo, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_cher( blis_uplo, blis_conj, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_zher( blis_uplo, blis_conj, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Symv_external( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; int inc_y; uplo1_t blis_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Symv_check( uplo, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_ssymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dsymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bl1_csymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bl1_zsymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }