/*
 * FLA_Tevd_eigval_v_opt_var1
 *
 * Object-level front end: extracts dimensions, strides and raw buffers from
 * G, d, e and k and forwards them to the single- or double-precision kernel.
 * The datatype of d selects the kernel; G is accessed as the complex type of
 * the same precision and k as an int buffer. Any other datatype falls through
 * without calling a kernel.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Tevd_eigval_v_opt_var1( FLA_Obj G, FLA_Obj d, FLA_Obj e, FLA_Obj k )
{
  FLA_Datatype dt      = FLA_Obj_datatype( d );
  int          dim_d   = FLA_Obj_vector_dim( d );
  int          width_G = FLA_Obj_width( G );
  int          G_rs    = FLA_Obj_row_stride( G );
  int          G_cs    = FLA_Obj_col_stride( G );
  int          d_inc   = FLA_Obj_vector_inc( d );
  int          e_inc   = FLA_Obj_vector_inc( e );

  if ( dt == FLA_FLOAT )
  {
    scomplex* G_buf = FLA_COMPLEX_PTR( G );
    float*    d_buf = FLA_FLOAT_PTR( d );
    float*    e_buf = FLA_FLOAT_PTR( e );
    int*      k_buf = FLA_INT_PTR( k );

    FLA_Tevd_eigval_v_ops_var1( dim_d, width_G,
                                G_buf, G_rs, G_cs,
                                d_buf, d_inc,
                                e_buf, e_inc,
                                k_buf );
  }
  else if ( dt == FLA_DOUBLE )
  {
    dcomplex* G_buf = FLA_DOUBLE_COMPLEX_PTR( G );
    double*   d_buf = FLA_DOUBLE_PTR( d );
    double*   e_buf = FLA_DOUBLE_PTR( e );
    int*      k_buf = FLA_INT_PTR( k );

    FLA_Tevd_eigval_v_opd_var1( dim_d, width_G,
                                G_buf, G_rs, G_cs,
                                d_buf, d_inc,
                                e_buf, e_inc,
                                k_buf );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Chol_u_opt_var3
 *
 * Dispatches on the datatype of A to the matching FLA_Chol_u_op?_var3 kernel,
 * passing the length of A together with its row and column strides.
 *
 * Returns whatever the selected kernel returns, or FLA_SUCCESS when the
 * datatype matches none of the four handled cases.
 */
FLA_Error FLA_Chol_u_opt_var3( FLA_Obj A )
{
  FLA_Datatype dt   = FLA_Obj_datatype( A );
  int          dim  = FLA_Obj_length( A );
  int          A_rs = FLA_Obj_row_stride( A );
  int          A_cs = FLA_Obj_col_stride( A );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float* a = FLA_FLOAT_PTR( A );
      return FLA_Chol_u_ops_var3( dim, a, A_rs, A_cs );
    }

    case FLA_DOUBLE:
    {
      double* a = FLA_DOUBLE_PTR( A );
      return FLA_Chol_u_opd_var3( dim, a, A_rs, A_cs );
    }

    case FLA_COMPLEX:
    {
      scomplex* a = FLA_COMPLEX_PTR( A );
      return FLA_Chol_u_opc_var3( dim, a, A_rs, A_cs );
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* a = FLA_DOUBLE_COMPLEX_PTR( A );
      return FLA_Chol_u_opz_var3( dim, a, A_rs, A_cs );
    }
  }

  /* Unhandled datatype: nothing was factored. */
  return FLA_SUCCESS;
}
/*
 * FLA_Obj_create_conf_to
 *
 * Creates *obj_new with the same datatype and element type as obj_cur and
 * with dimensions conformal to obj_cur under `trans`:
 *   - FLA_NO_TRANSPOSE / FLA_CONJ_NO_TRANSPOSE: same length and width;
 *   - any other value: length and width exchanged.
 *
 * Only the storage orientation of obj_cur is carried over, not its leading
 * dimension: a unit row stride yields column stride m, a unit column stride
 * yields row stride n.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Obj_create_conf_to( FLA_Trans trans, FLA_Obj obj_cur, FLA_Obj *obj_new )
{
  FLA_Datatype datatype;
  FLA_Elemtype elemtype;
  dim_t        m, n;
  dim_t        rs, cs;
  int          keep_orientation;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_create_conf_to_check( trans, obj_cur, obj_new );

  datatype = FLA_Obj_datatype( obj_cur );
  elemtype = FLA_Obj_elemtype( obj_cur );

  /* Pick up the dimensions, exchanging them for the transposed cases. */
  keep_orientation = ( trans == FLA_NO_TRANSPOSE ||
                       trans == FLA_CONJ_NO_TRANSPOSE );

  if ( keep_orientation )
  {
    m = FLA_Obj_length( obj_cur );
    n = FLA_Obj_width( obj_cur );
  }
  else
  {
    m = FLA_Obj_width( obj_cur );
    n = FLA_Obj_length( obj_cur );
  }

  rs = FLA_Obj_row_stride( obj_cur );
  cs = FLA_Obj_col_stride( obj_cur );

  /*
   * When both strides are 1 they are passed through untouched (the original
   * author notes that FLA_adjust_strides() deals with that case). Otherwise
   * the non-unit stride is replaced by the tight dimension.
   */
  if ( !( rs == 1 && cs == 1 ) )
  {
    if ( rs == 1 )
      cs = m;   /* column-major: column stride becomes m */
    else if ( cs == 1 )
      rs = n;   /* row-major: row stride becomes n */
  }

  /* Empty views: force the corresponding stride to 1. */
  if ( m == 0 )
    cs = 1;
  if ( n == 0 )
    rs = 1;

  FLA_Obj_create_ext( datatype, elemtype, m, n, m, n, rs, cs, obj_new );

  return FLA_SUCCESS;
}
/*
 * FLA_Check_col_storage
 *
 * Returns FLA_SUCCESS when the row stride of A is 1, and
 * FLA_EXPECTED_COL_STORAGE otherwise.
 */
FLA_Error FLA_Check_col_storage( FLA_Obj A )
{
  if ( FLA_Obj_row_stride( A ) == 1 )
    return FLA_SUCCESS;

  return FLA_EXPECTED_COL_STORAGE;
}
/*
 * FLA_Obj_is_col_major
 *
 * Reports whether A is stored column-major, which this routine defines as
 * having a row stride of exactly 1. Returns TRUE in that case, FALSE
 * otherwise.
 */
FLA_Bool FLA_Obj_is_col_major( FLA_Obj A )
{
  if ( FLA_Obj_row_stride( A ) == 1 )
    return TRUE;

  return FALSE;
}
/*
 * FLA_CAQR_UT_inc_init_structure
 *
 * R is treated as a matrix whose elements are themselves FLA_Obj blocks.
 * The rows of R are visited as p consecutive panels of nb_part block-rows
 * (the last panel is clipped to the rows that remain). Within each panel,
 * for every block-column j, the uplo field of each block's base is set to:
 *   - FLA_FULL_MATRIX       for the j blocks above the panel diagonal,
 *   - FLA_UPPER_TRIANGULAR  for the diagonal block,
 *   - FLA_ZERO_MATRIX       for the remaining blocks down to the panel end.
 *
 * NOTE(review): the walk down a column always visits j + 1 blocks before the
 * "below diagonal" loop, regardless of the panel height, so it relies on the
 * caller's partitioning to keep that inside the panel -- confirm.
 */
void FLA_CAQR_UT_inc_init_structure( dim_t p, dim_t nb_part, FLA_Obj R )
{
  dim_t    n_rows = FLA_Obj_length( R );
  dim_t    n_cols = FLA_Obj_width( R );
  dim_t    rs     = FLA_Obj_row_stride( R );
  dim_t    cs     = FLA_Obj_col_stride( R );
  FLA_Obj* blocks = FLA_Obj_buffer_at_view( R );
  dim_t    panel, col, row;

  for ( panel = 0; panel < p; ++panel )
  {
    /* First block of this row panel. */
    FLA_Obj* panel_top  = blocks + (panel*nb_part)*rs;

    int      rows_above = panel*nb_part;
    int      rows_left  = n_rows - rows_above;

    int      panel_rows = min( nb_part, rows_left );
    int      panel_cols = n_cols;

    for ( col = 0; col < panel_cols; ++col )
    {
      /* Cursor that steps down the current block-column. */
      FLA_Obj* cur = panel_top + col*cs;

      /* Above the diagonal: full blocks. */
      for ( row = 0; row < col; ++row, cur += rs )
        cur->base->uplo = FLA_FULL_MATRIX;

      /* On the diagonal: upper-triangular block. */
      cur->base->uplo = FLA_UPPER_TRIANGULAR;
      cur += rs;

      /* Below the diagonal: zero blocks. */
      for ( row = col + 1; row < panel_rows; ++row, cur += rs )
        cur->base->uplo = FLA_ZERO_MATRIX;
    }
  }
}
/*
 * FLA_Bidiag_UT_create_T
 *
 * Creates the b_alg x k matrices TU and TV (each only if its pointer is
 * non-NULL), with the datatype of A and with the same storage orientation as
 * A (unit row stride => column-major, otherwise row-major). k is the minimum
 * dimension of A.
 *
 * NOTE(review): the blocksize queried from the library and scaled by
 * FLA_BIDIAG_INNER_TO_OUTER_B_RATIO is overwritten by the literal 5 before
 * it is used, so b_alg is effectively min( 5, k ). This looks like a
 * leftover override -- confirm before relying on the queried value.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Bidiag_UT_create_T( FLA_Obj A, FLA_Obj* TU, FLA_Obj* TV )
{
  FLA_Datatype dt = FLA_Obj_datatype( A );
  dim_t        b_alg, k;
  dim_t        rs_T, cs_T;
  int          a_is_col_major;

  /* Library blocksize, scaled by the inner-to-outer ratio. */
  b_alg = FLA_Query_blocksize( dt, FLA_DIMENSION_MIN );
  b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_BIDIAG_INNER_TO_OUTER_B_RATIO );

  k = FLA_Obj_min_dim( A );

  /* Hard-coded override of the value computed above (see NOTE). */
  b_alg = 5;

  /* Never use a blocksize larger than the minimum dimension of A. */
  b_alg = min( b_alg, k );

  /* Mirror the storage orientation of A. */
  a_is_col_major = ( FLA_Obj_row_stride( A ) == 1 );

  if ( a_is_col_major )
  {
    rs_T = 1;
    cs_T = b_alg;
  }
  else
  {
    rs_T = k;
    cs_T = 1;
  }

  /*
   * The original author notes that apply_q complains when the matrix has a
   * zero dimension.
   */
  if ( TU != NULL )
    FLA_Obj_create( dt, b_alg, k, rs_T, cs_T, TU );

  if ( TV != NULL )
    FLA_Obj_create( dt, b_alg, k, rs_T, cs_T, TV );

  return FLA_SUCCESS;
}
/*
 * FLA_Obj_buffer_at_view
 *
 * Returns the address of the first element of the view described by obj:
 * the base buffer advanced by
 *     elem_size * ( offn * col_stride + offm * row_stride )
 * bytes. All arithmetic is carried out in size_t.
 */
void* FLA_Obj_buffer_at_view( FLA_Obj obj )
{
  size_t bytes_per_elem;
  size_t row_stride, col_stride;
  size_t row_off, col_off;
  size_t elem_offset;
  char*  base;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_buffer_at_view_check( obj );

  bytes_per_elem = ( size_t ) FLA_Obj_elem_size( obj );
  row_stride     = ( size_t ) FLA_Obj_row_stride( obj );
  col_stride     = ( size_t ) FLA_Obj_col_stride( obj );
  row_off        = ( size_t ) obj.offm;
  col_off        = ( size_t ) obj.offn;

  /* Offset of the view's top-left element, counted in elements. */
  elem_offset = col_off * col_stride + row_off * row_stride;

  base = ( char * ) (obj.base)->buffer;

  return ( void* ) ( base + bytes_per_elem * elem_offset );
}
/*
 * FLA_LQ_UT_create_T
 *
 * Creates the b_alg x k matrix *T with the datatype of A, where
 *   - b_alg is the library blocksize scaled by FLA_LQ_INNER_TO_OUTER_B_RATIO
 *     and capped at the minimum dimension of A, and
 *   - k is the length of A.
 * T uses the same storage orientation as A (unit row stride => column-major,
 * otherwise row-major).
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_LQ_UT_create_T( FLA_Obj A, FLA_Obj* T )
{
  FLA_Datatype dt = FLA_Obj_datatype( A );
  dim_t        b_alg, k;
  dim_t        rs_T, cs_T;
  int          a_is_col_major;

  /* Library blocksize, scaled by the inner-to-outer ratio. */
  b_alg = FLA_Query_blocksize( dt, FLA_DIMENSION_MIN );
  b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_LQ_INNER_TO_OUTER_B_RATIO );

  /* Never use a blocksize larger than the minimum dimension of A. */
  b_alg = min(b_alg, FLA_Obj_min_dim( A ));

  k = FLA_Obj_length( A );

  /* Mirror the storage orientation of A. */
  a_is_col_major = ( FLA_Obj_row_stride( A ) == 1 );

  if ( a_is_col_major )
  {
    rs_T = 1;
    cs_T = b_alg;
  }
  else
  {
    rs_T = k;
    cs_T = 1;
  }

  FLA_Obj_create( dt, b_alg, k, rs_T, cs_T, T );

  return FLA_SUCCESS;
}
/*
 * FLA_Sylv_nh_opt_var1
 *
 * Object-level front end for the FLA_Sylv_nh_op?_var1 kernels. The datatype
 * of A selects the kernel. The sign is read from the int buffer of isgn and
 * converted to the real type of matching precision (float for the float and
 * scomplex cases, double for the double and dcomplex cases). The problem
 * size is taken from C.
 *
 * The `info` value written by the kernel is not inspected; the function
 * returns FLA_SUCCESS in every case, including unhandled datatypes.
 */
FLA_Error FLA_Sylv_nh_opt_var1( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale )
{
  FLA_Datatype dt   = FLA_Obj_datatype( A );
  int          A_rs = FLA_Obj_row_stride( A );
  int          A_cs = FLA_Obj_col_stride( A );
  int          B_rs = FLA_Obj_row_stride( B );
  int          B_cs = FLA_Obj_col_stride( B );
  int          rows = FLA_Obj_length( C );
  int          cols = FLA_Obj_width( C );
  int          C_rs = FLA_Obj_row_stride( C );
  int          C_cs = FLA_Obj_col_stride( C );
  int          info;

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      int*   sign_buf  = FLA_INT_PTR( isgn );
      float* a         = FLA_FLOAT_PTR( A );
      float* b         = FLA_FLOAT_PTR( B );
      float* c         = FLA_FLOAT_PTR( C );
      float* scale_buf = FLA_FLOAT_PTR( scale );
      float  sign      = ( float ) *sign_buf;

      FLA_Sylv_nh_ops_var1( sign, rows, cols,
                            a, A_rs, A_cs,
                            b, B_rs, B_cs,
                            c, C_rs, C_cs,
                            scale_buf, &info );
      break;
    }

    case FLA_DOUBLE:
    {
      int*    sign_buf  = FLA_INT_PTR( isgn );
      double* a         = FLA_DOUBLE_PTR( A );
      double* b         = FLA_DOUBLE_PTR( B );
      double* c         = FLA_DOUBLE_PTR( C );
      double* scale_buf = FLA_DOUBLE_PTR( scale );
      double  sign      = ( double ) *sign_buf;

      FLA_Sylv_nh_opd_var1( sign, rows, cols,
                            a, A_rs, A_cs,
                            b, B_rs, B_cs,
                            c, C_rs, C_cs,
                            scale_buf, &info );
      break;
    }

    case FLA_COMPLEX:
    {
      int*      sign_buf  = FLA_INT_PTR( isgn );
      scomplex* a         = FLA_COMPLEX_PTR( A );
      scomplex* b         = FLA_COMPLEX_PTR( B );
      scomplex* c         = FLA_COMPLEX_PTR( C );
      scomplex* scale_buf = FLA_COMPLEX_PTR( scale );
      float     sign      = ( float ) *sign_buf;

      FLA_Sylv_nh_opc_var1( sign, rows, cols,
                            a, A_rs, A_cs,
                            b, B_rs, B_cs,
                            c, C_rs, C_cs,
                            scale_buf, &info );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      int*      sign_buf  = FLA_INT_PTR( isgn );
      dcomplex* a         = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* b         = FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex* c         = FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex* scale_buf = FLA_DOUBLE_COMPLEX_PTR( scale );
      double    sign      = ( double ) *sign_buf;

      FLA_Sylv_nh_opz_var1( sign, rows, cols,
                            a, A_rs, A_cs,
                            b, B_rs, B_cs,
                            c, C_rs, C_cs,
                            scale_buf, &info );
      break;
    }
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Set_diag
 *
 * Forwards to the bli_?setdiag routine that matches the datatype of A,
 * passing diagonal offset 0, the dimensions and strides of A, and the
 * buffers of alpha and A interpreted as that datatype. Handles FLA_INT,
 * FLA_FLOAT, FLA_DOUBLE, FLA_COMPLEX and FLA_DOUBLE_COMPLEX; other datatypes
 * are a no-op.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Set_diag( FLA_Obj alpha, FLA_Obj A )
{
  FLA_Datatype dt;
  int          rows, cols;
  int          A_rs, A_cs;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Set_diag_check( alpha, A );

  dt   = FLA_Obj_datatype( A );
  rows = FLA_Obj_length( A );
  cols = FLA_Obj_width( A );
  A_rs = FLA_Obj_row_stride( A );
  A_cs = FLA_Obj_col_stride( A );

  if ( dt == FLA_INT )
  {
    int* a     = ( int * ) FLA_INT_PTR( A );
    int* value = ( int * ) FLA_INT_PTR( alpha );

    bli_isetdiag( 0, rows, cols, value, a, A_rs, A_cs );
  }
  else if ( dt == FLA_FLOAT )
  {
    float* a     = ( float * ) FLA_FLOAT_PTR( A );
    float* value = ( float * ) FLA_FLOAT_PTR( alpha );

    bli_ssetdiag( 0, rows, cols, value, a, A_rs, A_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a     = ( double * ) FLA_DOUBLE_PTR( A );
    double* value = ( double * ) FLA_DOUBLE_PTR( alpha );

    bli_dsetdiag( 0, rows, cols, value, a, A_rs, A_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex* value = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

    bli_csetdiag( 0, rows, cols, value, a, A_rs, A_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* value = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

    bli_zsetdiag( 0, rows, cols, value, a, A_rs, A_cs );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_LU_piv_opt_var5
 *
 * Dispatches on the datatype of A to the matching FLA_LU_piv_op?_var5
 * kernel, handing it the dimensions and strides of A, the typed buffer of A,
 * and the int buffer and vector increment of p.
 *
 * Returns the kernel's return value, or FLA_SUCCESS when the datatype is not
 * one of the four handled cases.
 */
FLA_Error FLA_LU_piv_opt_var5( FLA_Obj A, FLA_Obj p )
{
  FLA_Datatype dt    = FLA_Obj_datatype( A );
  int          rows  = FLA_Obj_length( A );
  int          cols  = FLA_Obj_width( A );
  int          A_rs  = FLA_Obj_row_stride( A );
  int          A_cs  = FLA_Obj_col_stride( A );
  int          p_inc = FLA_Obj_vector_inc( p );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float* a   = FLA_FLOAT_PTR( A );
      int*   piv = FLA_INT_PTR( p );

      return FLA_LU_piv_ops_var5( rows, cols, a, A_rs, A_cs, piv, p_inc );
    }

    case FLA_DOUBLE:
    {
      double* a   = FLA_DOUBLE_PTR( A );
      int*    piv = FLA_INT_PTR( p );

      return FLA_LU_piv_opd_var5( rows, cols, a, A_rs, A_cs, piv, p_inc );
    }

    case FLA_COMPLEX:
    {
      scomplex* a   = FLA_COMPLEX_PTR( A );
      int*      piv = FLA_INT_PTR( p );

      return FLA_LU_piv_opc_var5( rows, cols, a, A_rs, A_cs, piv, p_inc );
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* a   = FLA_DOUBLE_COMPLEX_PTR( A );
      int*      piv = FLA_INT_PTR( p );

      return FLA_LU_piv_opz_var5( rows, cols, a, A_rs, A_cs, piv, p_inc );
    }
  }

  /* Unhandled datatype: nothing was factored. */
  return FLA_SUCCESS;
}
/*
 * FLA_Obj_has_nan
 *
 * Scans every element of the m x n object A and reports whether any of them
 * is NaN. For complex datatypes an element counts as NaN when either its
 * real or its imaginary part is NaN. NaN is detected with the self-inequality
 * test (x != x), which is true only for NaN.
 *
 * Element (i,j) -- row i, column j -- lives at offset i*rs + j*cs from the
 * start of the buffer, where rs is the row stride and cs the column stride.
 * (A previous revision had the two strides exchanged, which scanned the
 * wrong elements and could read outside the buffer for non-square or padded
 * matrices.)
 *
 * Returns TRUE as soon as a NaN is found; FALSE if none is found or if the
 * datatype is not FLA_FLOAT, FLA_DOUBLE, FLA_COMPLEX or FLA_DOUBLE_COMPLEX.
 */
FLA_Bool FLA_Obj_has_nan( FLA_Obj A )
{
  FLA_Datatype datatype;
  dim_t        i, j, m, n, cs, rs;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_has_nan_check( A );

  datatype = FLA_Obj_datatype( A );
  m        = FLA_Obj_length( A );
  n        = FLA_Obj_width( A );
  cs       = FLA_Obj_col_stride( A );
  rs       = FLA_Obj_row_stride( A );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float *buff = ( float * ) FLA_FLOAT_PTR( A );

      for ( j = 0; j < n; ++j )
        for ( i = 0; i < m; ++i )
        {
          float val = buff[ i*rs + j*cs ];

          if ( val != val ) return TRUE;
        }
      break;
    }

    case FLA_DOUBLE:
    {
      double *buff = ( double * ) FLA_DOUBLE_PTR( A );

      for ( j = 0; j < n; ++j )
        for ( i = 0; i < m; ++i )
        {
          double val = buff[ i*rs + j*cs ];

          if ( val != val ) return TRUE;
        }
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *buff = ( scomplex * ) FLA_COMPLEX_PTR( A );

      for ( j = 0; j < n; ++j )
        for ( i = 0; i < m; ++i )
        {
          scomplex val = buff[ i*rs + j*cs ];

          if ( val.real != val.real || val.imag != val.imag ) return TRUE;
        }
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *buff = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );

      for ( j = 0; j < n; ++j )
        for ( i = 0; i < m; ++i )
        {
          dcomplex val = buff[ i*rs + j*cs ];

          if ( val.real != val.real || val.imag != val.imag ) return TRUE;
        }
      break;
    }
  }

  return FALSE;
}
/*
 * FLA_Apply_G_lf_blk_var3
 *
 * Dispatches on the datatype of A to the FLA_Apply_G_rf_bl?_var3 kernels.
 * G is accessed as the complex type whose precision matches A.
 *
 * The kernel receives A with its dimensions and strides exchanged: the
 * kernel's "m" is the width of A, its "n" is the length of A, its row stride
 * is A's column stride and its column stride is A's row stride.
 * NOTE(review): presumably this makes the right-side (rf) kernel act on the
 * transpose of A to obtain the left-side (lf) effect -- confirm.
 *
 * Returns FLA_SUCCESS; unhandled datatypes are a no-op.
 */
FLA_Error FLA_Apply_G_lf_blk_var3( FLA_Obj G, FLA_Obj A, dim_t b_alg )
{
  FLA_Datatype dt      = FLA_Obj_datatype( A );
  int          n_sets  = FLA_Obj_width( G );
  int          G_rs    = FLA_Obj_row_stride( G );
  int          G_cs    = FLA_Obj_col_stride( G );

  /* Deliberately exchanged views of A (see header comment). */
  int          kern_n  = FLA_Obj_length( A );
  int          kern_m  = FLA_Obj_width( A );
  int          kern_cs = FLA_Obj_row_stride( A );
  int          kern_rs = FLA_Obj_col_stride( A );

  if ( dt == FLA_FLOAT )
  {
    scomplex* g = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var3( n_sets, kern_m, kern_n,
                             g, G_rs, G_cs,
                             a, kern_rs, kern_cs,
                             b_alg );
  }
  else if ( dt == FLA_DOUBLE )
  {
    dcomplex* g = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var3( n_sets, kern_m, kern_n,
                             g, G_rs, G_cs,
                             a, kern_rs, kern_cs,
                             b_alg );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* g = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var3( n_sets, kern_m, kern_n,
                             g, G_rs, G_cs,
                             a, kern_rs, kern_cs,
                             b_alg );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* g = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var3( n_sets, kern_m, kern_n,
                             g, G_rs, G_cs,
                             a, kern_rs, kern_cs,
                             b_alg );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Tevd_v_opt_var2
 *
 * Object-level front end for the FLA_Tevd_v_op?_var2 kernels. The datatype
 * of U selects the kernel. Buffers are interpreted as follows:
 *   - d, e, R: real type of the matching precision in all four cases;
 *   - G:       complex type of the matching precision in all four cases;
 *   - W, U:    the datatype of the selected case (real or complex).
 *
 * Returns the kernel's return value, or FLA_SUCCESS when the datatype is not
 * one of the four handled cases.
 */
FLA_Error FLA_Tevd_v_opt_var2( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg )
{
  FLA_Datatype dt      = FLA_Obj_datatype( U );
  int          dim_d   = FLA_Obj_vector_dim( d );
  int          len_U   = FLA_Obj_length( U );
  int          width_G = FLA_Obj_width( G );
  int          d_inc   = FLA_Obj_vector_inc( d );
  int          e_inc   = FLA_Obj_vector_inc( e );
  int          G_rs    = FLA_Obj_row_stride( G );
  int          G_cs    = FLA_Obj_col_stride( G );
  int          R_rs    = FLA_Obj_row_stride( R );
  int          R_cs    = FLA_Obj_col_stride( R );
  int          W_rs    = FLA_Obj_row_stride( W );
  int          W_cs    = FLA_Obj_col_stride( W );
  int          U_rs    = FLA_Obj_row_stride( U );
  int          U_cs    = FLA_Obj_col_stride( U );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float*    d_buf = FLA_FLOAT_PTR( d );
      float*    e_buf = FLA_FLOAT_PTR( e );
      scomplex* G_buf = FLA_COMPLEX_PTR( G );
      float*    R_buf = FLA_FLOAT_PTR( R );
      float*    W_buf = FLA_FLOAT_PTR( W );
      float*    U_buf = FLA_FLOAT_PTR( U );

      return FLA_Tevd_v_ops_var2( dim_d, len_U, width_G, n_iter_max,
                                  d_buf, d_inc,
                                  e_buf, e_inc,
                                  G_buf, G_rs, G_cs,
                                  R_buf, R_rs, R_cs,
                                  W_buf, W_rs, W_cs,
                                  U_buf, U_rs, U_cs,
                                  b_alg );
    }

    case FLA_DOUBLE:
    {
      double*   d_buf = FLA_DOUBLE_PTR( d );
      double*   e_buf = FLA_DOUBLE_PTR( e );
      dcomplex* G_buf = FLA_DOUBLE_COMPLEX_PTR( G );
      double*   R_buf = FLA_DOUBLE_PTR( R );
      double*   W_buf = FLA_DOUBLE_PTR( W );
      double*   U_buf = FLA_DOUBLE_PTR( U );

      return FLA_Tevd_v_opd_var2( dim_d, len_U, width_G, n_iter_max,
                                  d_buf, d_inc,
                                  e_buf, e_inc,
                                  G_buf, G_rs, G_cs,
                                  R_buf, R_rs, R_cs,
                                  W_buf, W_rs, W_cs,
                                  U_buf, U_rs, U_cs,
                                  b_alg );
    }

    case FLA_COMPLEX:
    {
      float*    d_buf = FLA_FLOAT_PTR( d );
      float*    e_buf = FLA_FLOAT_PTR( e );
      scomplex* G_buf = FLA_COMPLEX_PTR( G );
      float*    R_buf = FLA_FLOAT_PTR( R );
      scomplex* W_buf = FLA_COMPLEX_PTR( W );
      scomplex* U_buf = FLA_COMPLEX_PTR( U );

      return FLA_Tevd_v_opc_var2( dim_d, len_U, width_G, n_iter_max,
                                  d_buf, d_inc,
                                  e_buf, e_inc,
                                  G_buf, G_rs, G_cs,
                                  R_buf, R_rs, R_cs,
                                  W_buf, W_rs, W_cs,
                                  U_buf, U_rs, U_cs,
                                  b_alg );
    }

    case FLA_DOUBLE_COMPLEX:
    {
      double*   d_buf = FLA_DOUBLE_PTR( d );
      double*   e_buf = FLA_DOUBLE_PTR( e );
      dcomplex* G_buf = FLA_DOUBLE_COMPLEX_PTR( G );
      double*   R_buf = FLA_DOUBLE_PTR( R );
      dcomplex* W_buf = FLA_DOUBLE_COMPLEX_PTR( W );
      dcomplex* U_buf = FLA_DOUBLE_COMPLEX_PTR( U );

      return FLA_Tevd_v_opz_var2( dim_d, len_U, width_G, n_iter_max,
                                  d_buf, d_inc,
                                  e_buf, e_inc,
                                  G_buf, G_rs, G_cs,
                                  R_buf, R_rs, R_cs,
                                  W_buf, W_rs, W_cs,
                                  U_buf, U_rs, U_cs,
                                  b_alg );
    }
  }

  /* Unhandled datatype: no kernel was run. */
  return FLA_SUCCESS;
}
/*
 * FLA_Fused_Gerc2_opt_var1
 *
 * Effective computation (per the original author's note):
 *
 *     A = A + alpha * ( u * y' + z * v' );
 *
 * This routine only unpacks the objects: the datatype of A selects the
 * FLA_Fused_Gerc2_op?_var1 kernel, and every operand buffer (including
 * alpha) is interpreted as that same datatype.
 *
 * Returns FLA_SUCCESS; unhandled datatypes are a no-op.
 */
FLA_Error FLA_Fused_Gerc2_opt_var1( FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A )
{
  FLA_Datatype dt    = FLA_Obj_datatype( A );
  int          rows  = FLA_Obj_length( A );
  int          cols  = FLA_Obj_width( A );
  int          A_rs  = FLA_Obj_row_stride( A );
  int          A_cs  = FLA_Obj_col_stride( A );
  int          u_inc = FLA_Obj_vector_inc( u );
  int          y_inc = FLA_Obj_vector_inc( y );
  int          z_inc = FLA_Obj_vector_inc( z );
  int          v_inc = FLA_Obj_vector_inc( v );

  if ( dt == FLA_FLOAT )
  {
    float* a_buf = FLA_FLOAT_PTR( A );
    float* u_buf = FLA_FLOAT_PTR( u );
    float* y_buf = FLA_FLOAT_PTR( y );
    float* z_buf = FLA_FLOAT_PTR( z );
    float* v_buf = FLA_FLOAT_PTR( v );
    float* scal  = FLA_FLOAT_PTR( alpha );

    FLA_Fused_Gerc2_ops_var1( rows, cols, scal,
                              u_buf, u_inc,
                              y_buf, y_inc,
                              z_buf, z_inc,
                              v_buf, v_inc,
                              a_buf, A_rs, A_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* u_buf = FLA_DOUBLE_PTR( u );
    double* y_buf = FLA_DOUBLE_PTR( y );
    double* z_buf = FLA_DOUBLE_PTR( z );
    double* v_buf = FLA_DOUBLE_PTR( v );
    double* scal  = FLA_DOUBLE_PTR( alpha );

    FLA_Fused_Gerc2_opd_var1( rows, cols, scal,
                              u_buf, u_inc,
                              y_buf, y_inc,
                              z_buf, z_inc,
                              v_buf, v_inc,
                              a_buf, A_rs, A_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* u_buf = FLA_COMPLEX_PTR( u );
    scomplex* y_buf = FLA_COMPLEX_PTR( y );
    scomplex* z_buf = FLA_COMPLEX_PTR( z );
    scomplex* v_buf = FLA_COMPLEX_PTR( v );
    scomplex* scal  = FLA_COMPLEX_PTR( alpha );

    FLA_Fused_Gerc2_opc_var1( rows, cols, scal,
                              u_buf, u_inc,
                              y_buf, y_inc,
                              z_buf, z_inc,
                              v_buf, v_inc,
                              a_buf, A_rs, A_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* u_buf = FLA_DOUBLE_COMPLEX_PTR( u );
    dcomplex* y_buf = FLA_DOUBLE_COMPLEX_PTR( y );
    dcomplex* z_buf = FLA_DOUBLE_COMPLEX_PTR( z );
    dcomplex* v_buf = FLA_DOUBLE_COMPLEX_PTR( v );
    dcomplex* scal  = FLA_DOUBLE_COMPLEX_PTR( alpha );

    FLA_Fused_Gerc2_opz_var1( rows, cols, scal,
                              u_buf, u_inc,
                              y_buf, y_inc,
                              z_buf, z_inc,
                              v_buf, v_inc,
                              a_buf, A_rs, A_cs );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Copy_external
 *
 * Copies A into B through the bli_??copymt family, converting between
 * datatypes where the two objects differ. The routine is chosen from the
 * pair ( datatype of A, datatype of B ):
 *   - A is FLA_CONSTANT: A's buffer is read as B's datatype and the
 *     same-type routine is used;
 *   - A is FLA_INT: bli_icopymt (B's buffer is read as int as well);
 *   - A is float / double / scomplex / dcomplex: the routine named by the
 *     source letter followed by the destination letter (a single letter
 *     when both match).
 * Pairs not listed above are a no-op.
 *
 * The copy is non-transposed when A is conformal to B without transposition
 * and transposed otherwise. Dimensions are taken from B. Nothing is done
 * when A has a zero dimension. The parameter check runs only at
 * FLA_FULL_ERROR_CHECKING.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Copy_external( FLA_Obj A, FLA_Obj B )
{
  FLA_Datatype src_dt;
  FLA_Datatype dst_dt;
  int          rows, cols;
  int          src_rs, src_cs;
  int          dst_rs, dst_cs;
  trans_t      blis_trans;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Copy_check( A, B );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  src_dt = FLA_Obj_datatype( A );
  dst_dt = FLA_Obj_datatype( B );

  src_rs = FLA_Obj_row_stride( A );
  src_cs = FLA_Obj_col_stride( A );

  rows   = FLA_Obj_length( B );
  cols   = FLA_Obj_width( B );
  dst_rs = FLA_Obj_row_stride( B );
  dst_cs = FLA_Obj_col_stride( B );

  /* Anything not conformal as-is is copied transposed. */
  if ( !FLA_Obj_is_conformal_to( FLA_NO_TRANSPOSE, A, B ) )
    FLA_Param_map_flame_to_blis_trans( FLA_TRANSPOSE, &blis_trans );
  else
    FLA_Param_map_flame_to_blis_trans( FLA_NO_TRANSPOSE, &blis_trans );

  switch ( src_dt )
  {
    /* Constant source: read it through the destination's datatype. */
    case FLA_CONSTANT:
    {
      switch ( dst_dt )
      {
        case FLA_FLOAT:
        {
          float* src = ( float * ) FLA_FLOAT_PTR( A );
          float* dst = ( float * ) FLA_FLOAT_PTR( B );

          bli_scopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE:
        {
          double* src = ( double * ) FLA_DOUBLE_PTR( A );
          double* dst = ( double * ) FLA_DOUBLE_PTR( B );

          bli_dcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_COMPLEX:
        {
          scomplex* src = ( scomplex * ) FLA_COMPLEX_PTR( A );
          scomplex* dst = ( scomplex * ) FLA_COMPLEX_PTR( B );

          bli_ccopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE_COMPLEX:
        {
          dcomplex* src = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
          dcomplex* dst = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

          bli_zcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
      }
      break;
    }

    /* Integer source: both buffers are read as int. */
    case FLA_INT:
    {
      int* src = ( int * ) FLA_INT_PTR( A );
      int* dst = ( int * ) FLA_INT_PTR( B );

      bli_icopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
      break;
    }

    case FLA_FLOAT:
    {
      float* src = ( float * ) FLA_FLOAT_PTR( A );

      switch ( dst_dt )
      {
        case FLA_FLOAT:
        {
          float* dst = ( float * ) FLA_FLOAT_PTR( B );

          bli_scopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE:
        {
          double* dst = ( double * ) FLA_DOUBLE_PTR( B );

          bli_sdcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_COMPLEX:
        {
          scomplex* dst = ( scomplex * ) FLA_COMPLEX_PTR( B );

          bli_sccopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE_COMPLEX:
        {
          dcomplex* dst = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

          bli_szcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
      }
      break;
    }

    case FLA_DOUBLE:
    {
      double* src = ( double * ) FLA_DOUBLE_PTR( A );

      switch ( dst_dt )
      {
        case FLA_FLOAT:
        {
          float* dst = ( float * ) FLA_FLOAT_PTR( B );

          bli_dscopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE:
        {
          double* dst = ( double * ) FLA_DOUBLE_PTR( B );

          bli_dcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_COMPLEX:
        {
          scomplex* dst = ( scomplex * ) FLA_COMPLEX_PTR( B );

          bli_dccopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE_COMPLEX:
        {
          dcomplex* dst = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

          bli_dzcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
      }
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* src = ( scomplex * ) FLA_COMPLEX_PTR( A );

      switch ( dst_dt )
      {
        case FLA_FLOAT:
        {
          float* dst = ( float * ) FLA_FLOAT_PTR( B );

          bli_cscopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE:
        {
          double* dst = ( double * ) FLA_DOUBLE_PTR( B );

          bli_cdcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_COMPLEX:
        {
          scomplex* dst = ( scomplex * ) FLA_COMPLEX_PTR( B );

          bli_ccopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE_COMPLEX:
        {
          dcomplex* dst = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

          bli_czcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
      }
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* src = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );

      switch ( dst_dt )
      {
        case FLA_FLOAT:
        {
          float* dst = ( float * ) FLA_FLOAT_PTR( B );

          bli_zscopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE:
        {
          double* dst = ( double * ) FLA_DOUBLE_PTR( B );

          bli_zdcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_COMPLEX:
        {
          scomplex* dst = ( scomplex * ) FLA_COMPLEX_PTR( B );

          bli_zccopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
        case FLA_DOUBLE_COMPLEX:
        {
          dcomplex* dst = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

          bli_zcopymt( blis_trans, rows, cols, src, src_rs, src_cs, dst, dst_rs, dst_cs );
          break;
        }
      }
      break;
    }
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Add_to_diag
 *
 * Adds the scalar pointed to by diag_value to each of the first
 * min( length, width ) diagonal elements of A. diag_value is interpreted as
 * a pointer to the datatype of A (float, double, scomplex or dcomplex); for
 * the complex types the real and imaginary parts are added separately.
 * Other datatypes are a no-op.
 *
 * Diagonal element j sits at offset j*cs + j*rs from the start of A's
 * buffer.
 *
 * Returns FLA_SUCCESS.
 */
FLA_Error FLA_Add_to_diag( void* diag_value, FLA_Obj A )
{
  FLA_Datatype dt;
  dim_t        idx, n_diag;
  dim_t        rs, cs;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Add_to_diag_check( diag_value, A );

  dt     = FLA_Obj_datatype( A );
  n_diag = FLA_Obj_min_dim( A );
  rs     = FLA_Obj_row_stride( A );
  cs     = FLA_Obj_col_stride( A );

  if ( dt == FLA_FLOAT )
  {
    float* a   = ( float * ) FLA_FLOAT_PTR( A );
    float* inc = ( float * ) diag_value;

    for ( idx = 0; idx < n_diag; idx++ )
      a[ idx*cs + idx*rs ] += *inc;
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a   = ( double * ) FLA_DOUBLE_PTR( A );
    double* inc = ( double * ) diag_value;

    for ( idx = 0; idx < n_diag; idx++ )
      a[ idx*cs + idx*rs ] += *inc;
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a   = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex* inc = ( scomplex * ) diag_value;

    for ( idx = 0; idx < n_diag; idx++ )
    {
      a[ idx*cs + idx*rs ].real += inc->real;
      a[ idx*cs + idx*rs ].imag += inc->imag;
    }
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a   = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* inc = ( dcomplex * ) diag_value;

    for ( idx = 0; idx < n_diag; idx++ )
    {
      a[ idx*cs + idx*rs ].real += inc->real;
      a[ idx*cs + idx*rs ].imag += inc->imag;
    }
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Apply_HUD_UT_l_opt_var1
 *
 * Object-level front end for the FLA_Apply_HUD_UT_l_op?_var1 kernels.
 * Returns immediately when r12t has a zero dimension. Otherwise the datatype
 * of C2 selects the kernel and every operand buffer is interpreted as that
 * datatype. Sizes passed to the kernel are the length of u1, the length of
 * v1 and the width of r12t.
 *
 * Returns FLA_SUCCESS; unhandled datatypes are a no-op.
 */
FLA_Error FLA_Apply_HUD_UT_l_opt_var1( FLA_Obj tau, FLA_Obj w12t, FLA_Obj r12t, FLA_Obj u1, FLA_Obj C2, FLA_Obj v1, FLA_Obj D2 )
{
  FLA_Datatype dt;
  int          len_u1, len_v1, width_r;
  int          w_inc, r_inc, u_inc, v_inc;
  int          C_rs, C_cs;
  int          D_rs, D_cs;

  /* Nothing to apply to. */
  if ( FLA_Obj_has_zero_dim( r12t ) )
    return FLA_SUCCESS;

  dt      = FLA_Obj_datatype( C2 );

  len_u1  = FLA_Obj_length( u1 );
  len_v1  = FLA_Obj_length( v1 );
  width_r = FLA_Obj_width( r12t );

  w_inc   = FLA_Obj_vector_inc( w12t );
  r_inc   = FLA_Obj_vector_inc( r12t );

  u_inc   = FLA_Obj_vector_inc( u1 );
  C_rs    = FLA_Obj_row_stride( C2 );
  C_cs    = FLA_Obj_col_stride( C2 );

  v_inc   = FLA_Obj_vector_inc( v1 );
  D_rs    = FLA_Obj_row_stride( D2 );
  D_cs    = FLA_Obj_col_stride( D2 );

  if ( dt == FLA_FLOAT )
  {
    float* tau_buf = ( float* ) FLA_FLOAT_PTR( tau );
    float* w_buf   = ( float* ) FLA_FLOAT_PTR( w12t );
    float* r_buf   = ( float* ) FLA_FLOAT_PTR( r12t );
    float* u_buf   = ( float* ) FLA_FLOAT_PTR( u1 );
    float* C_buf   = ( float* ) FLA_FLOAT_PTR( C2 );
    float* v_buf   = ( float* ) FLA_FLOAT_PTR( v1 );
    float* D_buf   = ( float* ) FLA_FLOAT_PTR( D2 );

    FLA_Apply_HUD_UT_l_ops_var1( len_u1, len_v1, width_r,
                                 tau_buf,
                                 w_buf, w_inc,
                                 r_buf, r_inc,
                                 u_buf, u_inc,
                                 C_buf, C_rs, C_cs,
                                 v_buf, v_inc,
                                 D_buf, D_rs, D_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* tau_buf = ( double* ) FLA_DOUBLE_PTR( tau );
    double* w_buf   = ( double* ) FLA_DOUBLE_PTR( w12t );
    double* r_buf   = ( double* ) FLA_DOUBLE_PTR( r12t );
    double* u_buf   = ( double* ) FLA_DOUBLE_PTR( u1 );
    double* C_buf   = ( double* ) FLA_DOUBLE_PTR( C2 );
    double* v_buf   = ( double* ) FLA_DOUBLE_PTR( v1 );
    double* D_buf   = ( double* ) FLA_DOUBLE_PTR( D2 );

    FLA_Apply_HUD_UT_l_opd_var1( len_u1, len_v1, width_r,
                                 tau_buf,
                                 w_buf, w_inc,
                                 r_buf, r_inc,
                                 u_buf, u_inc,
                                 C_buf, C_rs, C_cs,
                                 v_buf, v_inc,
                                 D_buf, D_rs, D_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* tau_buf = ( scomplex* ) FLA_COMPLEX_PTR( tau );
    scomplex* w_buf   = ( scomplex* ) FLA_COMPLEX_PTR( w12t );
    scomplex* r_buf   = ( scomplex* ) FLA_COMPLEX_PTR( r12t );
    scomplex* u_buf   = ( scomplex* ) FLA_COMPLEX_PTR( u1 );
    scomplex* C_buf   = ( scomplex* ) FLA_COMPLEX_PTR( C2 );
    scomplex* v_buf   = ( scomplex* ) FLA_COMPLEX_PTR( v1 );
    scomplex* D_buf   = ( scomplex* ) FLA_COMPLEX_PTR( D2 );

    FLA_Apply_HUD_UT_l_opc_var1( len_u1, len_v1, width_r,
                                 tau_buf,
                                 w_buf, w_inc,
                                 r_buf, r_inc,
                                 u_buf, u_inc,
                                 C_buf, C_rs, C_cs,
                                 v_buf, v_inc,
                                 D_buf, D_rs, D_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* tau_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );
    dcomplex* w_buf   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( w12t );
    dcomplex* r_buf   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( r12t );
    dcomplex* u_buf   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u1 );
    dcomplex* C_buf   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C2 );
    dcomplex* v_buf   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( v1 );
    dcomplex* D_buf   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( D2 );

    FLA_Apply_HUD_UT_l_opz_var1( len_u1, len_v1, width_r,
                                 tau_buf,
                                 w_buf, w_inc,
                                 r_buf, r_inc,
                                 u_buf, u_inc,
                                 C_buf, C_rs, C_cs,
                                 v_buf, v_inc,
                                 D_buf, D_rs, D_cs );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Apply_G_rf_asm_var8
 *
 * Author's description of the operation (-FGVZ):
 *
 *   Apply k sets of Givens rotations to a matrix A from the right, where
 *   each set takes the form
 *
 *       A := A ( G(n-1,k) ... G(1,k) G(0,k) )'
 *          = A G(0,k)' G(1,k)' ... G(n-1,k)'
 *
 *   where Gik is the ith Givens rotation formed from the kth set, stored in
 *   the (i,k) entry of G:
 *
 *       Gik = / gamma_ik  -sigma_ik \
 *             \ sigma_ik   gamma_ik /
 *
 *   This variant iterates in pipelined, overlapping fashion and applies
 *   rotations to four columns at a time.
 *
 * This routine itself only unpacks the objects: the datatype of A selects
 * the FLA_Apply_G_rf_as?_var8 kernel, and G is accessed as the complex type
 * whose precision matches A.
 *
 * Returns FLA_SUCCESS; unhandled datatypes are a no-op.
 */
FLA_Error FLA_Apply_G_rf_asm_var8( FLA_Obj G, FLA_Obj A )
{
  FLA_Datatype dt     = FLA_Obj_datatype( A );
  int          n_sets = FLA_Obj_width( G );
  int          rows   = FLA_Obj_length( A );
  int          cols   = FLA_Obj_width( A );
  int          G_rs   = FLA_Obj_row_stride( G );
  int          G_cs   = FLA_Obj_col_stride( G );
  int          A_rs   = FLA_Obj_row_stride( A );
  int          A_cs   = FLA_Obj_col_stride( A );

  if ( dt == FLA_FLOAT )
  {
    scomplex* g = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    a = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_ass_var8( n_sets, rows, cols,
                             g, G_rs, G_cs,
                             a, A_rs, A_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    dcomplex* g = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   a = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_asd_var8( n_sets, rows, cols,
                             g, G_rs, G_cs,
                             a, A_rs, A_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* g = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* a = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asc_var8( n_sets, rows, cols,
                             g, G_rs, G_cs,
                             a, A_rs, A_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* g = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* a = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_asz_var8( n_sets, rows, cols,
                             g, G_rs, G_cs,
                             a, A_rs, A_cs );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Obj_equals
 *
 * Element-wise equality test of A and B over the dimensions of A. Returns
 * FALSE at the first mismatch and TRUE otherwise (including for datatypes
 * not handled below).
 *
 * Choice of the comparison datatype:
 *   - If A is not FLA_CONSTANT, A's datatype is used. B is then either the
 *     same type or FLA_CONSTANT, and FLA_CONSTANT encompasses all numerical
 *     types, so reading B as A's type is fine in both cases.
 *   - If A is FLA_CONSTANT, B's datatype is used. Only when B is
 *     FLA_CONSTANT as well does the FLA_CONSTANT case run; there every
 *     floating-point field (float, double, scomplex, dcomplex) of the two
 *     constants must match.
 */
FLA_Bool FLA_Obj_equals( FLA_Obj A, FLA_Obj B )
{
  FLA_Datatype dt_A;
  FLA_Datatype dt_B;
  FLA_Datatype dt;
  dim_t        rows, cols;
  dim_t        A_rs, A_cs;
  dim_t        B_rs, B_cs;
  dim_t        r, c;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_equals_check( A, B );

  rows = FLA_Obj_length( A );
  cols = FLA_Obj_width( A );
  A_rs = FLA_Obj_row_stride( A );
  A_cs = FLA_Obj_col_stride( A );
  B_rs = FLA_Obj_row_stride( B );
  B_cs = FLA_Obj_col_stride( B );

  dt_A = FLA_Obj_datatype( A );
  dt_B = FLA_Obj_datatype( B );

  /* See the header comment for why this choice is sufficient. */
  if ( dt_A == FLA_CONSTANT )
    dt = dt_B;
  else
    dt = dt_A;

  switch ( dt )
  {
    case FLA_CONSTANT:
    {
      /* Every floating-point field of the two constants must agree. */
      float*    s_A = ( float * ) FLA_FLOAT_PTR( A );
      float*    s_B = ( float * ) FLA_FLOAT_PTR( B );
      double*   d_A = ( double * ) FLA_DOUBLE_PTR( A );
      double*   d_B = ( double * ) FLA_DOUBLE_PTR( B );
      scomplex* c_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex* c_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      dcomplex* z_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* z_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

      if ( *s_A != *s_B ||
           *d_A != *d_B ||
           c_A->real != c_B->real ||
           c_A->imag != c_B->imag ||
           z_A->real != z_B->real ||
           z_A->imag != z_B->imag )
        return FALSE;

      break;
    }

    case FLA_INT:
    {
      int* a = ( int * ) FLA_INT_PTR( A );
      int* b = ( int * ) FLA_INT_PTR( B );

      for ( c = 0; c < cols; c++ )
        for ( r = 0; r < rows; r++ )
        {
          if ( a[ c * A_cs + r * A_rs ] != b[ c * B_cs + r * B_rs ] )
            return FALSE;
        }

      break;
    }

    case FLA_FLOAT:
    {
      float* a = ( float * ) FLA_FLOAT_PTR( A );
      float* b = ( float * ) FLA_FLOAT_PTR( B );

      for ( c = 0; c < cols; c++ )
        for ( r = 0; r < rows; r++ )
        {
          if ( a[ c * A_cs + r * A_rs ] != b[ c * B_cs + r * B_rs ] )
            return FALSE;
        }

      break;
    }

    case FLA_DOUBLE:
    {
      double* a = ( double * ) FLA_DOUBLE_PTR( A );
      double* b = ( double * ) FLA_DOUBLE_PTR( B );

      for ( c = 0; c < cols; c++ )
        for ( r = 0; r < rows; r++ )
        {
          if ( a[ c * A_cs + r * A_rs ] != b[ c * B_cs + r * B_rs ] )
            return FALSE;
        }

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* a = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex* b = ( scomplex * ) FLA_COMPLEX_PTR( B );

      for ( c = 0; c < cols; c++ )
        for ( r = 0; r < rows; r++ )
        {
          if ( a[ c * A_cs + r * A_rs ].real != b[ c * B_cs + r * B_rs ].real ||
               a[ c * A_cs + r * A_rs ].imag != b[ c * B_cs + r * B_rs ].imag )
            return FALSE;
        }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* a = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* b = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

      for ( c = 0; c < cols; c++ )
        for ( r = 0; r < rows; r++ )
        {
          if ( a[ c * A_cs + r * A_rs ].real != b[ c * B_cs + r * B_rs ].real ||
               a[ c * A_cs + r * A_rs ].imag != b[ c * B_cs + r * B_rs ].imag )
            return FALSE;
        }

      break;
    }
  }

  return TRUE;
}
/*
 * FLA_Fused_Ahx_Ax_opt_var1
 *
 * Effective computation (as stated by the original author):
 *   v = A' * x;
 *   w = A  * x;
 *
 * Reads the dimensions and strides of A and the increments of x, v and w,
 * then forwards to the typed kernel FLA_Fused_Ahx_Ax_op{s,d,c,z}_var1 that
 * matches the datatype of A. Any other datatype results in no kernel call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Fused_Ahx_Ax_opt_var1( FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w )
{
  const FLA_Datatype dt = FLA_Obj_datatype( A );

  /* Shape and storage layout of A. */
  int rows    = FLA_Obj_length( A );
  int cols    = FLA_Obj_width( A );
  int row_str = FLA_Obj_row_stride( A );
  int col_str = FLA_Obj_col_stride( A );

  /* Element increments of the three vectors. */
  int x_inc = FLA_Obj_vector_inc( x );
  int v_inc = FLA_Obj_vector_inc( v );
  int w_inc = FLA_Obj_vector_inc( w );

  if ( dt == FLA_FLOAT )
  {
    float* a_buf = FLA_FLOAT_PTR( A );
    float* x_buf = FLA_FLOAT_PTR( x );
    float* v_buf = FLA_FLOAT_PTR( v );
    float* w_buf = FLA_FLOAT_PTR( w );

    FLA_Fused_Ahx_Ax_ops_var1( rows, cols,
                               a_buf, row_str, col_str,
                               x_buf, x_inc,
                               v_buf, v_inc,
                               w_buf, w_inc );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* x_buf = FLA_DOUBLE_PTR( x );
    double* v_buf = FLA_DOUBLE_PTR( v );
    double* w_buf = FLA_DOUBLE_PTR( w );

    FLA_Fused_Ahx_Ax_opd_var1( rows, cols,
                               a_buf, row_str, col_str,
                               x_buf, x_inc,
                               v_buf, v_inc,
                               w_buf, w_inc );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* x_buf = FLA_COMPLEX_PTR( x );
    scomplex* v_buf = FLA_COMPLEX_PTR( v );
    scomplex* w_buf = FLA_COMPLEX_PTR( w );

    FLA_Fused_Ahx_Ax_opc_var1( rows, cols,
                               a_buf, row_str, col_str,
                               x_buf, x_inc,
                               v_buf, v_inc,
                               w_buf, w_inc );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* x_buf = FLA_DOUBLE_COMPLEX_PTR( x );
    dcomplex* v_buf = FLA_DOUBLE_COMPLEX_PTR( v );
    dcomplex* w_buf = FLA_DOUBLE_COMPLEX_PTR( w );

    FLA_Fused_Ahx_Ax_opz_var1( rows, cols,
                               a_buf, row_str, col_str,
                               x_buf, x_inc,
                               v_buf, v_inc,
                               w_buf, w_inc );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Symmetrize
 *
 * Front end for the bli_{s,d,c,z}symmize routines.
 *
 *   uplo  FLAME uplo parameter; mapped to its BLIS equivalent before
 *         being handed to the typed routine.
 *   A     Matrix object operated on in place; its width is passed as
 *         the dimension argument.
 *
 * The conjugation argument is always the BLIS mapping of
 * FLA_NO_CONJUGATE. Parameter checking runs when the current error level
 * is at least FLA_MIN_ERROR_CHECKING. Datatypes other than the four
 * floating-point types result in no call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Symmetrize( FLA_Uplo uplo, FLA_Obj A )
{
  conj_t conj_blis;
  uplo_t uplo_blis;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Symmetrize_check( uplo, A );

  FLA_Datatype dt      = FLA_Obj_datatype( A );
  dim_t        dim     = FLA_Obj_width( A );
  dim_t        row_str = FLA_Obj_row_stride( A );
  dim_t        col_str = FLA_Obj_col_stride( A );

  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &conj_blis );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_buf = ( float * ) FLA_FLOAT_PTR( A );

      bli_ssymmize( conj_blis, uplo_blis, dim, a_buf, row_str, col_str );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_buf = ( double * ) FLA_DOUBLE_PTR( A );

      bli_dsymmize( conj_blis, uplo_blis, dim, a_buf, row_str, col_str );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_buf = ( scomplex * ) FLA_COMPLEX_PTR( A );

      bli_csymmize( conj_blis, uplo_blis, dim, a_buf, row_str, col_str );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );

      bli_zsymmize( conj_blis, uplo_blis, dim, a_buf, row_str, col_str );
      break;
    }
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Trmv_external
 *
 * Front end for the bl1_{s,d,c,z}trmv routines.
 *
 *   uplo, trans, diag  FLAME parameters, each mapped to its BLIS
 *                      counterpart before the call.
 *   A                  Matrix operand; its length is used as the dimension.
 *   x                  Vector operand, passed with its vector increment.
 *
 * Parameter checking runs only when the error level equals
 * FLA_FULL_ERROR_CHECKING. If A has a zero dimension the function returns
 * immediately. The routine is selected by the datatype of A; other
 * datatypes result in no call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Trmv_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x )
{
  uplo1_t  uplo_blis;
  trans1_t trans_blis;
  diag1_t  diag_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trmv_check( uplo, trans, diag, A, x );

  /* Nothing to do for an empty matrix. */
  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  FLA_Datatype dt      = FLA_Obj_datatype( A );
  int          dim     = FLA_Obj_length( A );
  int          row_str = FLA_Obj_row_stride( A );
  int          col_str = FLA_Obj_col_stride( A );
  int          x_inc   = FLA_Obj_vector_inc( x );

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );
  FLA_Param_map_flame_to_blis_diag( diag, &diag_blis );

  if ( dt == FLA_FLOAT )
  {
    float *a_buf = ( float * ) FLA_FLOAT_PTR( A );
    float *x_buf = ( float * ) FLA_FLOAT_PTR( x );

    bl1_strmv( uplo_blis, trans_blis, diag_blis,
               dim,
               a_buf, row_str, col_str,
               x_buf, x_inc );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double *a_buf = ( double * ) FLA_DOUBLE_PTR( A );
    double *x_buf = ( double * ) FLA_DOUBLE_PTR( x );

    bl1_dtrmv( uplo_blis, trans_blis, diag_blis,
               dim,
               a_buf, row_str, col_str,
               x_buf, x_inc );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex *a_buf = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *x_buf = ( scomplex * ) FLA_COMPLEX_PTR( x );

    bl1_ctrmv( uplo_blis, trans_blis, diag_blis,
               dim,
               a_buf, row_str, col_str,
               x_buf, x_inc );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex *a_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *x_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );

    bl1_ztrmv( uplo_blis, trans_blis, diag_blis,
               dim,
               a_buf, row_str, col_str,
               x_buf, x_inc );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Symm_external
 *
 * Front end for the bli_{s,d,c,z}symm routines.
 *
 *   side, uplo   FLAME parameters, mapped to their BLIS counterparts.
 *   alpha, beta  Scalar objects, passed by pointer to the typed routine.
 *   A, B         Input matrix objects (buffers and strides are passed).
 *   C            Matrix object whose length and width supply the m and n
 *                dimension arguments.
 *
 * Parameter checking runs only when the error level equals
 * FLA_FULL_ERROR_CHECKING. If C has a zero dimension the function returns
 * immediately. The routine is selected by the datatype of A; other
 * datatypes result in no call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Symm_external( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  side_t side_blis;
  uplo_t uplo_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

  /* An empty result matrix means there is no work. */
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  FLA_Datatype dt = FLA_Obj_datatype( A );

  /* Storage layout of the two inputs. */
  int a_rs = FLA_Obj_row_stride( A );
  int a_cs = FLA_Obj_col_stride( A );
  int b_rs = FLA_Obj_row_stride( B );
  int b_cs = FLA_Obj_col_stride( B );

  /* Shape and storage layout of the output. */
  int c_rows = FLA_Obj_length( C );
  int c_cols = FLA_Obj_width( C );
  int c_rs   = FLA_Obj_row_stride( C );
  int c_cs   = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &side_blis );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *b_buf     = ( float * ) FLA_FLOAT_PTR( B );
      float *c_buf     = ( float * ) FLA_FLOAT_PTR( C );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bli_ssymm( side_blis, uplo_blis,
                 c_rows, c_cols,
                 alpha_buf,
                 a_buf, a_rs, a_cs,
                 b_buf, b_rs, b_cs,
                 beta_buf,
                 c_buf, c_rs, c_cs );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *b_buf     = ( double * ) FLA_DOUBLE_PTR( B );
      double *c_buf     = ( double * ) FLA_DOUBLE_PTR( C );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bli_dsymm( side_blis, uplo_blis,
                 c_rows, c_cols,
                 alpha_buf,
                 a_buf, a_rs, a_cs,
                 b_buf, b_rs, b_cs,
                 beta_buf,
                 c_buf, c_rs, c_cs );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *b_buf     = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *c_buf     = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bli_csymm( side_blis, uplo_blis,
                 c_rows, c_cols,
                 alpha_buf,
                 a_buf, a_rs, a_cs,
                 b_buf, b_rs, b_cs,
                 beta_buf,
                 c_buf, c_rs, c_cs );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *b_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *c_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bli_zsymm( side_blis, uplo_blis,
                 c_rows, c_cols,
                 alpha_buf,
                 a_buf, a_rs, a_cs,
                 b_buf, b_rs, b_cs,
                 beta_buf,
                 c_buf, c_rs, c_cs );
      break;
    }
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Eig_gest_nl_opt_var5
 *
 * Object-level wrapper around FLA_Eig_gest_nl_op{s,d,c,z}_var5.
 *
 *   A  Matrix object; its length is the dimension passed to the kernel.
 *   Y  Object partitioned with FLA_Part_2x1( ..., 1, FLA_TOP ); only the
 *      top part (yT) is handed to the kernel, as a vector with its
 *      increment.
 *   B  Matrix object passed with its strides.
 *
 * The kernel is selected by the datatype of A; other datatypes result in
 * no kernel call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Eig_gest_nl_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
{
  FLA_Obj yT, yB;

  FLA_Datatype dt = FLA_Obj_datatype( A );

  int dim  = FLA_Obj_length( A );
  int a_rs = FLA_Obj_row_stride( A );
  int a_cs = FLA_Obj_col_stride( A );
  int b_rs = FLA_Obj_row_stride( B );
  int b_cs = FLA_Obj_col_stride( B );

  /* Split off the top part of Y; yB is produced but not used here. */
  FLA_Part_2x1( Y,    &yT,
                      &yB,     1, FLA_TOP );

  int y_inc = FLA_Obj_vector_inc( yT );

  if ( dt == FLA_FLOAT )
  {
    float* a_buf = FLA_FLOAT_PTR( A );
    float* y_buf = FLA_FLOAT_PTR( yT );
    float* b_buf = FLA_FLOAT_PTR( B );

    FLA_Eig_gest_nl_ops_var5( dim,
                              a_buf, a_rs, a_cs,
                              y_buf, y_inc,
                              b_buf, b_rs, b_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* y_buf = FLA_DOUBLE_PTR( yT );
    double* b_buf = FLA_DOUBLE_PTR( B );

    FLA_Eig_gest_nl_opd_var5( dim,
                              a_buf, a_rs, a_cs,
                              y_buf, y_inc,
                              b_buf, b_rs, b_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* y_buf = FLA_COMPLEX_PTR( yT );
    scomplex* b_buf = FLA_COMPLEX_PTR( B );

    FLA_Eig_gest_nl_opc_var5( dim,
                              a_buf, a_rs, a_cs,
                              y_buf, y_inc,
                              b_buf, b_rs, b_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* y_buf = FLA_DOUBLE_COMPLEX_PTR( yT );
    dcomplex* b_buf = FLA_DOUBLE_COMPLEX_PTR( B );

    FLA_Eig_gest_nl_opz_var5( dim,
                              a_buf, a_rs, a_cs,
                              y_buf, y_inc,
                              b_buf, b_rs, b_cs );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Herc_external
 *
 * Front end that dispatches on the datatype of A:
 *   - FLA_FLOAT / FLA_DOUBLE            -> bli_ssyr / bli_dsyr
 *   - FLA_COMPLEX / FLA_DOUBLE_COMPLEX  -> bli_cher / bli_zher
 *
 *   uplo   FLAME uplo parameter, mapped to its BLIS counterpart.
 *   conj   FLAME conj parameter, mapped to its BLIS counterpart; the
 *          mapped value is only passed to the complex routines.
 *   alpha  Scalar object. In the complex cases it is accessed through a
 *          real-typed pointer (float for FLA_COMPLEX, double for
 *          FLA_DOUBLE_COMPLEX).
 *   x      Vector operand, passed with its vector increment.
 *   A      Matrix operand; its length is used as the dimension.
 *
 * Parameter checking runs only when the error level equals
 * FLA_FULL_ERROR_CHECKING. If A has a zero dimension the function returns
 * immediately. Other datatypes result in no call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Herc_external( FLA_Uplo uplo, FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj A )
{
  uplo_t uplo_blis;
  conj_t conj_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Herc_check( uplo, conj, alpha, x, A );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  FLA_Datatype dt      = FLA_Obj_datatype( A );
  int          dim     = FLA_Obj_length( A );
  int          row_str = FLA_Obj_row_stride( A );
  int          col_str = FLA_Obj_col_stride( A );
  int          x_inc   = FLA_Obj_vector_inc( x );

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_conj( conj, &conj_blis );

  if ( dt == FLA_FLOAT )
  {
    float *a_buf     = ( float * ) FLA_FLOAT_PTR( A );
    float *x_buf     = ( float * ) FLA_FLOAT_PTR( x );
    float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

    /* Real case: no conj argument is passed. */
    bli_ssyr( uplo_blis,
              dim,
              alpha_buf,
              x_buf, x_inc,
              a_buf, row_str, col_str );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double *a_buf     = ( double * ) FLA_DOUBLE_PTR( A );
    double *x_buf     = ( double * ) FLA_DOUBLE_PTR( x );
    double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

    /* Real case: no conj argument is passed. */
    bli_dsyr( uplo_blis,
              dim,
              alpha_buf,
              x_buf, x_inc,
              a_buf, row_str, col_str );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex *a_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *x_buf     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    float    *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );

    bli_cher( uplo_blis, conj_blis,
              dim,
              alpha_buf,
              x_buf, x_inc,
              a_buf, row_str, col_str );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex *a_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *x_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    double   *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );

    bli_zher( uplo_blis, conj_blis,
              dim,
              alpha_buf,
              x_buf, x_inc,
              a_buf, row_str, col_str );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Apply_pivots_rt_opt_var1
 *
 * Implements the "rt" pivot application by reusing the "ln" kernels
 * FLA_Apply_pivots_ln_op{i,s,d,c,z}_var1 with A's row and column strides
 * exchanged: the kernel's row-stride argument receives A's column stride
 * and its column-stride argument receives A's row stride.
 *
 *   p  Pivot vector (read as int); the whole vector is applied, i.e. the
 *      zero-based index range [0, vector_dim(p) - 1] with p's own
 *      increment.
 *   A  Matrix object; its length is passed as the kernel's first
 *      dimension argument.
 *
 * The kernel is selected by the datatype of A (FLA_INT is supported in
 * addition to the four floating-point types); other datatypes result in
 * no kernel call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Apply_pivots_rt_opt_var1( FLA_Obj p, FLA_Obj A )
{
  FLA_Datatype dt = FLA_Obj_datatype( A );

  int dim = FLA_Obj_length( A );

  // Strides as seen by the ln kernel: deliberately swapped relative to A.
  int ln_rs = FLA_Obj_col_stride( A );
  int ln_cs = FLA_Obj_row_stride( A );

  int p_inc = FLA_Obj_vector_inc( p );

  // Zero-based first and last pivot indices.
  int first = 0;
  int last  = ( int ) FLA_Obj_vector_dim( p ) - 1;

  // The pivot buffer has the same type for every datatype of A.
  int* p_buf = FLA_INT_PTR( p );

  if ( dt == FLA_INT )
  {
    int* a_buf = FLA_INT_PTR( A );

    FLA_Apply_pivots_ln_opi_var1( dim,
                                  a_buf, ln_rs, ln_cs,
                                  first, last,
                                  p_buf, p_inc );
  }
  else if ( dt == FLA_FLOAT )
  {
    float* a_buf = FLA_FLOAT_PTR( A );

    FLA_Apply_pivots_ln_ops_var1( dim,
                                  a_buf, ln_rs, ln_cs,
                                  first, last,
                                  p_buf, p_inc );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a_buf = FLA_DOUBLE_PTR( A );

    FLA_Apply_pivots_ln_opd_var1( dim,
                                  a_buf, ln_rs, ln_cs,
                                  first, last,
                                  p_buf, p_inc );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a_buf = FLA_COMPLEX_PTR( A );

    FLA_Apply_pivots_ln_opc_var1( dim,
                                  a_buf, ln_rs, ln_cs,
                                  first, last,
                                  p_buf, p_inc );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_pivots_ln_opz_var1( dim,
                                  a_buf, ln_rs, ln_cs,
                                  first, last,
                                  p_buf, p_inc );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Hess_UT_step_opt_var4
 *
 * Object-level wrapper around FLA_Hess_UT_step_op{s,d,c,z}_var4.
 *
 * Passes the length of A and the length of T as the two dimension
 * arguments, followed by the buffer and (row, column) strides of each of
 * A, Y, Z and T, in that order. The kernel is selected by the datatype of
 * A; other datatypes result in no kernel call.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Hess_UT_step_opt_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T )
{
  FLA_Datatype dt = FLA_Obj_datatype( A );

  /* Dimension arguments. */
  int a_len = FLA_Obj_length( A );
  int t_len = FLA_Obj_length( T );

  /* Storage layout of every operand. */
  int a_rs = FLA_Obj_row_stride( A );
  int a_cs = FLA_Obj_col_stride( A );
  int y_rs = FLA_Obj_row_stride( Y );
  int y_cs = FLA_Obj_col_stride( Y );
  int z_rs = FLA_Obj_row_stride( Z );
  int z_cs = FLA_Obj_col_stride( Z );
  int t_rs = FLA_Obj_row_stride( T );
  int t_cs = FLA_Obj_col_stride( T );

  if ( dt == FLA_FLOAT )
  {
    float* a_buf = FLA_FLOAT_PTR( A );
    float* y_buf = FLA_FLOAT_PTR( Y );
    float* z_buf = FLA_FLOAT_PTR( Z );
    float* t_buf = FLA_FLOAT_PTR( T );

    FLA_Hess_UT_step_ops_var4( a_len, t_len,
                               a_buf, a_rs, a_cs,
                               y_buf, y_rs, y_cs,
                               z_buf, z_rs, z_cs,
                               t_buf, t_rs, t_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double* a_buf = FLA_DOUBLE_PTR( A );
    double* y_buf = FLA_DOUBLE_PTR( Y );
    double* z_buf = FLA_DOUBLE_PTR( Z );
    double* t_buf = FLA_DOUBLE_PTR( T );

    FLA_Hess_UT_step_opd_var4( a_len, t_len,
                               a_buf, a_rs, a_cs,
                               y_buf, y_rs, y_cs,
                               z_buf, z_rs, z_cs,
                               t_buf, t_rs, t_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex* a_buf = FLA_COMPLEX_PTR( A );
    scomplex* y_buf = FLA_COMPLEX_PTR( Y );
    scomplex* z_buf = FLA_COMPLEX_PTR( Z );
    scomplex* t_buf = FLA_COMPLEX_PTR( T );

    FLA_Hess_UT_step_opc_var4( a_len, t_len,
                               a_buf, a_rs, a_cs,
                               y_buf, y_rs, y_cs,
                               z_buf, z_rs, z_cs,
                               t_buf, t_rs, t_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex* y_buf = FLA_DOUBLE_COMPLEX_PTR( Y );
    dcomplex* z_buf = FLA_DOUBLE_COMPLEX_PTR( Z );
    dcomplex* t_buf = FLA_DOUBLE_COMPLEX_PTR( T );

    FLA_Hess_UT_step_opz_var4( a_len, t_len,
                               a_buf, a_rs, a_cs,
                               y_buf, y_rs, y_cs,
                               z_buf, z_rs, z_cs,
                               t_buf, t_rs, t_cs );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_LU_nopiv_opt_var2
 *
 * Object-level wrapper around FLA_LU_nopiv_op{s,d,c,z}_var2.
 *
 * Reads the length, width and strides of A and forwards them, together
 * with A's buffer, to the typed kernel matching A's datatype. Other
 * datatypes result in no kernel call. Any value returned by the kernel
 * is not inspected.
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_LU_nopiv_opt_var2( FLA_Obj A )
{
  FLA_Datatype dt = FLA_Obj_datatype( A );

  int rows    = FLA_Obj_length( A );
  int cols    = FLA_Obj_width( A );
  int row_str = FLA_Obj_row_stride( A );
  int col_str = FLA_Obj_col_stride( A );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float* a_buf = FLA_FLOAT_PTR( A );

      FLA_LU_nopiv_ops_var2( rows, cols, a_buf, row_str, col_str );
      break;
    }

    case FLA_DOUBLE:
    {
      double* a_buf = FLA_DOUBLE_PTR( A );

      FLA_LU_nopiv_opd_var2( rows, cols, a_buf, row_str, col_str );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* a_buf = FLA_COMPLEX_PTR( A );

      FLA_LU_nopiv_opc_var2( rows, cols, a_buf, row_str, col_str );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* a_buf = FLA_DOUBLE_COMPLEX_PTR( A );

      FLA_LU_nopiv_opz_var2( rows, cols, a_buf, row_str, col_str );
      break;
    }
  }

  return FLA_SUCCESS;
}