FLA_Error FLA_Svdd_external( FLA_Svd_type jobz, FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; FLA_Datatype dt_real; FLA_Datatype dt_int; int m_A, n_A, cs_A; int cs_U; int cs_V; int min_m_n; int lwork, lrwork, liwork; FLA_Obj work, rwork, iwork; char blas_jobz; int i; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Svdd_check( jobz, A, s, U, V ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); dt_int = FLA_INT; m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); cs_U = FLA_Obj_col_stride( U ); cs_V = FLA_Obj_col_stride( V ); min_m_n = min( m_A, n_A ); // Allocate the rwork and iwork arrays up front. if ( jobz == FLA_SVD_VECTORS_NONE ) lrwork = 5 * min_m_n; else lrwork = 5 * min_m_n * min_m_n + 7 * min_m_n; liwork = 8 * min_m_n; FLA_Obj_create( dt_int, liwork, 1, 0, 0, &iwork ); if ( FLA_Obj_is_complex( A ) ) FLA_Obj_create( dt_real, lrwork, 1, 0, 0, &rwork ); FLA_Param_map_flame_to_netlib_svd_type( jobz, &blas_jobz ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ) { case FLA_FLOAT: { float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); float* buff_s = ( float* ) FLA_FLOAT_PTR( s ); float* buff_U = ( float* ) FLA_FLOAT_PTR( U ); float* buff_V = ( float* ) FLA_FLOAT_PTR( V ); float* buff_work = ( float* ) FLA_FLOAT_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_sgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_iwork, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); double* buff_s = ( double* ) FLA_DOUBLE_PTR( s ); double* buff_U = ( double* ) FLA_DOUBLE_PTR( U ); double* buff_V = ( double* ) FLA_DOUBLE_PTR( V ); double* buff_work = ( double* ) FLA_DOUBLE_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_dgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_iwork, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); float* buff_s = ( float* ) FLA_FLOAT_PTR( s ); scomplex* buff_U = ( scomplex* ) FLA_COMPLEX_PTR( U ); scomplex* buff_V = ( scomplex* ) FLA_COMPLEX_PTR( V ); scomplex* buff_work = ( scomplex* ) FLA_COMPLEX_PTR( work ); float* buff_rwork = ( float* ) FLA_FLOAT_PTR( rwork ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_cgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_rwork, buff_iwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_s = ( double* ) FLA_DOUBLE_PTR( s ); dcomplex* buff_U = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_V = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( V ); dcomplex* buff_work = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( work ); double* buff_rwork = ( double* ) FLA_DOUBLE_PTR( rwork ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_zgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_rwork, buff_iwork, &info ); break; } } } FLA_Obj_free( &work ); FLA_Obj_free( &iwork ); if ( FLA_Obj_is_complex( A ) ) FLA_Obj_free( &rwork ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Her_external( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj x, FLA_Obj A ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; uplo_t blis_uplo; conj_t blis_conj; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Her_check( uplo, alpha, x, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conj ); switch( datatype ) { case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_ssyr( blis_uplo, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_dsyr( blis_uplo, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_cher( blis_uplo, blis_conj, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_zher( blis_uplo, blis_conj, m_A, buff_alpha, buff_x, inc_x, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bidiag_blk_external( FLA_Obj A, FLA_Obj tu, FLA_Obj tv ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; int m_A, n_A, cs_A; int min_m_n, max_m_n; int lwork; FLA_Obj d, e, work_obj; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Bidiag_check( A, tu, tv ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); min_m_n = FLA_Obj_min_dim( A ); max_m_n = FLA_Obj_max_dim( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), min_m_n, 1, 0, 0, &d ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), min_m_n - 1, 1, 0, 0, &e ); lwork = (m_A + n_A) * FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work_obj ); switch( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); float* buff_tu = ( float * ) FLA_FLOAT_PTR( tu ); float* buff_tv = ( float * ) FLA_FLOAT_PTR( tv ); float* buff_work = ( float * ) FLA_FLOAT_PTR( work_obj ); F77_sgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); double* buff_tu = ( double * ) FLA_DOUBLE_PTR( tu ); double* buff_tv = ( double * ) FLA_DOUBLE_PTR( tv ); double* buff_work = ( double * ) FLA_DOUBLE_PTR( work_obj ); F77_dgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); scomplex* buff_tu = ( scomplex * ) FLA_COMPLEX_PTR( tu ); scomplex* buff_tv = ( scomplex * ) FLA_COMPLEX_PTR( tv ); scomplex* buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work_obj ); F77_cgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); dcomplex* buff_tu = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( tu ); dcomplex* buff_tv = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( tv ); dcomplex* buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work_obj ); F77_zgebrd( &m_A, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_tu, buff_tv, buff_work, &lwork, &info ); break; } } FLA_Obj_free( &d ); FLA_Obj_free( &e ); FLA_Obj_free( &work_obj ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Apply_pivots_rt_opt_var1( FLA_Obj p, FLA_Obj A ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_p; int k1_0, k2_0; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); // Swap the stride; FLA_Apply_pivots_ln_ops_var1 already consider the memory access pattern. cs_A = FLA_Obj_row_stride( A ); rs_A = FLA_Obj_col_stride( A ); // Use minus increment of the ln version. inc_p = FLA_Obj_vector_inc( p ); // Use zero-based indices. k1_0 = 0; k2_0 = ( int ) FLA_Obj_vector_dim( p ) - 1; switch ( datatype ) { case FLA_INT: { int* buff_A = FLA_INT_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opi_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_ops_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opd_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opc_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); int* buff_p = FLA_INT_PTR( p ); FLA_Apply_pivots_ln_opz_var1( m_A, buff_A, rs_A, cs_A, k1_0, k2_0, buff_p, inc_p ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_LU_nopiv_opt_var2( FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); FLA_LU_nopiv_ops_var2( m_A, n_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); FLA_LU_nopiv_opd_var2( m_A, n_A, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); FLA_LU_nopiv_opc_var2( m_A, n_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); FLA_LU_nopiv_opz_var2( m_A, n_A, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Symmetrize( FLA_Uplo uplo, FLA_Obj A ) { FLA_Datatype datatype; dim_t n_A; dim_t rs_A, cs_A; conj_t blis_conj; uplo_t blis_uplo; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Symmetrize_check( uplo, A ); datatype = FLA_Obj_datatype( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conj ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch ( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); bli_ssymmize( blis_conj, blis_uplo, n_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); bli_dsymmize( blis_conj, blis_uplo, n_A, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); bli_csymmize( blis_conj, blis_uplo, n_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); bli_zsymmize( blis_conj, blis_uplo, n_A, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Eig_gest_nl_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B ) { FLA_Datatype datatype; int m_AB; int rs_A, cs_A; int rs_B, cs_B; int inc_y; FLA_Obj yT, yB; datatype = FLA_Obj_datatype( A ); m_AB = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); FLA_Part_2x1( Y, &yT, &yB, 1, FLA_TOP ); inc_y = FLA_Obj_vector_inc( yT ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_y = FLA_FLOAT_PTR( yT ); float* buff_B = FLA_FLOAT_PTR( B ); FLA_Eig_gest_nl_ops_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_y = FLA_DOUBLE_PTR( yT ); double* buff_B = FLA_DOUBLE_PTR( B ); FLA_Eig_gest_nl_opd_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_y = FLA_COMPLEX_PTR( yT ); scomplex* buff_B = FLA_COMPLEX_PTR( B ); FLA_Eig_gest_nl_opc_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yT ); dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B ); FLA_Eig_gest_nl_opz_var5( m_AB, buff_A, rs_A, cs_A, buff_y, inc_y, buff_B, rs_B, cs_B ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hess_UT_step_ofu_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T ) { FLA_Datatype datatype; int m_A, m_T; int rs_A, cs_A; int rs_Y, cs_Y; int rs_Z, cs_Z; int rs_T, cs_T; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); m_T = FLA_Obj_length( T ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_Y = FLA_Obj_row_stride( Y ); cs_Y = FLA_Obj_col_stride( Y ); rs_Z = FLA_Obj_row_stride( Z ); cs_Z = FLA_Obj_col_stride( Z ); rs_T = FLA_Obj_row_stride( T ); cs_T = FLA_Obj_col_stride( T ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_Y = FLA_FLOAT_PTR( Y ); float* buff_Z = FLA_FLOAT_PTR( Z ); float* buff_T = FLA_FLOAT_PTR( T ); FLA_Hess_UT_step_ofs_var4( m_A, m_T, buff_A, rs_A, cs_A, buff_Y, rs_Y, cs_Y, buff_Z, rs_Z, cs_Z, buff_T, rs_T, cs_T ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_Y = FLA_DOUBLE_PTR( Y ); double* buff_Z = FLA_DOUBLE_PTR( Z ); double* buff_T = FLA_DOUBLE_PTR( T ); FLA_Hess_UT_step_ofd_var4( m_A, m_T, buff_A, rs_A, cs_A, buff_Y, rs_Y, cs_Y, buff_Z, rs_Z, cs_Z, buff_T, rs_T, cs_T ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_Y = FLA_COMPLEX_PTR( Y ); scomplex* buff_Z = FLA_COMPLEX_PTR( Z ); scomplex* buff_T = FLA_COMPLEX_PTR( T ); FLA_Hess_UT_step_ofc_var4( m_A, m_T, buff_A, rs_A, cs_A, buff_Y, rs_Y, cs_Y, buff_Z, rs_Z, cs_Z, buff_T, rs_T, cs_T ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y ); dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z ); dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T ); FLA_Hess_UT_step_ofz_var4( m_A, m_T, buff_A, rs_A, cs_A, buff_Y, rs_Y, cs_Y, buff_Z, rs_Z, cs_Z, buff_T, rs_T, cs_T ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tridiag_form_Q_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj t ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; int m_A; int cs_A; int lwork; char blas_uplo; FLA_Obj work; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Tridiag_form_Q_check( uplo, A, t ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); lwork = max( 1, ( m_A - 1 ) ) * FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); switch( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_t = ( float * ) FLA_FLOAT_PTR( t ); float* buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sorgtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double* buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dorgtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex* buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); scomplex* buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work ); F77_cungtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work ); F77_zungtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } } FLA_Obj_free( &work ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Ger_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; conj1_t blis_conjx; conj1_t blis_conjy; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Ger_check( alpha, x, y, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjy ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bl1_sger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bl1_dger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); bl1_cger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); bl1_zger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Her2k_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ) { FLA_Datatype datatype; int k_AB; int m_A, n_A; int m_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; uplo_t blis_uplo; trans_t blis_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) ) { FLA_Scal_external( beta, C ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); if ( trans == FLA_NO_TRANSPOSE ) k_AB = n_A; else k_AB = m_A; FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_ssyr2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dsyr2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_cher2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_zher2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tevdr_external( FLA_Evd_type jobz, FLA_Obj d, FLA_Obj e, FLA_Obj l, FLA_Obj A ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; FLA_Datatype dt_real; int n_A, cs_A; int lisuppz, lwork, liwork; FLA_Obj isuppz, work, iwork; char blas_jobz; char blas_range; int i; int vl, vu; int il, iu; int nzc; int try_rac; int n_eig_found; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Tevdd_check( jobz, d, e, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_netlib_evd_type( jobz, &blas_jobz ); // Hard-code some parameters. blas_range = 'A'; nzc = n_A; try_rac = TRUE; // Allocate space for the isuppz array. lisuppz = 2 * n_A; FLA_Obj_create( FLA_INT, lisuppz, 1, 0, 0, &isuppz ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size. lwork = -1; liwork = -1; FLA_Obj_create( dt_real, 1, 1, 0, 0, &work ); FLA_Obj_create( FLA_INT, 1, 1, 0, 0, &iwork ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work arrays, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { lwork = ( int ) *FLA_FLOAT_PTR( work ); liwork = ( int ) *FLA_INT_PTR( iwork ); } else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) { lwork = ( int ) *FLA_DOUBLE_PTR( work ); liwork = ( int ) *FLA_INT_PTR( iwork ); } //printf( "ideal workspace for n = %d\n", n_A ); //printf( " lwork = %d\n", lwork ); //printf( " liwork = %d\n", liwork ); FLA_Obj_free( &work ); FLA_Obj_free( &iwork ); FLA_Obj_create( dt_real, lwork, 1, 0, 0, &work ); FLA_Obj_create( FLA_INT, liwork, 1, 0, 0, &iwork ); } switch( datatype ) { case FLA_FLOAT: { float* buff_d = ( float* ) FLA_FLOAT_PTR( d ); float* buff_e = ( float* ) FLA_FLOAT_PTR( e ); float* buff_l = ( float* ) FLA_FLOAT_PTR( l ); float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); float* buff_work = ( float* ) FLA_FLOAT_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_sstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } case FLA_DOUBLE: { double* buff_d = ( double* ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double* ) FLA_DOUBLE_PTR( e ); double* buff_l = ( double* ) FLA_DOUBLE_PTR( l ); double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); double* buff_work = ( double* ) FLA_DOUBLE_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_dstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } case FLA_COMPLEX: { float* buff_d = ( float* ) FLA_FLOAT_PTR( d ); float* buff_e = ( float* ) FLA_FLOAT_PTR( e ); float* buff_l = ( float* ) FLA_FLOAT_PTR( l ); scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); float* buff_work = ( float* ) FLA_FLOAT_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_cstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = ( double* ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double* ) FLA_DOUBLE_PTR( e ); double* buff_l = ( double* ) FLA_DOUBLE_PTR( l ); dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); double* buff_work = ( double* ) FLA_DOUBLE_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_zstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } } } FLA_Obj_free( &isuppz ); FLA_Obj_free( &work ); FLA_Obj_free( &iwork ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Tridiag_unb_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj t ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; int n_A, cs_A; FLA_Obj d, e; char blas_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Tridiag_check( uplo, A, t ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), n_A, 1, 0, 0, &d ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), n_A - 1, 1, 0, 0, &e ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); switch( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); float* buff_t = ( float * ) FLA_FLOAT_PTR( t ); F77_ssytd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); double* buff_t = ( double * ) FLA_DOUBLE_PTR( t ); F77_dsytd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); scomplex* buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); F77_chetd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); dcomplex* buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); F77_zhetd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } } FLA_Obj_free( &d ); FLA_Obj_free( &e ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Bsvd_sinval_v_opt_var1( FLA_Obj tol, FLA_Obj thresh, FLA_Obj G, FLA_Obj H, FLA_Obj d, FLA_Obj e, FLA_Obj k ) { FLA_Datatype datatype; int m_A, n_GH; int rs_G, cs_G; int rs_H, cs_H; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); n_GH = FLA_Obj_width( G ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_H = FLA_Obj_row_stride( H ); cs_H = FLA_Obj_col_stride( H ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { float* buff_tol = FLA_FLOAT_PTR( tol ); float* buff_thresh = FLA_FLOAT_PTR( thresh ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_H = FLA_COMPLEX_PTR( H ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Bsvd_sinval_v_ops_var1( m_A, n_GH, 9, *buff_tol, *buff_thresh, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } case FLA_DOUBLE: { double* buff_tol = FLA_DOUBLE_PTR( tol ); double* buff_thresh = FLA_DOUBLE_PTR( thresh ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Bsvd_sinval_v_opd_var1( m_A, n_GH, 9, *buff_tol, *buff_thresh, buff_G, rs_G, cs_G, buff_H, rs_H, cs_H, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Ahx_Ax_opt_var1( FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w ) { /* Effective computation: v = A' * x; w = A * x; */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x, inc_v, inc_w; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_v = FLA_Obj_vector_inc( v ); inc_w = FLA_Obj_vector_inc( w ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_x = FLA_FLOAT_PTR( x ); float* buff_v = FLA_FLOAT_PTR( v ); float* buff_w = FLA_FLOAT_PTR( w ); FLA_Fused_Ahx_Ax_ops_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_x = FLA_DOUBLE_PTR( x ); double* buff_v = FLA_DOUBLE_PTR( v ); double* buff_w = FLA_DOUBLE_PTR( w ); FLA_Fused_Ahx_Ax_opd_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_x = FLA_COMPLEX_PTR( x ); scomplex* buff_v = FLA_COMPLEX_PTR( v ); scomplex* buff_w = FLA_COMPLEX_PTR( w ); FLA_Fused_Ahx_Ax_opc_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v ); dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w ); FLA_Fused_Ahx_Ax_opz_var1( m_A, n_A, buff_A, rs_A, cs_A, buff_x, inc_x, buff_v, inc_v, buff_w, inc_w ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bidiag_UT_u_extract_real_diagonals( FLA_Obj A, FLA_Obj d, FLA_Obj e ) { FLA_Datatype datatype; int n_A; int rs_A, cs_A; int inc_d; int inc_e; int i; datatype = FLA_Obj_datatype( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_d = FLA_Obj_vector_inc( d ); if ( n_A != 1 ) inc_e = FLA_Obj_vector_inc( e ); else inc_e = 0; switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; float* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; float* delta1 = buff_d + (i )*inc_d; float* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = *alpha11; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = *a12t_l; } break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; double* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; double* delta1 = buff_d + (i )*inc_d; double* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = *alpha11; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = *a12t_l; } break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; scomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; float* delta1 = buff_d + (i )*inc_d; float* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = alpha11->real; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = a12t_l->real; } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL ); for ( i = 0; i < n_A; ++i ) { dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A; dcomplex* a12t_l = buff_A + (i+1)*cs_A + (i )*rs_A; double* delta1 = buff_d + (i )*inc_d; double* epsilon1 = buff_e + (i )*inc_e; int n_ahead = n_A - i - 1; // delta1 = alpha11; *delta1 = alpha11->real; // epsilon1 = a12t_l; if ( n_ahead > 0 ) *epsilon1 = a12t_l->real; } break; } } return FLA_SUCCESS; }
FLA_Error FLA_Trmv_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; uplo1_t blis_uplo; trans1_t blis_trans; diag1_t blis_diag; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trmv_check( uplo, trans, diag, A, x ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); FLA_Param_map_flame_to_blis_diag( diag, &blis_diag ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); bl1_strmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); bl1_dtrmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); bl1_ctrmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); bl1_ztrmv( blis_uplo, blis_trans, blis_diag, m_A, buff_A, rs_A, cs_A, buff_x, inc_x ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Random_tri_matrix( FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; uplo1_t blis_uplo; diag1_t blis_diag; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Random_tri_matrix_check( uplo, diag, A ); datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_diag( diag, &blis_diag ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); bl1_srandmr( blis_uplo, blis_diag, m_A, n_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); bl1_drandmr( blis_uplo, blis_diag, m_A, n_A, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); bl1_crandmr( blis_uplo, blis_diag, m_A, n_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); bl1_zrandmr( blis_uplo, blis_diag, m_A, n_A, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Set_diag( FLA_Obj alpha, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Set_diag_check( alpha, A ); datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ){ case FLA_INT: { int *buff_A = ( int * ) FLA_INT_PTR( A ); int *buff_alpha = ( int * ) FLA_INT_PTR( alpha ); bli_isetdiag( 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bli_ssetdiag( 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bli_dsetdiag( 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); bli_csetdiag( 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); bli_zsetdiag( 0, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; trans_t blis_transa; conj_t blis_conjx; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemv_check( transa, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) { FLA_Scal_external( beta, y ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_trans( transa, &blis_transa ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_sgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_cgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_zgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Bidiag_apply_V_external( FLA_Side side, FLA_Trans trans, FLA_Obj A, FLA_Obj t, FLA_Obj B ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; // int m_A, n_A; int m_B, n_B; int cs_A; int cs_B; int k_t; int lwork; FLA_Obj work; char blas_side; char blas_vect = 'P'; char blas_trans; int i; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Apply_Q_check( side, trans, storev, A, t, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); // m_A = FLA_Obj_length( A ); // n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); cs_B = FLA_Obj_col_stride( B ); if ( blas_vect == 'Q' ) k_t = FLA_Obj_vector_dim( t ); else k_t = FLA_Obj_vector_dim( t ) + 1; if ( FLA_Obj_is_real( A ) && trans == FLA_CONJ_TRANSPOSE ) trans = FLA_TRANSPOSE; FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_t = ( float * ) FLA_FLOAT_PTR( t ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sormbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dormbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work ); F77_cunmbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work ); F77_zunmbr( &blas_vect, &blas_side, &blas_trans, &m_B, &n_B, &k_t, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } } } FLA_Obj_free( &work ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Sylv_nh_opt_var1( FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale ) { FLA_Datatype datatype; int m_C, n_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; int info; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); switch ( datatype ) { case FLA_FLOAT: { int* buff_isgn = FLA_INT_PTR( isgn ); float* buff_A = FLA_FLOAT_PTR( A ); float* buff_B = FLA_FLOAT_PTR( B ); float* buff_C = FLA_FLOAT_PTR( C ); float* buff_scale = FLA_FLOAT_PTR( scale ); float sgn = ( float ) *buff_isgn; FLA_Sylv_nh_ops_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } case FLA_DOUBLE: { int* buff_isgn = FLA_INT_PTR( isgn ); double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_B = FLA_DOUBLE_PTR( B ); double* buff_C = FLA_DOUBLE_PTR( C ); double* buff_scale = FLA_DOUBLE_PTR( scale ); double sgn = ( double ) *buff_isgn; FLA_Sylv_nh_opd_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } case FLA_COMPLEX: { int* buff_isgn = FLA_INT_PTR( isgn ); scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_B = FLA_COMPLEX_PTR( B ); scomplex* buff_C = FLA_COMPLEX_PTR( C ); scomplex* buff_scale = FLA_COMPLEX_PTR( scale ); float sgn = ( float ) *buff_isgn; FLA_Sylv_nh_opc_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } case FLA_DOUBLE_COMPLEX: { int* buff_isgn = FLA_INT_PTR( isgn ); dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex* buff_C = FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex* buff_scale = FLA_DOUBLE_COMPLEX_PTR( scale ); double sgn = ( double ) *buff_isgn; FLA_Sylv_nh_opz_var1( sgn, m_C, n_C, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_C, rs_C, cs_C, buff_scale, &info ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tridiag_UT_l_step_opt_var1( FLA_Obj A, FLA_Obj T ) { FLA_Datatype datatype; int m_A, m_T; int rs_A, cs_A; int rs_T, cs_T; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); m_T = FLA_Obj_length( T ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_T = FLA_Obj_row_stride( T ); cs_T = FLA_Obj_col_stride( T ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_T = FLA_FLOAT_PTR( T ); FLA_Tridiag_UT_l_step_ops_var1( m_A, m_T, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_T = FLA_DOUBLE_PTR( T ); FLA_Tridiag_UT_l_step_opd_var1( m_A, m_T, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_T = FLA_COMPLEX_PTR( T ); FLA_Tridiag_UT_l_step_opc_var1( m_A, m_T, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T ); FLA_Tridiag_UT_l_step_opz_var1( m_A, m_T, buff_A, rs_A, cs_A, buff_T, rs_T, cs_T ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tevd_v_opt_var1( FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U ) { FLA_Datatype datatype; int m_A, m_U, n_G; int inc_d; int inc_e; int rs_G, cs_G; int rs_U, cs_U; datatype = FLA_Obj_datatype( U ); m_A = FLA_Obj_vector_dim( d ); m_U = FLA_Obj_length( U ); n_G = FLA_Obj_width( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); switch ( datatype ) { case FLA_FLOAT: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_U = FLA_FLOAT_PTR( U ); FLA_Tevd_v_ops_var1( m_A, m_U, n_G, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U ); break; } case FLA_DOUBLE: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_U = FLA_DOUBLE_PTR( U ); FLA_Tevd_v_opd_var1( m_A, m_U, n_G, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U ); break; } case FLA_COMPLEX: { float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); scomplex* buff_G = FLA_COMPLEX_PTR( G ); scomplex* buff_U = FLA_COMPLEX_PTR( U ); FLA_Tevd_v_opc_var1( m_A, m_U, n_G, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U ); FLA_Tevd_v_opz_var1( m_A, m_U, n_G, buff_d, inc_d, buff_e, inc_e, buff_G, rs_G, cs_G, buff_U, rs_U, cs_U ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Gerc2_opt_var1( FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A ) { /* Effective computation: A = A + alpha * ( u * y' + z * v' ); */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_u, inc_y, inc_z, inc_v; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_u = FLA_Obj_vector_inc( u ); inc_y = FLA_Obj_vector_inc( y ); inc_z = FLA_Obj_vector_inc( z ); inc_v = FLA_Obj_vector_inc( v ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_u = FLA_FLOAT_PTR( u ); float* buff_y = FLA_FLOAT_PTR( y ); float* buff_z = FLA_FLOAT_PTR( z ); float* buff_v = FLA_FLOAT_PTR( v ); float* buff_alpha = FLA_FLOAT_PTR( alpha ); FLA_Fused_Gerc2_ops_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_u = FLA_DOUBLE_PTR( u ); double* buff_y = FLA_DOUBLE_PTR( y ); double* buff_z = FLA_DOUBLE_PTR( z ); double* buff_v = FLA_DOUBLE_PTR( v ); double* buff_alpha = FLA_DOUBLE_PTR( alpha ); FLA_Fused_Gerc2_opd_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_u = FLA_COMPLEX_PTR( u ); scomplex* buff_y = FLA_COMPLEX_PTR( y ); scomplex* buff_z = FLA_COMPLEX_PTR( z ); scomplex* buff_v = FLA_COMPLEX_PTR( v ); scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha ); FLA_Fused_Gerc2_opc_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z ); dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v ); dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha ); FLA_Fused_Gerc2_opz_var1( m_A, n_A, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_asm_var9( FLA_Obj G, FLA_Obj A ) /* Apply k sets of Givens rotations to a matrix A from the right, where each set takes the form: A := A ( G(n-1,k) ... G(1,k) G(0,k) )' = A G(0,k)' G(1,k)' ... G(n-1,k)' where Gik is the ith Givens rotation formed from the kth set, stored in the (i,k) entries of of G: Gik = / gamma_ik -sigma_ik \ \ sigma_ik gamma_ik / This variant iterates in pipelined, overlapping fashion and applies rotations to two columns at a time. -FGVZ */ { FLA_Datatype datatype; int k_G, m_A, n_A; int rs_G, cs_G; int rs_A, cs_A; datatype = FLA_Obj_datatype( A ); k_G = FLA_Obj_width( G ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ) { case FLA_FLOAT: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); FLA_Apply_G_rf_ass_var9( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); FLA_Apply_G_rf_asd_var9( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); FLA_Apply_G_rf_asc_var9( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); FLA_Apply_G_rf_asz_var9( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Copy_external( FLA_Obj A, FLA_Obj B ) { FLA_Datatype dt_A; FLA_Datatype dt_B; int m_B, n_B; int rs_A, cs_A; int rs_B, cs_B; trans_t blis_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Copy_check( A, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; dt_A = FLA_Obj_datatype( A ); dt_B = FLA_Obj_datatype( B ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); if ( FLA_Obj_is_conformal_to( FLA_NO_TRANSPOSE, A, B ) ) FLA_Param_map_flame_to_blis_trans( FLA_NO_TRANSPOSE, &blis_trans ); else // if ( FLA_Obj_is_conformal_to( FLA_TRANSPOSE, A, B ) ) FLA_Param_map_flame_to_blis_trans( FLA_TRANSPOSE, &blis_trans ); // If A is of type FLA_CONSTANT, then we have to proceed based on the // datatype of B. if ( dt_A == FLA_CONSTANT ) { if ( dt_B == FLA_FLOAT ) { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bli_scopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE ) { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bli_dcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_COMPLEX ) { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bli_ccopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bli_zcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } } else if ( dt_A == FLA_INT ) { int* buff_A = ( int * ) FLA_INT_PTR( A ); int* buff_B = ( int * ) FLA_INT_PTR( B ); bli_icopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_A == FLA_FLOAT ) { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); if ( dt_B == FLA_FLOAT ) { float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bli_scopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE ) { double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bli_sdcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_COMPLEX ) { scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bli_sccopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bli_szcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } } else if ( dt_A == FLA_DOUBLE ) { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); if ( dt_B == FLA_FLOAT ) { float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bli_dscopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE ) { double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bli_dcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_COMPLEX ) { scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bli_dccopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bli_dzcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } } else if ( dt_A == FLA_COMPLEX ) { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); if ( dt_B == FLA_FLOAT ) { float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bli_cscopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE ) { double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bli_cdcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_COMPLEX ) { scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bli_ccopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bli_czcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } } else if ( dt_A == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); if ( dt_B == FLA_FLOAT ) { float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bli_zscopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE ) { double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bli_zdcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_COMPLEX ) { scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bli_zccopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } else if ( dt_B == FLA_DOUBLE_COMPLEX ) { dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bli_zcopymt( blis_trans, m_B, n_B, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_HUD_UT_l_opt_var1( FLA_Obj tau, FLA_Obj w12t, FLA_Obj r12t, FLA_Obj u1, FLA_Obj C2, FLA_Obj v1, FLA_Obj D2 ) { FLA_Datatype datatype; int m_u1_C2; int m_v1_D2; int n_r12t; int inc_u1; int inc_v1; int inc_w12t; int inc_r12t; int rs_C2, cs_C2; int rs_D2, cs_D2; if ( FLA_Obj_has_zero_dim( r12t ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( C2 ); m_u1_C2 = FLA_Obj_length( u1 ); m_v1_D2 = FLA_Obj_length( v1 ); n_r12t = FLA_Obj_width( r12t ); inc_w12t = FLA_Obj_vector_inc( w12t ); inc_r12t = FLA_Obj_vector_inc( r12t ); inc_u1 = FLA_Obj_vector_inc( u1 ); rs_C2 = FLA_Obj_row_stride( C2 ); cs_C2 = FLA_Obj_col_stride( C2 ); inc_v1 = FLA_Obj_vector_inc( v1 ); rs_D2 = FLA_Obj_row_stride( D2 ); cs_D2 = FLA_Obj_col_stride( D2 ); switch ( datatype ) { case FLA_FLOAT: { float* tau_p = ( float* ) FLA_FLOAT_PTR( tau ); float* w12t_p = ( float* ) FLA_FLOAT_PTR( w12t ); float* r12t_p = ( float* ) FLA_FLOAT_PTR( r12t ); float* u1_p = ( float* ) FLA_FLOAT_PTR( u1 ); float* C2_p = ( float* ) FLA_FLOAT_PTR( C2 ); float* v1_p = ( float* ) FLA_FLOAT_PTR( v1 ); float* D2_p = ( float* ) FLA_FLOAT_PTR( D2 ); FLA_Apply_HUD_UT_l_ops_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } case FLA_DOUBLE: { double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau ); double* w12t_p = ( double* ) FLA_DOUBLE_PTR( w12t ); double* r12t_p = ( double* ) FLA_DOUBLE_PTR( r12t ); double* u1_p = ( double* ) FLA_DOUBLE_PTR( u1 ); double* C2_p = ( double* ) FLA_DOUBLE_PTR( C2 ); double* v1_p = ( double* ) FLA_DOUBLE_PTR( v1 ); double* D2_p = ( double* ) FLA_DOUBLE_PTR( D2 ); FLA_Apply_HUD_UT_l_opd_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } case FLA_COMPLEX: { scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau ); scomplex* w12t_p = ( scomplex* ) FLA_COMPLEX_PTR( w12t ); scomplex* r12t_p = ( scomplex* ) FLA_COMPLEX_PTR( r12t ); scomplex* u1_p = ( scomplex* ) FLA_COMPLEX_PTR( u1 ); scomplex* C2_p = ( scomplex* ) FLA_COMPLEX_PTR( C2 ); scomplex* v1_p = ( scomplex* ) FLA_COMPLEX_PTR( v1 ); scomplex* D2_p = ( scomplex* ) FLA_COMPLEX_PTR( D2 ); FLA_Apply_HUD_UT_l_opc_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau ); dcomplex* w12t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( w12t ); dcomplex* r12t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( r12t ); dcomplex* u1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u1 ); dcomplex* C2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C2 ); dcomplex* v1_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( v1 ); dcomplex* D2_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( D2 ); FLA_Apply_HUD_UT_l_opz_var1( m_u1_C2, m_v1_D2, n_r12t, tau_p, w12t_p, inc_w12t, r12t_p, inc_r12t, u1_p, inc_u1, C2_p, rs_C2, cs_C2, v1_p, inc_v1, D2_p, rs_D2, cs_D2 ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_lf_blk_var3( FLA_Obj G, FLA_Obj A, dim_t b_alg ) { FLA_Datatype datatype; int k_G, m_A, n_A; int rs_G, cs_G; int rs_A, cs_A; datatype = FLA_Obj_datatype( A ); k_G = FLA_Obj_width( G ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); n_A = FLA_Obj_length( A ); m_A = FLA_Obj_width( A ); cs_A = FLA_Obj_row_stride( A ); rs_A = FLA_Obj_col_stride( A ); switch ( datatype ) { case FLA_FLOAT: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); FLA_Apply_G_rf_bls_var3( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } case FLA_DOUBLE: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); FLA_Apply_G_rf_bld_var3( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } case FLA_COMPLEX: { scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G ); scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); FLA_Apply_G_rf_blc_var3( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G ); dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); FLA_Apply_G_rf_blz_var3( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A, b_alg ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Max_abs_value( FLA_Obj A, FLA_Obj maxabs ) { FLA_Datatype datatype; FLA_Datatype dt_maxabs; dim_t m_A, n_A; dim_t rs_A, cs_A; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Max_abs_value_check( A, maxabs ); datatype = FLA_Obj_datatype( A ); dt_maxabs = FLA_Obj_datatype( maxabs ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_maxabs = ( float * ) FLA_FLOAT_PTR( maxabs ); bl1_smaxabsm( m_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_maxabs = ( double * ) FLA_DOUBLE_PTR( maxabs ); bl1_dmaxabsm( m_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); break; } case FLA_COMPLEX: { if ( dt_maxabs == FLA_FLOAT ) { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float* buff_maxabs = ( float * ) FLA_FLOAT_PTR( maxabs ); bl1_cmaxabsm( m_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); } else { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex* buff_maxabs = ( scomplex * ) FLA_COMPLEX_PTR( maxabs ); bl1_cmaxabsm( m_A, n_A, buff_A, rs_A, cs_A, &(buff_maxabs->real) ); buff_maxabs->imag = 0.0; } break; } case FLA_DOUBLE_COMPLEX: { if ( dt_maxabs == FLA_DOUBLE ) { dcomplex* buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_maxabs = ( double * ) FLA_DOUBLE_PTR( maxabs ); bl1_zmaxabsm( m_A, n_A, buff_A, rs_A, cs_A, buff_maxabs ); } else { dcomplex* buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_maxabs = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( maxabs ); bl1_zmaxabsm( m_A, n_A, buff_A, rs_A, cs_A, &(buff_maxabs->real) ); buff_maxabs->imag = 0.0; } break; } } return FLA_SUCCESS; }