FLA_Error FLA_Tridiag_form_Q_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj t ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; int m_A; int cs_A; int lwork; char blas_uplo; FLA_Obj work; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Tridiag_form_Q_check( uplo, A, t ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); lwork = max( 1, ( m_A - 1 ) ) * FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); switch( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_t = ( float * ) FLA_FLOAT_PTR( t ); float* buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sorgtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double* buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dorgtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex* buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); scomplex* buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work ); F77_cungtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work ); F77_zungtr( &blas_uplo, &m_A, buff_A, &cs_A, buff_t, buff_work, &lwork, &info ); break; } } FLA_Obj_free( &work ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Ger_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; conj1_t blis_conjx; conj1_t blis_conjy; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Ger_check( alpha, x, y, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjy ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); bl1_sger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); bl1_dger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); bl1_cger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); bl1_zger( blis_conjx, blis_conjy, m_A, n_A, buff_alpha, buff_x, inc_x, buff_y, inc_y, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Her2k_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ) { FLA_Datatype datatype; int k_AB; int m_A, n_A; int m_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; uplo_t blis_uplo; trans_t blis_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) ) { FLA_Scal_external( beta, C ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); if ( trans == FLA_NO_TRANSPOSE ) k_AB = n_A; else k_AB = m_A; FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_ssyr2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dsyr2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_cher2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_zher2k( blis_uplo, blis_trans, m_C, k_AB, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Nrm2_external( FLA_Obj x, FLA_Obj norm_x ) { FLA_Datatype datatype; int num_elem; int inc_x; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Nrm2_check( x, norm_x ); if ( FLA_Obj_has_zero_dim( x ) ) { FLA_Set( FLA_ZERO, norm_x ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( x ); inc_x = FLA_Obj_vector_inc( x ); num_elem = FLA_Obj_vector_dim( x ); switch ( datatype ){ case FLA_FLOAT: { float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_norm_x = ( float * ) FLA_FLOAT_PTR( norm_x ); bli_snrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } case FLA_DOUBLE: { double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_norm_x = ( double * ) FLA_DOUBLE_PTR( norm_x ); bli_dnrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } case FLA_COMPLEX: { scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); float *buff_norm_x = ( float * ) FLA_COMPLEX_PTR( norm_x ); bli_cnrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); double *buff_norm_x = ( double * ) FLA_DOUBLE_COMPLEX_PTR( norm_x ); bli_znrm2( num_elem, buff_x, inc_x, buff_norm_x ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tevdr_external( FLA_Evd_type jobz, FLA_Obj d, FLA_Obj e, FLA_Obj l, FLA_Obj A ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; FLA_Datatype dt_real; int n_A, cs_A; int lisuppz, lwork, liwork; FLA_Obj isuppz, work, iwork; char blas_jobz; char blas_range; int i; int vl, vu; int il, iu; int nzc; int try_rac; int n_eig_found; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Tevdd_check( jobz, d, e, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_netlib_evd_type( jobz, &blas_jobz ); // Hard-code some parameters. blas_range = 'A'; nzc = n_A; try_rac = TRUE; // Allocate space for the isuppz array. lisuppz = 2 * n_A; FLA_Obj_create( FLA_INT, lisuppz, 1, 0, 0, &isuppz ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size. lwork = -1; liwork = -1; FLA_Obj_create( dt_real, 1, 1, 0, 0, &work ); FLA_Obj_create( FLA_INT, 1, 1, 0, 0, &iwork ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work arrays, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) { lwork = ( int ) *FLA_FLOAT_PTR( work ); liwork = ( int ) *FLA_INT_PTR( iwork ); } else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) { lwork = ( int ) *FLA_DOUBLE_PTR( work ); liwork = ( int ) *FLA_INT_PTR( iwork ); } //printf( "ideal workspace for n = %d\n", n_A ); //printf( " lwork = %d\n", lwork ); //printf( " liwork = %d\n", liwork ); FLA_Obj_free( &work ); FLA_Obj_free( &iwork ); FLA_Obj_create( dt_real, lwork, 1, 0, 0, &work ); FLA_Obj_create( FLA_INT, liwork, 1, 0, 0, &iwork ); } switch( datatype ) { case FLA_FLOAT: { float* buff_d = ( float* ) FLA_FLOAT_PTR( d ); float* buff_e = ( float* ) FLA_FLOAT_PTR( e ); float* buff_l = ( float* ) FLA_FLOAT_PTR( l ); float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); float* buff_work = ( float* ) FLA_FLOAT_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_sstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } case FLA_DOUBLE: { double* buff_d = ( double* ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double* ) FLA_DOUBLE_PTR( e ); double* buff_l = ( double* ) FLA_DOUBLE_PTR( l ); double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); double* buff_work = ( double* ) FLA_DOUBLE_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_dstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } case FLA_COMPLEX: { float* buff_d = ( float* ) FLA_FLOAT_PTR( d ); float* buff_e = ( float* ) FLA_FLOAT_PTR( e ); float* buff_l = ( float* ) FLA_FLOAT_PTR( l ); scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); float* buff_work = ( float* ) FLA_FLOAT_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_cstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { double* buff_d = ( double* ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double* ) FLA_DOUBLE_PTR( e ); double* buff_l = ( double* ) FLA_DOUBLE_PTR( l ); dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); int* buff_isuppz = ( int* ) FLA_INT_PTR( isuppz ); double* buff_work = ( double* ) FLA_DOUBLE_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_zstemr( &blas_jobz, &blas_range, &n_A, buff_d, buff_e, &vl, &vu, &il, &iu, &n_eig_found, buff_l, buff_A, &cs_A, &nzc, buff_isuppz, &try_rac, buff_work, &lwork, buff_iwork, &liwork, &info ); break; } } } FLA_Obj_free( &isuppz ); FLA_Obj_free( &work ); FLA_Obj_free( &iwork ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Trsmsx_external( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ) { FLA_Datatype datatype; int m_B, n_B; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; side_t blis_side; uplo_t blis_uplo; trans_t blis_trans; diag_t blis_diag; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trsmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( B ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); FLA_Param_map_flame_to_blis_side( side, &blis_side ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_trans( trans, &blis_trans ); FLA_Param_map_flame_to_blis_diag( diag, &blis_diag ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_strsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dtrsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_ctrsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_ztrsmsx( blis_side, blis_uplo, blis_trans, blis_diag, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tridiag_unb_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj t ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; int n_A, cs_A; FLA_Obj d, e; char blas_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Tridiag_check( uplo, A, t ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), n_A, 1, 0, 0, &d ); FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), n_A - 1, 1, 0, 0, &e ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); switch( datatype ){ case FLA_FLOAT: { float* buff_A = ( float * ) FLA_FLOAT_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); float* buff_t = ( float * ) FLA_FLOAT_PTR( t ); F77_ssytd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); double* buff_t = ( double * ) FLA_DOUBLE_PTR( t ); F77_dsytd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); float* buff_d = ( float * ) FLA_FLOAT_PTR( d ); float* buff_e = ( float * ) FLA_FLOAT_PTR( e ); scomplex* buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); F77_chetd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_d = ( double * ) FLA_DOUBLE_PTR( d ); double* buff_e = ( double * ) FLA_DOUBLE_PTR( e ); dcomplex* buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); F77_zhetd2( &blas_uplo, &n_A, buff_A, &cs_A, buff_d, buff_e, buff_t, &info ); break; } } FLA_Obj_free( &d ); FLA_Obj_free( &e ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Hemm_external( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C ) { FLA_Datatype datatype; int m_C, n_C; int rs_A, cs_A; int rs_B, cs_B; int rs_C, cs_C; side1_t blis_side; uplo1_t blis_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemm_check( side, uplo, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); rs_C = FLA_Obj_row_stride( C ); cs_C = FLA_Obj_col_stride( C ); FLA_Param_map_flame_to_blis_side( side, &blis_side ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_C = ( float * ) FLA_FLOAT_PTR( C ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bl1_ssymm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_C = ( double * ) FLA_DOUBLE_PTR( C ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bl1_dsymm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_C = ( scomplex * ) FLA_COMPLEX_PTR( C ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bl1_chemm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_C = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bl1_zhemm( blis_side, blis_uplo, m_C, n_C, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B, buff_beta, buff_C, rs_C, cs_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Syrk_external_gpu( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu ) { FLA_Datatype datatype; int k_A; int m_A, n_A; int m_C; int ldim_A; int ldim_C; char blas_uplo; char blas_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Syrk_check( uplo, trans, alpha, A, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); ldim_A = FLA_Obj_length( A ); m_C = FLA_Obj_length( C ); ldim_C = FLA_Obj_length( C ); if ( trans == FLA_NO_TRANSPOSE ) k_A = n_A; else k_A = m_A; FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); switch( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); cublasSsyrk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( float * ) A_gpu, ldim_A, *buff_beta, ( float * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); cublasDsyrk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( double * ) A_gpu, ldim_A, *buff_beta, ( double * ) C_gpu, ldim_C ); break; } case FLA_COMPLEX: { cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha ); cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta ); cublasCsyrk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( cuComplex * ) A_gpu, ldim_A, *buff_beta, ( cuComplex * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE_COMPLEX: { cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); cuDoubleComplex *buff_beta = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); cublasZsyrk( blas_uplo, blas_trans, m_C, k_A, *buff_alpha, ( cuDoubleComplex * ) A_gpu, ldim_A, *buff_beta, ( cuDoubleComplex * ) C_gpu, ldim_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Setr( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; uplo1_t blis_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Setr_check( uplo, alpha, A ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); switch ( datatype ) { case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); bl1_ssetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); bl1_dsetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); bl1_csetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); bl1_zsetmr( blis_uplo, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_SA_LU_unb( FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L ) { FLA_Datatype datatype; int m_U, cs_U; int m_D, cs_D; int cs_L; // int rs_U; int rs_D; // int rs_L; int m_U_min_j, m_U_min_j_min_1; int j, ipiv; int* buff_p; if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( U ); m_U = FLA_Obj_length( U ); // rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); m_D = FLA_Obj_length( D ); rs_D = FLA_Obj_row_stride( D ); cs_D = FLA_Obj_col_stride( D ); // rs_L = FLA_Obj_row_stride( L ); cs_L = FLA_Obj_col_stride( L ); FLA_Copy_external( U, L ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L ); buff_p = ( int * ) FLA_INT_PTR( p ); switch ( datatype ){ case FLA_FLOAT: { float* buff_U = ( float * ) FLA_FLOAT_PTR( U ); float* buff_D = ( float * ) FLA_FLOAT_PTR( D ); float* buff_L = ( float * ) FLA_FLOAT_PTR( L ); float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE ); float L_tmp; float D_tmp; float d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bl1_samax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabsf( L_tmp ) < fabsf( D_tmp ) ) { bl1_sswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ]; bl1_sscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_sger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_scopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE: { double* buff_U = ( double * ) FLA_DOUBLE_PTR( U ); double* buff_D = ( double * ) FLA_DOUBLE_PTR( D ); double* buff_L = ( double * ) FLA_DOUBLE_PTR( L ); double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double L_tmp; double D_tmp; double d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bl1_damax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabs( L_tmp ) < fabs( D_tmp ) ) { bl1_dswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ]; bl1_dscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_dger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_dcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_COMPLEX: { scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U ); scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D ); scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L ); scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); scomplex L_tmp; scomplex D_tmp; scomplex d_inv_Ljj; scomplex Ljj; float temp; for ( j = 0; j < m_U; ++j ) { bl1_camax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabsf( L_tmp.real + L_tmp.imag ) < fabsf( D_tmp.real + D_tmp.imag ) ) { bl1_cswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0F / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bl1_cscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_cger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_ccopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D ); dcomplex* buff_L = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L ); dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE ); dcomplex L_tmp; dcomplex D_tmp; dcomplex d_inv_Ljj; dcomplex Ljj; double temp; for ( j = 0; j < m_U; ++j ) { bl1_zamax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( fabs( L_tmp.real + L_tmp.imag ) < fabs( D_tmp.real + D_tmp.imag ) ) { bl1_zswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0 / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bl1_zscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bl1_zger( BLIS1_NO_CONJUGATE, BLIS1_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bl1_zcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hemvc_external( FLA_Uplo uplo, FLA_Conj conja, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A; int rs_A, cs_A; int inc_x; int inc_y; uplo_t blis_uplo; conj_t blis_conja; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemvc_check( uplo, conja, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo ); FLA_Param_map_flame_to_blis_conj( conja, &blis_conja ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_ssymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dsymv( blis_uplo, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_chemv( blis_uplo, blis_conja, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_zhemv( blis_uplo, blis_conja, m_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Tridiag_apply_Q_external( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Obj A, FLA_Obj t, FLA_Obj B ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; // int m_A, n_A; int m_B, n_B; int cs_A; int cs_B; int k_t; int lwork; char blas_side; char blas_uplo; char blas_trans; FLA_Obj work; int i; //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) // FLA_Apply_Q_check( side, trans, storev, A, t, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); // m_A = FLA_Obj_length( A ); // n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); cs_B = FLA_Obj_col_stride( B ); k_t = FLA_Obj_vector_dim( t ); FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_t = ( float * ) FLA_FLOAT_PTR( t ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); float *buff_work = ( float * ) FLA_FLOAT_PTR( work ); F77_sormtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_t = ( double * ) FLA_DOUBLE_PTR( t ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); double *buff_work = ( double * ) FLA_DOUBLE_PTR( work ); F77_dormtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_t = ( scomplex * ) FLA_COMPLEX_PTR( t ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); scomplex *buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work ); F77_cunmtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_t = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work ); F77_zunmtr( &blas_side, &blas_uplo, &blas_trans, &m_B, &n_B, buff_A, &cs_A, buff_t, buff_B, &cs_B, buff_work, &lwork, &info ); break; } } } FLA_Obj_free( &work ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Axpy_external( FLA_Obj alpha, FLA_Obj A, FLA_Obj B ) { FLA_Datatype datatype; int m_B, n_B; int rs_A, cs_A; int rs_B, cs_B; trans1_t blis_trans; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Axpy_check( alpha, A, B ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); m_B = FLA_Obj_length( B ); n_B = FLA_Obj_width( B ); rs_B = FLA_Obj_row_stride( B ); cs_B = FLA_Obj_col_stride( B ); if ( FLA_Obj_is_conformal_to( FLA_NO_TRANSPOSE, A, B ) ) FLA_Param_map_flame_to_blis_trans( FLA_NO_TRANSPOSE, &blis_trans ); else // if ( FLA_Obj_is_conformal_to( FLA_TRANSPOSE, A, B ) ) FLA_Param_map_flame_to_blis_trans( FLA_TRANSPOSE, &blis_trans ); switch ( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_B = ( float * ) FLA_FLOAT_PTR( B ); bl1_saxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_B = ( double * ) FLA_DOUBLE_PTR( B ); bl1_daxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } case FLA_COMPLEX: { scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B ); bl1_caxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ); bl1_zaxpymt( blis_trans, m_B, n_B, buff_alpha, buff_A, rs_A, cs_A, buff_B, rs_B, cs_B ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Hemm_external_gpu( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu ) { FLA_Datatype datatype; int m_C, n_C; int ldim_A; int ldim_B; int ldim_C; char blas_side; char blas_uplo; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Hemm_check( side, uplo, alpha, A, B, beta, C ); if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); ldim_A = FLA_Obj_length( A ); ldim_B = FLA_Obj_length( B ); m_C = FLA_Obj_length( C ); n_C = FLA_Obj_width( C ); ldim_C = FLA_Obj_length( C ); FLA_Param_map_flame_to_netlib_side( side, &blas_side ); FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); switch( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); cublasSsymm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( float * ) A_gpu, ldim_A, ( float * ) B_gpu, ldim_B, *buff_beta, ( float * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); cublasDsymm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( double * ) A_gpu, ldim_A, ( double * ) B_gpu, ldim_B, *buff_beta, ( double * ) C_gpu, ldim_C ); break; } case FLA_COMPLEX: { cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha ); cuComplex *buff_beta = ( cuComplex * ) FLA_COMPLEX_PTR( beta ); cublasChemm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( cuComplex * ) A_gpu, ldim_A, ( cuComplex * ) B_gpu, ldim_B, *buff_beta, ( cuComplex * ) C_gpu, ldim_C ); break; } case FLA_DOUBLE_COMPLEX: { cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); cuDoubleComplex *buff_beta = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); cublasZhemm( blas_side, blas_uplo, m_C, n_C, *buff_alpha, ( cuDoubleComplex * ) A_gpu, ldim_A, ( cuDoubleComplex * ) B_gpu, ldim_B, *buff_beta, ( cuDoubleComplex * ) C_gpu, ldim_C ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Svdd_external( FLA_Svd_type jobz, FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V ) { int info = 0; #ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES FLA_Datatype datatype; FLA_Datatype dt_real; FLA_Datatype dt_int; int m_A, n_A, cs_A; int cs_U; int cs_V; int min_m_n; int lwork, lrwork, liwork; FLA_Obj work, rwork, iwork; char blas_jobz; int i; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Svdd_check( jobz, A, s, U, V ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); dt_real = FLA_Obj_datatype_proj_to_real( A ); dt_int = FLA_INT; m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); cs_A = FLA_Obj_col_stride( A ); cs_U = FLA_Obj_col_stride( U ); cs_V = FLA_Obj_col_stride( V ); min_m_n = min( m_A, n_A ); // Allocate the rwork and iwork arrays up front. if ( jobz == FLA_SVD_VECTORS_NONE ) lrwork = 5 * min_m_n; else lrwork = 5 * min_m_n * min_m_n + 7 * min_m_n; liwork = 8 * min_m_n; FLA_Obj_create( dt_int, liwork, 1, 0, 0, &iwork ); if ( FLA_Obj_is_complex( A ) ) FLA_Obj_create( dt_real, lrwork, 1, 0, 0, &rwork ); FLA_Param_map_flame_to_netlib_svd_type( jobz, &blas_jobz ); // Make a workspace query the first time through. This will provide us with // and ideal workspace size based on an internal block size. lwork = -1; FLA_Obj_create( datatype, 1, 1, 0, 0, &work ); for ( i = 0; i < 2; ++i ) { if ( i == 1 ) { // Grab the queried ideal workspace size from the work array, free the // work object, and then re-allocate the workspace with the ideal size. if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX ) lwork = ( int ) *FLA_FLOAT_PTR( work ); else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) lwork = ( int ) *FLA_DOUBLE_PTR( work ); FLA_Obj_free( &work ); FLA_Obj_create( datatype, lwork, 1, 0, 0, &work ); } switch( datatype ) { case FLA_FLOAT: { float* buff_A = ( float* ) FLA_FLOAT_PTR( A ); float* buff_s = ( float* ) FLA_FLOAT_PTR( s ); float* buff_U = ( float* ) FLA_FLOAT_PTR( U ); float* buff_V = ( float* ) FLA_FLOAT_PTR( V ); float* buff_work = ( float* ) FLA_FLOAT_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_sgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_iwork, &info ); break; } case FLA_DOUBLE: { double* buff_A = ( double* ) FLA_DOUBLE_PTR( A ); double* buff_s = ( double* ) FLA_DOUBLE_PTR( s ); double* buff_U = ( double* ) FLA_DOUBLE_PTR( U ); double* buff_V = ( double* ) FLA_DOUBLE_PTR( V ); double* buff_work = ( double* ) FLA_DOUBLE_PTR( work ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_dgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_iwork, &info ); break; } case FLA_COMPLEX: { scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A ); float* buff_s = ( float* ) FLA_FLOAT_PTR( s ); scomplex* buff_U = ( scomplex* ) FLA_COMPLEX_PTR( U ); scomplex* buff_V = ( scomplex* ) FLA_COMPLEX_PTR( V ); scomplex* buff_work = ( scomplex* ) FLA_COMPLEX_PTR( work ); float* buff_rwork = ( float* ) FLA_FLOAT_PTR( rwork ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_cgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_rwork, buff_iwork, &info ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A ); double* buff_s = ( double* ) FLA_DOUBLE_PTR( s ); dcomplex* buff_U = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_V = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( V ); dcomplex* buff_work = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( work ); double* buff_rwork = ( double* ) FLA_DOUBLE_PTR( rwork ); int* buff_iwork = ( int* ) FLA_INT_PTR( iwork ); F77_zgesdd( &blas_jobz, &m_A, &n_A, buff_A, &cs_A, buff_s, buff_U, &cs_U, buff_V, &cs_V, buff_work, &lwork, buff_rwork, buff_iwork, &info ); break; } } } FLA_Obj_free( &work ); FLA_Obj_free( &iwork ); if ( FLA_Obj_is_complex( A ) ) FLA_Obj_free( &rwork ); #else FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ); #endif return info; }
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y ) { FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_x; int inc_y; trans_t blis_transa; conj_t blis_conjx; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Gemv_check( transa, alpha, A, x, beta, y ); if ( FLA_Obj_has_zero_dim( A ) ) { FLA_Scal_external( beta, y ); return FLA_SUCCESS; } datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_x = FLA_Obj_vector_inc( x ); inc_y = FLA_Obj_vector_inc( y ); FLA_Param_map_flame_to_blis_trans( transa, &blis_transa ); FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx ); switch( datatype ){ case FLA_FLOAT: { float *buff_A = ( float * ) FLA_FLOAT_PTR( A ); float *buff_x = ( float * ) FLA_FLOAT_PTR( x ); float *buff_y = ( float * ) FLA_FLOAT_PTR( y ); float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); float *buff_beta = ( float * ) FLA_FLOAT_PTR( beta ); bli_sgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE: { double *buff_A = ( double * ) FLA_DOUBLE_PTR( A ); double *buff_x = ( double * ) FLA_DOUBLE_PTR( x ); double *buff_y = ( double * ) FLA_DOUBLE_PTR( y ); double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); double *buff_beta = ( double * ) FLA_DOUBLE_PTR( beta ); bli_dgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_COMPLEX: { scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A ); scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x ); scomplex *buff_y = ( scomplex * ) FLA_COMPLEX_PTR( y ); scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); scomplex *buff_beta = ( scomplex * ) FLA_COMPLEX_PTR( beta ); bli_cgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ); dcomplex *buff_y = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); dcomplex *buff_beta = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ); bli_zgemv( blis_transa, blis_conjx, m_A, n_A, buff_alpha, buff_A, rs_A, cs_A, buff_x, inc_x, buff_beta, buff_y, inc_y ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Trsv_external_gpu( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, void* A_gpu, FLA_Obj x, void* x_gpu ) { FLA_Datatype datatype; int m_A; int ldim_A; int inc_x; char blas_uplo; char blas_trans; char blas_diag; if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) FLA_Trsv_check( uplo, trans, diag, A, x ); if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); ldim_A = FLA_Obj_length( A ); inc_x = 1; FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo ); FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans ); FLA_Param_map_flame_to_netlib_diag( diag, &blas_diag ); switch( datatype ){ case FLA_FLOAT: { cublasStrsv( blas_uplo, blas_trans, blas_diag, m_A, ( float * ) A_gpu, ldim_A, ( float * ) x_gpu, inc_x ); break; } case FLA_DOUBLE: { cublasDtrsv( blas_uplo, blas_trans, blas_diag, m_A, ( double * ) A_gpu, ldim_A, ( double * ) x_gpu, inc_x ); break; } case FLA_COMPLEX: { cublasCtrsv( blas_uplo, blas_trans, blas_diag, m_A, ( cuComplex * ) A_gpu, ldim_A, ( cuComplex * ) x_gpu, inc_x ); break; } case FLA_DOUBLE_COMPLEX: { cublasZtrsv( blas_uplo, blas_trans, blas_diag, m_A, ( cuDoubleComplex * ) A_gpu, ldim_A, ( cuDoubleComplex * ) x_gpu, inc_x ); break; } } return FLA_SUCCESS; }