/*
 * FLA_Tridiag_form_Q_external
 *
 * Wrapper that forwards A and t to an external LAPACK routine selected by
 * the datatype of A: F77_sorgtr, F77_dorgtr, F77_cungtr or F77_zungtr.
 *
 * Parameters:
 *   uplo - mapped to its netlib character form and passed to the routine.
 *   A    - matrix object; its buffer, length and column stride are passed.
 *   t    - object whose buffer is passed right after A's leading dimension.
 *
 * A temporary workspace of max(1, m_A - 1) * blocksize elements is created
 * before the call and freed afterwards.
 *
 * Returns FLA_SUCCESS immediately when A has a zero dimension; otherwise the
 * value of `info` as written by the external routine (initially 0). When
 * FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES is not defined, only
 * FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ) is invoked.
 */
FLA_Error FLA_Tridiag_form_Q_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj t )
{
  int          info = 0;
#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES
  FLA_Datatype dt;
  int          order;
  int          ld_A;
  int          work_len;
  char         uplo_char;
  FLA_Obj      scratch;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Tridiag_form_Q_check( uplo, A, t );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  dt    = FLA_Obj_datatype( A );
  order = FLA_Obj_length( A );
  ld_A  = FLA_Obj_col_stride( A );

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_char );

  // Size and allocate the scratch vector handed to the external routine.
  work_len = max( 1, ( order - 1 ) ) * FLA_Query_blocksize( dt, FLA_DIMENSION_MIN );
  FLA_Obj_create( dt, work_len, 1, 0, 0, &scratch );

  if ( dt == FLA_FLOAT )
  {
    F77_sorgtr( &uplo_char,
                &order,
                ( float * ) FLA_FLOAT_PTR( A ), &ld_A,
                ( float * ) FLA_FLOAT_PTR( t ),
                ( float * ) FLA_FLOAT_PTR( scratch ), &work_len,
                &info );
  }
  else if ( dt == FLA_DOUBLE )
  {
    F77_dorgtr( &uplo_char,
                &order,
                ( double * ) FLA_DOUBLE_PTR( A ), &ld_A,
                ( double * ) FLA_DOUBLE_PTR( t ),
                ( double * ) FLA_DOUBLE_PTR( scratch ), &work_len,
                &info );
  }
  else if ( dt == FLA_COMPLEX )
  {
    F77_cungtr( &uplo_char,
                &order,
                ( scomplex * ) FLA_COMPLEX_PTR( A ), &ld_A,
                ( scomplex * ) FLA_COMPLEX_PTR( t ),
                ( scomplex * ) FLA_COMPLEX_PTR( scratch ), &work_len,
                &info );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    F77_zungtr( &uplo_char,
                &order,
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), &ld_A,
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ),
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( scratch ), &work_len,
                &info );
  }

  FLA_Obj_free( &scratch );
#else
  FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED );
#endif

  return info;
}
Пример #2
0
/*
 * FLA_Ger_external
 *
 * Dispatches on the datatype of A to bl1_sger, bl1_dger, bl1_cger or
 * bl1_zger, handing over alpha, the vectors x and y, and the matrix A.
 * Both conjugation arguments are mapped from FLA_NO_CONJUGATE.
 *
 * Parameters:
 *   alpha - scalar object; its buffer is passed as the kernel's alpha.
 *   x, y  - vector objects; passed together with their vector increments.
 *   A     - matrix object; passed with its length, width and both strides.
 *
 * Returns FLA_SUCCESS. No kernel is called when A has a zero dimension.
 */
FLA_Error FLA_Ger_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A )
{
  FLA_Datatype dt;
  int          rows, cols;
  int          row_str, col_str;
  int          x_inc, y_inc;
  conj1_t      conj_x;
  conj1_t      conj_y;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Ger_check( alpha, x, y, A );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  dt      = FLA_Obj_datatype( A );

  rows    = FLA_Obj_length( A );
  cols    = FLA_Obj_width( A );
  row_str = FLA_Obj_row_stride( A );
  col_str = FLA_Obj_col_stride( A );

  x_inc   = FLA_Obj_vector_inc( x );
  y_inc   = FLA_Obj_vector_inc( y );

  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &conj_x );
  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &conj_y );

  if ( dt == FLA_FLOAT )
  {
    bl1_sger( conj_x, conj_y,
              rows, cols,
              ( float * ) FLA_FLOAT_PTR( alpha ),
              ( float * ) FLA_FLOAT_PTR( x ), x_inc,
              ( float * ) FLA_FLOAT_PTR( y ), y_inc,
              ( float * ) FLA_FLOAT_PTR( A ), row_str, col_str );
  }
  else if ( dt == FLA_DOUBLE )
  {
    bl1_dger( conj_x, conj_y,
              rows, cols,
              ( double * ) FLA_DOUBLE_PTR( alpha ),
              ( double * ) FLA_DOUBLE_PTR( x ), x_inc,
              ( double * ) FLA_DOUBLE_PTR( y ), y_inc,
              ( double * ) FLA_DOUBLE_PTR( A ), row_str, col_str );
  }
  else if ( dt == FLA_COMPLEX )
  {
    bl1_cger( conj_x, conj_y,
              rows, cols,
              ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
              ( scomplex * ) FLA_COMPLEX_PTR( x ), x_inc,
              ( scomplex * ) FLA_COMPLEX_PTR( y ), y_inc,
              ( scomplex * ) FLA_COMPLEX_PTR( A ), row_str, col_str );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    bl1_zger( conj_x, conj_y,
              rows, cols,
              ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
              ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ), x_inc,
              ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y ), y_inc,
              ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), row_str, col_str );
  }

  return FLA_SUCCESS;
}
Пример #3
0
/*
 * FLA_Her2k_external
 *
 * Dispatches on the datatype of A: real types go to bli_ssyr2k / bli_dsyr2k,
 * complex types go to bli_cher2k / bli_zher2k. In the complex cases beta is
 * read through the real accessors (FLA_FLOAT_PTR / FLA_DOUBLE_PTR).
 *
 * Parameters:
 *   uplo, trans - mapped to their BLIS equivalents and passed through.
 *   alpha, beta - scalar objects supplying the kernel's alpha and beta.
 *   A, B        - input matrices, passed with their row/column strides.
 *   C           - matrix passed last to the kernel; its length is used as
 *                 the kernel's first dimension argument.
 *
 * The second dimension argument is the width of A when trans is
 * FLA_NO_TRANSPOSE and the length of A otherwise.
 *
 * Returns FLA_SUCCESS. If C has a zero dimension nothing is done; if A or B
 * has a zero dimension, only FLA_Scal_external( beta, C ) is performed.
 */
FLA_Error FLA_Her2k_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Datatype dt;
  int          a_rows, a_cols;
  int          a_rs, a_cs;
  int          b_rs, b_cs;
  int          c_dim;
  int          c_rs, c_cs;
  int          inner_dim;
  uplo_t       uplo_blis;
  trans_t      trans_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Her2k_check( uplo, trans, alpha, A, B, beta, C );

  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  // With an empty A or B there is no product term; only scale C by beta.
  if ( FLA_Obj_has_zero_dim( A ) || FLA_Obj_has_zero_dim( B ) )
  {
    FLA_Scal_external( beta, C );
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( A );

  a_rows = FLA_Obj_length( A );
  a_cols = FLA_Obj_width( A );
  a_rs   = FLA_Obj_row_stride( A );
  a_cs   = FLA_Obj_col_stride( A );

  b_rs   = FLA_Obj_row_stride( B );
  b_cs   = FLA_Obj_col_stride( B );

  c_dim  = FLA_Obj_length( C );
  c_rs   = FLA_Obj_row_stride( C );
  c_cs   = FLA_Obj_col_stride( C );

  inner_dim = ( trans == FLA_NO_TRANSPOSE ) ? a_cols : a_rows;

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );

  if ( dt == FLA_FLOAT )
  {
    bli_ssyr2k( uplo_blis,
                trans_blis,
                c_dim,
                inner_dim,
                ( float * ) FLA_FLOAT_PTR( alpha ),
                ( float * ) FLA_FLOAT_PTR( A ), a_rs, a_cs,
                ( float * ) FLA_FLOAT_PTR( B ), b_rs, b_cs,
                ( float * ) FLA_FLOAT_PTR( beta ),
                ( float * ) FLA_FLOAT_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    bli_dsyr2k( uplo_blis,
                trans_blis,
                c_dim,
                inner_dim,
                ( double * ) FLA_DOUBLE_PTR( alpha ),
                ( double * ) FLA_DOUBLE_PTR( A ), a_rs, a_cs,
                ( double * ) FLA_DOUBLE_PTR( B ), b_rs, b_cs,
                ( double * ) FLA_DOUBLE_PTR( beta ),
                ( double * ) FLA_DOUBLE_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    bli_cher2k( uplo_blis,
                trans_blis,
                c_dim,
                inner_dim,
                ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
                ( scomplex * ) FLA_COMPLEX_PTR( A ), a_rs, a_cs,
                ( scomplex * ) FLA_COMPLEX_PTR( B ), b_rs, b_cs,
                ( float    * ) FLA_FLOAT_PTR( beta ),
                ( scomplex * ) FLA_COMPLEX_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    bli_zher2k( uplo_blis,
                trans_blis,
                c_dim,
                inner_dim,
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), a_rs, a_cs,
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ), b_rs, b_cs,
                ( double   * ) FLA_DOUBLE_PTR( beta ),
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ), c_rs, c_cs );
  }

  return FLA_SUCCESS;
}
Пример #4
0
/*
 * FLA_Nrm2_external
 *
 * Dispatches on the datatype of x to bli_snrm2, bli_dnrm2, bli_cnrm2 or
 * bli_znrm2, passing the vector dimension and increment of x and a pointer
 * into norm_x that receives the result.
 *
 * Parameters:
 *   x      - vector object whose buffer is passed to the kernel.
 *   norm_x - object whose buffer is passed as the kernel's output location.
 *
 * Returns FLA_SUCCESS. When x has a zero dimension, norm_x is set to
 * FLA_ZERO via FLA_Set and no kernel is called.
 */
FLA_Error FLA_Nrm2_external( FLA_Obj x, FLA_Obj norm_x )
{
  FLA_Datatype dt;
  int          len;
  int          stride;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Nrm2_check( x, norm_x );

  if ( FLA_Obj_has_zero_dim( x ) )
  {
    FLA_Set( FLA_ZERO, norm_x );
    return FLA_SUCCESS;
  }

  dt     = FLA_Obj_datatype( x );

  stride = FLA_Obj_vector_inc( x );
  len    = FLA_Obj_vector_dim( x );

  if ( dt == FLA_FLOAT )
  {
    bli_snrm2( len,
               ( float * ) FLA_FLOAT_PTR( x ), stride,
               ( float * ) FLA_FLOAT_PTR( norm_x ) );
  }
  else if ( dt == FLA_DOUBLE )
  {
    bli_dnrm2( len,
               ( double * ) FLA_DOUBLE_PTR( x ), stride,
               ( double * ) FLA_DOUBLE_PTR( norm_x ) );
  }
  else if ( dt == FLA_COMPLEX )
  {
    // NOTE(review): norm_x is located with the complex accessor but then
    // treated as float storage; other routines in this file use
    // FLA_FLOAT_PTR for real scalars in complex cases -- confirm which
    // accessor is intended here.
    bli_cnrm2( len,
               ( scomplex * ) FLA_COMPLEX_PTR( x ), stride,
               ( float    * ) FLA_COMPLEX_PTR( norm_x ) );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    // NOTE(review): same accessor question as the single-precision complex
    // case above (double storage reached via FLA_DOUBLE_COMPLEX_PTR).
    bli_znrm2( len,
               ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x ), stride,
               ( double   * ) FLA_DOUBLE_COMPLEX_PTR( norm_x ) );
  }

  return FLA_SUCCESS;
}
Пример #5
0
/*
 * FLA_Tevdr_external
 *
 * Wrapper around the external LAPACK routines F77_sstemr, F77_dstemr,
 * F77_cstemr and F77_zstemr, selected by the datatype of A.
 *
 * The routine is invoked twice. The first call is a workspace query
 * (lwork = liwork = -1) using one-element work arrays; the sizes reported
 * in those arrays are then used to allocate the real workspace for the
 * second call. If the query reports a non-zero info, the second call is
 * skipped because the reported sizes cannot be trusted.
 *
 * Parameters:
 *   jobz - mapped to its netlib character form and passed through.
 *   d, e - objects whose buffers are passed as the routine's d and e
 *          arguments (real datatype matching the precision of A).
 *   l    - object whose buffer is passed in the eigenvalue output position.
 *   A    - matrix object; its buffer and column stride are passed, and its
 *          width is used as the problem size.
 *
 * Hard-coded arguments: range = 'A', nzc = width of A, try_rac = TRUE.
 *
 * Returns FLA_SUCCESS immediately when A has a zero dimension; otherwise
 * the `info` value written by the last external call (initially 0). When
 * FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES is not defined, only
 * FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ) is invoked.
 * No parameter-check routine is called by this function.
 */
FLA_Error FLA_Tevdr_external( FLA_Evd_type jobz, FLA_Obj d, FLA_Obj e, FLA_Obj l, FLA_Obj A )
{
  int          info = 0;
#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES
  FLA_Datatype datatype;
  FLA_Datatype dt_real;
  int          n_A, cs_A;
  int          lisuppz, lwork, liwork;
  FLA_Obj      isuppz, work, iwork;
  char         blas_jobz;
  char         blas_range;
  int          i;
  // The range arguments are passed by address on every call, so give them
  // defined values instead of handing over indeterminate storage.
  // NOTE(review): LAPACK documents VL/VU of ?stemr as real-valued; they are
  // declared int here to match the existing call sites -- confirm against
  // the F77_?stemr prototypes.
  int          vl = 0, vu = 0;
  int          il = 0, iu = 0;
  int          nzc;
  int          try_rac;
  int          n_eig_found = 0;

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );
  dt_real  = FLA_Obj_datatype_proj_to_real( A );

  n_A      = FLA_Obj_width( A );
  cs_A     = FLA_Obj_col_stride( A );

  FLA_Param_map_flame_to_netlib_evd_type( jobz, &blas_jobz );

  // Hard-code some parameters.
  blas_range = 'A';
  nzc        = n_A;
  try_rac    = TRUE;

  // Allocate space for the isuppz array.
  lisuppz = 2 * n_A;
  FLA_Obj_create( FLA_INT, lisuppz, 1, 0, 0, &isuppz );

  // Make a workspace query the first time through. This will provide us with
  // an ideal workspace size.
  lwork = -1;
  liwork = -1;
  FLA_Obj_create( dt_real, 1, 1, 0, 0, &work );
  FLA_Obj_create( FLA_INT, 1, 1, 0, 0, &iwork );

  for ( i = 0; i < 2; ++i )
  {
    if ( i == 1 )
    {
      // A failed query leaves nothing reliable in work/iwork; do not size
      // the real workspace from it. Fall through to the cleanup below and
      // report the query's info to the caller.
      if ( info != 0 ) break;

      // Grab the queried ideal workspace size from the work arrays, free the
      // work object, and then re-allocate the workspace with the ideal size.
      if      ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX )
      {
        lwork  = ( int ) *FLA_FLOAT_PTR( work );
        liwork = ( int ) *FLA_INT_PTR( iwork );
      }
      else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX )
      {
        lwork  = ( int ) *FLA_DOUBLE_PTR( work );
        liwork = ( int ) *FLA_INT_PTR( iwork );
      }

      FLA_Obj_free( &work );
      FLA_Obj_free( &iwork );
      FLA_Obj_create( dt_real, lwork,  1, 0, 0, &work );
      FLA_Obj_create( FLA_INT, liwork, 1, 0, 0, &iwork );
    }

    switch( datatype ) {

    case FLA_FLOAT:
    {
      float*    buff_d      = ( float*    ) FLA_FLOAT_PTR( d );
      float*    buff_e      = ( float*    ) FLA_FLOAT_PTR( e );
      float*    buff_l      = ( float*    ) FLA_FLOAT_PTR( l );
      float*    buff_A      = ( float*    ) FLA_FLOAT_PTR( A );
      int*      buff_isuppz = ( int*      ) FLA_INT_PTR( isuppz );
      float*    buff_work   = ( float*    ) FLA_FLOAT_PTR( work );
      int*      buff_iwork  = ( int*      ) FLA_INT_PTR( iwork );

      F77_sstemr( &blas_jobz,
                  &blas_range,
                  &n_A,
                  buff_d,
                  buff_e,
                  &vl, &vu,
                  &il, &iu,
                  &n_eig_found,
                  buff_l,
                  buff_A,     &cs_A,
                  &nzc,
                  buff_isuppz,
                  &try_rac,
                  buff_work,  &lwork,
                  buff_iwork, &liwork,
                  &info );

      break;
    }

    case FLA_DOUBLE:
    {
      double*   buff_d      = ( double*   ) FLA_DOUBLE_PTR( d );
      double*   buff_e      = ( double*   ) FLA_DOUBLE_PTR( e );
      double*   buff_l      = ( double*   ) FLA_DOUBLE_PTR( l );
      double*   buff_A      = ( double*   ) FLA_DOUBLE_PTR( A );
      int*      buff_isuppz = ( int*      ) FLA_INT_PTR( isuppz );
      double*   buff_work   = ( double*   ) FLA_DOUBLE_PTR( work );
      int*      buff_iwork  = ( int*      ) FLA_INT_PTR( iwork );

      F77_dstemr( &blas_jobz,
                  &blas_range,
                  &n_A,
                  buff_d,
                  buff_e,
                  &vl, &vu,
                  &il, &iu,
                  &n_eig_found,
                  buff_l,
                  buff_A,     &cs_A,
                  &nzc,
                  buff_isuppz,
                  &try_rac,
                  buff_work,  &lwork,
                  buff_iwork, &liwork,
                  &info );

      break;
    }

    case FLA_COMPLEX:
    {
      // d, e, l and the floating-point workspace are real even though A is
      // complex.
      float*    buff_d      = ( float*    ) FLA_FLOAT_PTR( d );
      float*    buff_e      = ( float*    ) FLA_FLOAT_PTR( e );
      float*    buff_l      = ( float*    ) FLA_FLOAT_PTR( l );
      scomplex* buff_A      = ( scomplex* ) FLA_COMPLEX_PTR( A );
      int*      buff_isuppz = ( int*      ) FLA_INT_PTR( isuppz );
      float*    buff_work   = ( float*    ) FLA_FLOAT_PTR( work );
      int*      buff_iwork  = ( int*      ) FLA_INT_PTR( iwork );

      F77_cstemr( &blas_jobz,
                  &blas_range,
                  &n_A,
                  buff_d,
                  buff_e,
                  &vl, &vu,
                  &il, &iu,
                  &n_eig_found,
                  buff_l,
                  buff_A,     &cs_A,
                  &nzc,
                  buff_isuppz,
                  &try_rac,
                  buff_work,  &lwork,
                  buff_iwork, &liwork,
                  &info );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      double*   buff_d      = ( double*   ) FLA_DOUBLE_PTR( d );
      double*   buff_e      = ( double*   ) FLA_DOUBLE_PTR( e );
      double*   buff_l      = ( double*   ) FLA_DOUBLE_PTR( l );
      dcomplex* buff_A      = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
      int*      buff_isuppz = ( int*      ) FLA_INT_PTR( isuppz );
      double*   buff_work   = ( double*   ) FLA_DOUBLE_PTR( work );
      int*      buff_iwork  = ( int*      ) FLA_INT_PTR( iwork );

      F77_zstemr( &blas_jobz,
                  &blas_range,
                  &n_A,
                  buff_d,
                  buff_e,
                  &vl, &vu,
                  &il, &iu,
                  &n_eig_found,
                  buff_l,
                  buff_A,     &cs_A,
                  &nzc,
                  buff_isuppz,
                  &try_rac,
                  buff_work,  &lwork,
                  buff_iwork, &liwork,
                  &info );

      break;
    }

    }
  }

  FLA_Obj_free( &isuppz );
  FLA_Obj_free( &work );
  FLA_Obj_free( &iwork );
#else
  FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED );
#endif

  return info;
}
Пример #6
0
/*
 * FLA_Trsmsx_external
 *
 * Dispatches on the datatype of A to bli_strsmsx, bli_dtrsmsx, bli_ctrsmsx
 * or bli_ztrsmsx.
 *
 * Parameters:
 *   side, uplo, trans, diag - mapped to their BLIS equivalents and passed
 *                             through.
 *   alpha, beta             - scalar objects supplying the kernel's alpha
 *                             and beta.
 *   A                       - matrix passed with its row/column strides.
 *   B                       - matrix passed with its strides; its length
 *                             and width are the kernel's dimension
 *                             arguments.
 *   C                       - matrix passed last, with its strides.
 *
 * Returns FLA_SUCCESS. No kernel is called when B has a zero dimension.
 */
FLA_Error FLA_Trsmsx_external( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Datatype dt;
  int          b_rows, b_cols;
  int          a_rs, a_cs;
  int          b_rs, b_cs;
  int          c_rs, c_cs;
  side_t       side_blis;
  uplo_t       uplo_blis;
  trans_t      trans_blis;
  diag_t       diag_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Trsmsx_check( side, uplo, trans, diag, alpha, A, B, beta, C );

  if ( FLA_Obj_has_zero_dim( B ) )
    return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  a_rs   = FLA_Obj_row_stride( A );
  a_cs   = FLA_Obj_col_stride( A );

  b_rows = FLA_Obj_length( B );
  b_cols = FLA_Obj_width( B );
  b_rs   = FLA_Obj_row_stride( B );
  b_cs   = FLA_Obj_col_stride( B );

  c_rs   = FLA_Obj_row_stride( C );
  c_cs   = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &side_blis );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );
  FLA_Param_map_flame_to_blis_trans( trans, &trans_blis );
  FLA_Param_map_flame_to_blis_diag( diag, &diag_blis );

  if ( dt == FLA_FLOAT )
  {
    bli_strsmsx( side_blis, uplo_blis, trans_blis, diag_blis,
                 b_rows, b_cols,
                 ( float * ) FLA_FLOAT_PTR( alpha ),
                 ( float * ) FLA_FLOAT_PTR( A ), a_rs, a_cs,
                 ( float * ) FLA_FLOAT_PTR( B ), b_rs, b_cs,
                 ( float * ) FLA_FLOAT_PTR( beta ),
                 ( float * ) FLA_FLOAT_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    bli_dtrsmsx( side_blis, uplo_blis, trans_blis, diag_blis,
                 b_rows, b_cols,
                 ( double * ) FLA_DOUBLE_PTR( alpha ),
                 ( double * ) FLA_DOUBLE_PTR( A ), a_rs, a_cs,
                 ( double * ) FLA_DOUBLE_PTR( B ), b_rs, b_cs,
                 ( double * ) FLA_DOUBLE_PTR( beta ),
                 ( double * ) FLA_DOUBLE_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    bli_ctrsmsx( side_blis, uplo_blis, trans_blis, diag_blis,
                 b_rows, b_cols,
                 ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
                 ( scomplex * ) FLA_COMPLEX_PTR( A ), a_rs, a_cs,
                 ( scomplex * ) FLA_COMPLEX_PTR( B ), b_rs, b_cs,
                 ( scomplex * ) FLA_COMPLEX_PTR( beta ),
                 ( scomplex * ) FLA_COMPLEX_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    bli_ztrsmsx( side_blis, uplo_blis, trans_blis, diag_blis,
                 b_rows, b_cols,
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), a_rs, a_cs,
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ), b_rs, b_cs,
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ),
                 ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ), c_rs, c_cs );
  }

  return FLA_SUCCESS;
}
Пример #7
0
/*
 * FLA_Tridiag_unb_external
 *
 * Wrapper that forwards A and t to an external LAPACK routine selected by
 * the datatype of A: F77_ssytd2, F77_dsytd2, F77_chetd2 or F77_zhetd2.
 *
 * Parameters:
 *   uplo - mapped to its netlib character form and passed to the routine.
 *   A    - matrix object; its buffer, width and column stride are passed.
 *   t    - object whose buffer is passed as the routine's last array
 *          argument.
 *
 * Two temporary vectors of A's real-projected datatype (lengths n and
 * n - 1, where n is the width of A) are created for the routine's d and e
 * arguments and freed afterwards; their contents are not returned to the
 * caller.
 *
 * Returns FLA_SUCCESS immediately when A has a zero dimension; otherwise the
 * value of `info` as written by the external routine (initially 0). When
 * FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES is not defined, only
 * FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED ) is invoked.
 */
FLA_Error FLA_Tridiag_unb_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj t )
{
  int          info = 0;
#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES
  FLA_Datatype dt;
  int          order;
  int          ld_A;
  FLA_Obj      diag_vec;
  FLA_Obj      offdiag_vec;
  char         uplo_char;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Tridiag_check( uplo, A, t );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  dt    = FLA_Obj_datatype( A );
  order = FLA_Obj_width( A );
  ld_A  = FLA_Obj_col_stride( A );

  // Scratch vectors for the routine's d and e outputs.
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), order,     1, 0, 0, &diag_vec );
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), order - 1, 1, 0, 0, &offdiag_vec );

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_char );

  if ( dt == FLA_FLOAT )
  {
    F77_ssytd2( &uplo_char,
                &order,
                ( float * ) FLA_FLOAT_PTR( A ), &ld_A,
                ( float * ) FLA_FLOAT_PTR( diag_vec ),
                ( float * ) FLA_FLOAT_PTR( offdiag_vec ),
                ( float * ) FLA_FLOAT_PTR( t ),
                &info );
  }
  else if ( dt == FLA_DOUBLE )
  {
    F77_dsytd2( &uplo_char,
                &order,
                ( double * ) FLA_DOUBLE_PTR( A ), &ld_A,
                ( double * ) FLA_DOUBLE_PTR( diag_vec ),
                ( double * ) FLA_DOUBLE_PTR( offdiag_vec ),
                ( double * ) FLA_DOUBLE_PTR( t ),
                &info );
  }
  else if ( dt == FLA_COMPLEX )
  {
    F77_chetd2( &uplo_char,
                &order,
                ( scomplex * ) FLA_COMPLEX_PTR( A ), &ld_A,
                ( float    * ) FLA_FLOAT_PTR( diag_vec ),
                ( float    * ) FLA_FLOAT_PTR( offdiag_vec ),
                ( scomplex * ) FLA_COMPLEX_PTR( t ),
                &info );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    F77_zhetd2( &uplo_char,
                &order,
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), &ld_A,
                ( double   * ) FLA_DOUBLE_PTR( diag_vec ),
                ( double   * ) FLA_DOUBLE_PTR( offdiag_vec ),
                ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t ),
                &info );
  }

  FLA_Obj_free( &diag_vec );
  FLA_Obj_free( &offdiag_vec );
#else
  FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED );
#endif

  return info;
}
Пример #8
0
/*
 * FLA_Hemm_external
 *
 * Dispatches on the datatype of A: real types go to bl1_ssymm / bl1_dsymm,
 * complex types go to bl1_chemm / bl1_zhemm.
 *
 * Parameters:
 *   side, uplo  - mapped to their BLIS equivalents and passed through.
 *   alpha, beta - scalar objects supplying the kernel's alpha and beta.
 *   A, B        - input matrices, passed with their row/column strides.
 *   C           - matrix passed last; its length and width are the kernel's
 *                 dimension arguments.
 *
 * Returns FLA_SUCCESS. No kernel is called when C has a zero dimension.
 */
FLA_Error FLA_Hemm_external( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Datatype dt;
  int          c_rows, c_cols;
  int          a_rs, a_cs;
  int          b_rs, b_cs;
  int          c_rs, c_cs;
  side1_t      side_blis;
  uplo1_t      uplo_blis;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Hemm_check( side, uplo, alpha, A, B, beta, C );

  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  a_rs   = FLA_Obj_row_stride( A );
  a_cs   = FLA_Obj_col_stride( A );

  b_rs   = FLA_Obj_row_stride( B );
  b_cs   = FLA_Obj_col_stride( B );

  c_rows = FLA_Obj_length( C );
  c_cols = FLA_Obj_width( C );
  c_rs   = FLA_Obj_row_stride( C );
  c_cs   = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &side_blis );
  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );

  if ( dt == FLA_FLOAT )
  {
    bl1_ssymm( side_blis, uplo_blis,
               c_rows, c_cols,
               ( float * ) FLA_FLOAT_PTR( alpha ),
               ( float * ) FLA_FLOAT_PTR( A ), a_rs, a_cs,
               ( float * ) FLA_FLOAT_PTR( B ), b_rs, b_cs,
               ( float * ) FLA_FLOAT_PTR( beta ),
               ( float * ) FLA_FLOAT_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    bl1_dsymm( side_blis, uplo_blis,
               c_rows, c_cols,
               ( double * ) FLA_DOUBLE_PTR( alpha ),
               ( double * ) FLA_DOUBLE_PTR( A ), a_rs, a_cs,
               ( double * ) FLA_DOUBLE_PTR( B ), b_rs, b_cs,
               ( double * ) FLA_DOUBLE_PTR( beta ),
               ( double * ) FLA_DOUBLE_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    bl1_chemm( side_blis, uplo_blis,
               c_rows, c_cols,
               ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
               ( scomplex * ) FLA_COMPLEX_PTR( A ), a_rs, a_cs,
               ( scomplex * ) FLA_COMPLEX_PTR( B ), b_rs, b_cs,
               ( scomplex * ) FLA_COMPLEX_PTR( beta ),
               ( scomplex * ) FLA_COMPLEX_PTR( C ), c_rs, c_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    bl1_zhemm( side_blis, uplo_blis,
               c_rows, c_cols,
               ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
               ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), a_rs, a_cs,
               ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B ), b_rs, b_cs,
               ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta ),
               ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C ), c_rs, c_cs );
  }

  return FLA_SUCCESS;
}
Пример #9
0
/*
 * FLA_Syrk_external_gpu
 *
 * Dispatches on the datatype of A to cublasSsyrk, cublasDsyrk, cublasCsyrk
 * or cublasZsyrk. The matrix data handed to cuBLAS comes from A_gpu and
 * C_gpu; the objects A and C are consulted only for datatype and
 * dimensions.
 *
 * Parameters:
 *   uplo, trans - mapped to their netlib character forms and passed through.
 *   alpha, beta - scalar objects; their values are dereferenced and passed
 *                 by value.
 *   A, A_gpu    - A supplies datatype, length and width; A_gpu is the
 *                 pointer passed as the input matrix, with A's length as
 *                 its leading dimension.
 *   C, C_gpu    - C supplies its length (used both as the first dimension
 *                 argument and as the leading dimension); C_gpu is the
 *                 pointer passed as the output matrix.
 *
 * The second dimension argument is the width of A when trans is
 * FLA_NO_TRANSPOSE and the length of A otherwise.
 *
 * Returns FLA_SUCCESS. No cuBLAS call is made when C has a zero dimension.
 */
FLA_Error FLA_Syrk_external_gpu( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu )
{
  FLA_Datatype dt;
  int          a_rows, a_cols;
  int          a_ld;
  int          c_dim;
  int          c_ld;
  int          inner_dim;
  char         uplo_char;
  char         trans_char;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Syrk_check( uplo, trans, alpha, A, beta, C );

  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  a_rows = FLA_Obj_length( A );
  a_cols = FLA_Obj_width( A );
  a_ld   = FLA_Obj_length( A );

  c_dim  = FLA_Obj_length( C );
  c_ld   = FLA_Obj_length( C );

  inner_dim = ( trans == FLA_NO_TRANSPOSE ) ? a_cols : a_rows;

  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_char );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_char );

  if ( dt == FLA_FLOAT )
  {
    float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
    float *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

    cublasSsyrk( uplo_char, trans_char,
                 c_dim, inner_dim,
                 *alpha_ptr,
                 ( float * ) A_gpu, a_ld,
                 *beta_ptr,
                 ( float * ) C_gpu, c_ld );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
    double *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

    cublasDsyrk( uplo_char, trans_char,
                 c_dim, inner_dim,
                 *alpha_ptr,
                 ( double * ) A_gpu, a_ld,
                 *beta_ptr,
                 ( double * ) C_gpu, c_ld );
  }
  else if ( dt == FLA_COMPLEX )
  {
    cuComplex *alpha_ptr = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
    cuComplex *beta_ptr  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

    cublasCsyrk( uplo_char, trans_char,
                 c_dim, inner_dim,
                 *alpha_ptr,
                 ( cuComplex * ) A_gpu, a_ld,
                 *beta_ptr,
                 ( cuComplex * ) C_gpu, c_ld );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    cuDoubleComplex *alpha_ptr = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    cuDoubleComplex *beta_ptr  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

    cublasZsyrk( uplo_char, trans_char,
                 c_dim, inner_dim,
                 *alpha_ptr,
                 ( cuDoubleComplex * ) A_gpu, a_ld,
                 *beta_ptr,
                 ( cuDoubleComplex * ) C_gpu, c_ld );
  }

  return FLA_SUCCESS;
}
Пример #10
0
/*
 * FLA_Setr
 *
 * Dispatches on the datatype of A to bl1_ssetmr, bl1_dsetmr, bl1_csetmr or
 * bl1_zsetmr, passing the mapped uplo value, A's dimensions, alpha's
 * buffer, and A's buffer with its row/column strides.
 *
 * Parameters:
 *   uplo  - mapped to its BLIS equivalent and passed through.
 *   alpha - scalar object; its buffer is passed as the kernel's alpha.
 *   A     - matrix object passed to the kernel.
 *
 * Returns FLA_SUCCESS. No kernel is called when A has a zero dimension.
 */
FLA_Error FLA_Setr( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A )
{
    FLA_Datatype dt;
    int          rows, cols;
    int          row_str, col_str;
    uplo1_t      uplo_blis;

    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Setr_check( uplo, alpha, A );

    if ( FLA_Obj_has_zero_dim( A ) )
        return FLA_SUCCESS;

    dt      = FLA_Obj_datatype( A );

    rows    = FLA_Obj_length( A );
    cols    = FLA_Obj_width( A );

    row_str = FLA_Obj_row_stride( A );
    col_str = FLA_Obj_col_stride( A );

    FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_blis );

    if ( dt == FLA_FLOAT )
    {
        bl1_ssetmr( uplo_blis,
                    rows, cols,
                    ( float * ) FLA_FLOAT_PTR( alpha ),
                    ( float * ) FLA_FLOAT_PTR( A ), row_str, col_str );
    }
    else if ( dt == FLA_DOUBLE )
    {
        bl1_dsetmr( uplo_blis,
                    rows, cols,
                    ( double * ) FLA_DOUBLE_PTR( alpha ),
                    ( double * ) FLA_DOUBLE_PTR( A ), row_str, col_str );
    }
    else if ( dt == FLA_COMPLEX )
    {
        bl1_csetmr( uplo_blis,
                    rows, cols,
                    ( scomplex * ) FLA_COMPLEX_PTR( alpha ),
                    ( scomplex * ) FLA_COMPLEX_PTR( A ), row_str, col_str );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        bl1_zsetmr( uplo_blis,
                    rows, cols,
                    ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ),
                    ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A ), row_str, col_str );
    }

    return FLA_SUCCESS;
}
Пример #11
0
/*
 * FLA_SA_LU_unb
 *
 * Unblocked column-by-column elimination over the pair (U, D), with pivot
 * information written to p and a working copy of U kept in L.
 *
 * Steps, as performed below:
 *   1. U is copied into L and L is triangularized (upper, non-unit diagonal).
 *   2. For each column j in [0, m_U):
 *      a. bl1_?amax is run on column j of D; its result is stored in ipiv.
 *      b. L(j,j) is compared against D(ipiv,j). If the D entry is larger,
 *         row j of L is swapped with row ipiv of D (m_U entries) and
 *         p[j] = ipiv + m_U - j is recorded; otherwise p[j] = 0.
 *      c. Column j of D is scaled by 1 / L(j,j).
 *      d. Columns j+1.. of D receive the rank-1 update
 *         D(:,j+1:) -= D(:,j) * L(j,j+1:).
 *      e. Row j of L (from the diagonal onward) is copied back into U.
 *
 * Notes:
 *   - L and U are indexed as buff[ col*cs + row ]; their row strides are never
 *     read, so unit row stride is assumed for both.
 *   - D(ipiv,j) is likewise read as buff_D[ j*cs_D + ipiv ], i.e. with unit
 *     row stride, even though rs_D is passed to the bl1 calls.
 *   - No check is made for L(j,j) == 0 before the reciprocal is formed.
 *
 * Returns FLA_SUCCESS; returns early when U has a zero dimension.
 */
FLA_Error FLA_SA_LU_unb( FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L )
{
  FLA_Datatype datatype;
  int          m_U, cs_U;
  int          m_D, cs_D;
  int               cs_L;
  int               rs_D;
  int          m_U_min_j, m_U_min_j_min_1; 
  int          j, ipiv;
  int*         buff_p;

  if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;
  
  datatype = FLA_Obj_datatype( U );

  m_U      = FLA_Obj_length( U );
  cs_U     = FLA_Obj_col_stride( U );

  m_D      = FLA_Obj_length( D );
  rs_D     = FLA_Obj_row_stride( D );
  cs_D     = FLA_Obj_col_stride( D );
  
  cs_L     = FLA_Obj_col_stride( L );

  // Work on a triangularized copy of U held in L.
  FLA_Copy_external( U, L );
  FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L );

  buff_p     = ( int * ) FLA_INT_PTR( p );

  switch ( datatype ){

  case FLA_FLOAT:
  {
    float* buff_U      = ( float * ) FLA_FLOAT_PTR( U );
    float* buff_D      = ( float * ) FLA_FLOAT_PTR( D );
    float* buff_L      = ( float * ) FLA_FLOAT_PTR( L );
    float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
    float  L_tmp;
    float  D_tmp;
    float  d_inv_Ljj;

    for ( j = 0; j < m_U; ++j )
    {
      // Pivot candidate within column j of D.
      bl1_samax( m_D, 
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      if ( fabsf( L_tmp ) < fabsf( D_tmp ) )
      {
        // Swap row j of L with row ipiv of D.
        bl1_sswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D ); 

        buff_p[ j ] = ipiv + m_U - j;
      }        
      else
      {
        // Zero marks "no swap performed for this column".
        buff_p[ j ] = 0;
      }

      d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ];

      // D(:,j) = D(:,j) / L(j,j)
      bl1_sscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D ); 

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // D(:,j+1:) = D(:,j+1:) - D(:,j) * L(j,j+1:)
        bl1_sger( BLIS1_NO_CONJUGATE,
                  BLIS1_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1, 
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 ) 
      {
        // U(j,j:) = L(j,j:)
        bl1_scopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }                 
    break;
  }

  case FLA_DOUBLE:
  {
    double* buff_U      = ( double * ) FLA_DOUBLE_PTR( U );
    double* buff_D      = ( double * ) FLA_DOUBLE_PTR( D );
    double* buff_L      = ( double * ) FLA_DOUBLE_PTR( L );
    double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
    double  L_tmp;
    double  D_tmp;
    double  d_inv_Ljj;

    for ( j = 0; j < m_U; ++j )
    {
      // Pivot candidate within column j of D.
      bl1_damax( m_D, 
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      if ( fabs( L_tmp ) < fabs( D_tmp ) )
      {
        // Swap row j of L with row ipiv of D.
        bl1_dswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D ); 

        buff_p[ j ] = ipiv + m_U - j;
      }        
      else
      {
        // Zero marks "no swap performed for this column".
        buff_p[ j ] = 0;
      }

      d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ];

      // D(:,j) = D(:,j) / L(j,j)
      bl1_dscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D ); 

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // D(:,j+1:) = D(:,j+1:) - D(:,j) * L(j,j+1:)
        bl1_dger( BLIS1_NO_CONJUGATE,
                  BLIS1_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1, 
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 ) 
      {
        // U(j,j:) = L(j,j:)
        bl1_dcopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }                 
    break;
  }

  case FLA_COMPLEX:
  {
    scomplex* buff_U      = ( scomplex * ) FLA_COMPLEX_PTR( U );
    scomplex* buff_D      = ( scomplex * ) FLA_COMPLEX_PTR( D );
    scomplex* buff_L      = ( scomplex * ) FLA_COMPLEX_PTR( L );
    scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE );
    scomplex  L_tmp;
    scomplex  D_tmp;
    scomplex  d_inv_Ljj;
    scomplex  Ljj;
    float     temp;

    for ( j = 0; j < m_U; ++j )
    {
      // Pivot candidate within column j of D.
      bl1_camax( m_D, 
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      // Compare magnitudes as |re| + |im|. Summing the parts before taking
      // the absolute value lets them cancel (e.g. 1 - 1i would measure 0),
      // which could skip a pivot that is needed.
      // NOTE(review): |re| + |im| is the measure BLAS i?amax uses; presumably
      // bl1_camax follows it -- confirm.
      if ( fabsf( L_tmp.real ) + fabsf( L_tmp.imag ) <
           fabsf( D_tmp.real ) + fabsf( D_tmp.imag ) )
      {
        // Swap row j of L with row ipiv of D.
        bl1_cswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D ); 

        buff_p[ j ] = ipiv + m_U - j;
      }        
      else
      {
        // Zero marks "no swap performed for this column".
        buff_p[ j ] = 0;
      }

      Ljj = buff_L[ j*cs_L + j ];

      // d_inv_Ljj = 1.0 / Ljj = conj( Ljj ) / |Ljj|^2
      temp = 1.0F / ( Ljj.real * Ljj.real +
                      Ljj.imag * Ljj.imag );
      d_inv_Ljj.real = Ljj.real *  temp;
      d_inv_Ljj.imag = Ljj.imag * -temp;

      // D(:,j) = D(:,j) / L(j,j)
      bl1_cscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D ); 

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // D(:,j+1:) = D(:,j+1:) - D(:,j) * L(j,j+1:)
        bl1_cger( BLIS1_NO_CONJUGATE,
                  BLIS1_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1, 
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 ) 
      {
        // U(j,j:) = L(j,j:)
        bl1_ccopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }                 
    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex* buff_U      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U );
    dcomplex* buff_D      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D );
    dcomplex* buff_L      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L );
    dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
    dcomplex  L_tmp;
    dcomplex  D_tmp;
    dcomplex  d_inv_Ljj;
    dcomplex  Ljj;
    double    temp;

    for ( j = 0; j < m_U; ++j )
    {
      // Pivot candidate within column j of D.
      bl1_zamax( m_D, 
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      // Compare magnitudes as |re| + |im|. Summing the parts before taking
      // the absolute value lets them cancel (e.g. 1 - 1i would measure 0),
      // which could skip a pivot that is needed.
      // NOTE(review): |re| + |im| is the measure BLAS i?amax uses; presumably
      // bl1_zamax follows it -- confirm.
      if ( fabs( L_tmp.real ) + fabs( L_tmp.imag ) <
           fabs( D_tmp.real ) + fabs( D_tmp.imag ) )
      {
        // Swap row j of L with row ipiv of D.
        bl1_zswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D ); 

        buff_p[ j ] = ipiv + m_U - j;
      }        
      else
      {
        // Zero marks "no swap performed for this column".
        buff_p[ j ] = 0;
      }

      Ljj = buff_L[ j*cs_L + j ];

      // d_inv_Ljj = 1.0 / Ljj = conj( Ljj ) / |Ljj|^2
      temp = 1.0  / ( Ljj.real * Ljj.real +
                      Ljj.imag * Ljj.imag );
      d_inv_Ljj.real = Ljj.real *  temp;
      d_inv_Ljj.imag = Ljj.imag * -temp;

      // D(:,j) = D(:,j) / L(j,j)
      bl1_zscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D ); 

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // D(:,j+1:) = D(:,j+1:) - D(:,j) * L(j,j+1:)
        bl1_zger( BLIS1_NO_CONJUGATE,
                  BLIS1_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1, 
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 ) 
      {
        // U(j,j:) = L(j,j:)
        bl1_zcopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }                 
    break;
  }

  }

  return FLA_SUCCESS;
}
Пример #12
0
/*
 * FLA_Hemvc_external
 *
 * Selects a bli_ routine by the datatype of A and forwards the operands:
 *   - FLA_FLOAT / FLA_DOUBLE            -> bli_ssymv / bli_dsymv
 *     (the mapped conja value is not passed in these two cases)
 *   - FLA_COMPLEX / FLA_DOUBLE_COMPLEX  -> bli_chemv / bli_zhemv
 *     (the mapped conja value is passed right after uplo)
 *
 * The dimension and strides come from A; the increments come from x and y.
 *
 * Returns FLA_SUCCESS; returns early (no bli_ call) when A has a zero
 * dimension.
 */
FLA_Error FLA_Hemvc_external( FLA_Uplo uplo, FLA_Conj conja, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Datatype dt;
  int          dim;
  int          a_rs, a_cs;
  int          x_inc;
  int          y_inc;
  uplo_t       uplo_bli;
  conj_t       conj_bli;

  /* Parameter validation only runs at the full error-checking level. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Hemvc_check( uplo, conja, alpha, A, x, beta, y );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  dt    = FLA_Obj_datatype( A );

  dim   = FLA_Obj_length( A );
  a_rs  = FLA_Obj_row_stride( A );
  a_cs  = FLA_Obj_col_stride( A );

  x_inc = FLA_Obj_vector_inc( x );
  y_inc = FLA_Obj_vector_inc( y );

  FLA_Param_map_flame_to_blis_uplo( uplo, &uplo_bli );
  FLA_Param_map_flame_to_blis_conj( conja, &conj_bli );

  if ( dt == FLA_FLOAT )
  {
    float *a_ptr     = ( float * ) FLA_FLOAT_PTR( A );
    float *x_ptr     = ( float * ) FLA_FLOAT_PTR( x );
    float *y_ptr     = ( float * ) FLA_FLOAT_PTR( y );
    float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
    float *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

    bli_ssymv( uplo_bli, dim,
               alpha_ptr, a_ptr, a_rs, a_cs,
               x_ptr, x_inc,
               beta_ptr, y_ptr, y_inc );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double *a_ptr     = ( double * ) FLA_DOUBLE_PTR( A );
    double *x_ptr     = ( double * ) FLA_DOUBLE_PTR( x );
    double *y_ptr     = ( double * ) FLA_DOUBLE_PTR( y );
    double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
    double *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

    bli_dsymv( uplo_bli, dim,
               alpha_ptr, a_ptr, a_rs, a_cs,
               x_ptr, x_inc,
               beta_ptr, y_ptr, y_inc );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex *a_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *x_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    scomplex *y_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( y );
    scomplex *alpha_ptr = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
    scomplex *beta_ptr  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

    bli_chemv( uplo_bli, conj_bli, dim,
               alpha_ptr, a_ptr, a_rs, a_cs,
               x_ptr, x_inc,
               beta_ptr, y_ptr, y_inc );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex *a_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *x_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    dcomplex *y_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y );
    dcomplex *alpha_ptr = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    dcomplex *beta_ptr  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

    bli_zhemv( uplo_bli, conj_bli, dim,
               alpha_ptr, a_ptr, a_rs, a_cs,
               x_ptr, x_inc,
               beta_ptr, y_ptr, y_inc );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Tridiag_apply_Q_external
 *
 * Wrapper around the LAPACK routines ?ormtr (real) / ?unmtr (complex),
 * selected by the datatype of A. The side, uplo and trans arguments are
 * mapped to their netlib character codes; A, t and B are passed by buffer
 * pointer with the column strides of A and B as leading dimensions.
 *
 * The LAPACK routine is invoked twice: first with lwork = -1 as a workspace
 * query, then with a work array of the size reported by that query.
 *
 * Returns the LAPACK info value of the last call made (0 when the early-exit
 * for a zero-dimension A is taken, in which case FLA_SUCCESS is returned).
 * If the workspace query itself reports a nonzero info, the second call is
 * skipped and that info is returned.
 *
 * When FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES is not defined, the routine only
 * raises FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED via FLA_Check_error_code.
 */
FLA_Error FLA_Tridiag_apply_Q_external( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Obj A, FLA_Obj t, FLA_Obj B )
{
  int          info = 0;
#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES
  FLA_Datatype datatype;
  int          m_B, n_B;
  int          cs_A;
  int          cs_B;
  int          lwork;
  char         blas_side;
  char         blas_uplo;
  char         blas_trans;
  FLA_Obj      work;
  int          i;

  //if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  //  FLA_Apply_Q_check( side, trans, storev, A, t, B );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  cs_A     = FLA_Obj_col_stride( A );

  m_B      = FLA_Obj_length( B );
  n_B      = FLA_Obj_width( B );
  cs_B     = FLA_Obj_col_stride( B );

  FLA_Param_map_flame_to_netlib_side( side, &blas_side );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo );
  FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans );


  // Make a workspace query the first time through. This will provide us with
  // an ideal workspace size based on an internal block size.
  lwork = -1;
  FLA_Obj_create( datatype, 1, 1, 0, 0, &work );

  for ( i = 0; i < 2; ++i )
  {
    if ( i == 1 )
    {
      // If the query reported an error, work[0] does not hold a valid size;
      // skip the real call and hand the query's info back to the caller.
      if ( info != 0 ) break;

      // Grab the queried ideal workspace size from the work array, free the
      // work object, and then re-allocate the workspace with the ideal size.
      // For complex types the size is read from the leading real component.
      if      ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX )
        lwork = ( int ) *FLA_FLOAT_PTR( work );
      else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX )
        lwork = ( int ) *FLA_DOUBLE_PTR( work );

      FLA_Obj_free( &work );
      FLA_Obj_create( datatype, lwork, 1, 0, 0, &work );
    }

    switch( datatype ){
  
    case FLA_FLOAT:
    {
      float *buff_A    = ( float * ) FLA_FLOAT_PTR( A );
      float *buff_t    = ( float * ) FLA_FLOAT_PTR( t );
      float *buff_B    = ( float * ) FLA_FLOAT_PTR( B );
      float *buff_work = ( float * ) FLA_FLOAT_PTR( work );
  
      F77_sormtr( &blas_side,
                  &blas_uplo,
                  &blas_trans,
                  &m_B,
                  &n_B,
                  buff_A, &cs_A,
                  buff_t,
                  buff_B, &cs_B,
                  buff_work, &lwork,
                  &info );
  
      break;
    }
  
    case FLA_DOUBLE:
    {
      double *buff_A    = ( double * ) FLA_DOUBLE_PTR( A );
      double *buff_t    = ( double * ) FLA_DOUBLE_PTR( t );
      double *buff_B    = ( double * ) FLA_DOUBLE_PTR( B );
      double *buff_work = ( double * ) FLA_DOUBLE_PTR( work );
  
      F77_dormtr( &blas_side,
                  &blas_uplo,
                  &blas_trans,
                  &m_B,
                  &n_B,
                  buff_A, &cs_A,
                  buff_t,
                  buff_B, &cs_B,
                  buff_work, &lwork,
                  &info );
  
      break;
    }
  
    case FLA_COMPLEX:
    {
      scomplex *buff_A    = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *buff_t    = ( scomplex * ) FLA_COMPLEX_PTR( t );
      scomplex *buff_B    = ( scomplex * ) FLA_COMPLEX_PTR( B );
      scomplex *buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work );
  
      F77_cunmtr( &blas_side,
                  &blas_uplo,
                  &blas_trans,
                  &m_B,
                  &n_B,
                  buff_A, &cs_A,
                  buff_t,
                  buff_B, &cs_B,
                  buff_work, &lwork,
                  &info );
  
      break;
    }
  
    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *buff_A    = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *buff_t    = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( t );
      dcomplex *buff_B    = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      dcomplex *buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work );
  
      F77_zunmtr( &blas_side,
                  &blas_uplo,
                  &blas_trans,
                  &m_B,
                  &n_B,
                  buff_A, &cs_A,
                  buff_t,
                  buff_B, &cs_B,
                  buff_work, &lwork,
                  &info );
  
      break;
    }
  
    }
  }

  FLA_Obj_free( &work );
#else
  FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED );
#endif

  return info;
}
Пример #14
0
/*
 * FLA_Axpy_external
 *
 * Forwards to the bl1_?axpymt routine matching the datatype of A. The trans
 * argument handed to bl1 is FLA_NO_TRANSPOSE when A is conformal to B without
 * transposition, and FLA_TRANSPOSE otherwise. Dimensions are taken from B;
 * strides are taken from A and B respectively.
 *
 * Returns FLA_SUCCESS; returns early (no bl1 call) when A has a zero
 * dimension.
 */
FLA_Error FLA_Axpy_external( FLA_Obj alpha, FLA_Obj A, FLA_Obj B )
{
  FLA_Datatype dt;
  int          n_rows, n_cols;
  int          a_rs, a_cs;
  int          b_rs, b_cs;
  trans1_t     trans_bl1;

  /* Parameter validation only runs at the full error-checking level. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Axpy_check( alpha, A, B );

  if ( FLA_Obj_has_zero_dim( A ) )
    return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  a_rs   = FLA_Obj_row_stride( A );
  a_cs   = FLA_Obj_col_stride( A );

  n_rows = FLA_Obj_length( B );
  n_cols = FLA_Obj_width( B );
  b_rs   = FLA_Obj_row_stride( B );
  b_cs   = FLA_Obj_col_stride( B );

  /* Anything not conformal as-is is treated as the transposed case. */
  FLA_Param_map_flame_to_blis_trans(
      FLA_Obj_is_conformal_to( FLA_NO_TRANSPOSE, A, B ) ? FLA_NO_TRANSPOSE
                                                        : FLA_TRANSPOSE,
      &trans_bl1 );

  if ( dt == FLA_FLOAT )
  {
    float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
    float *a_ptr     = ( float * ) FLA_FLOAT_PTR( A );
    float *b_ptr     = ( float * ) FLA_FLOAT_PTR( B );

    bl1_saxpymt( trans_bl1, n_rows, n_cols,
                 alpha_ptr,
                 a_ptr, a_rs, a_cs,
                 b_ptr, b_rs, b_cs );
  }
  else if ( dt == FLA_DOUBLE )
  {
    double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
    double *a_ptr     = ( double * ) FLA_DOUBLE_PTR( A );
    double *b_ptr     = ( double * ) FLA_DOUBLE_PTR( B );

    bl1_daxpymt( trans_bl1, n_rows, n_cols,
                 alpha_ptr,
                 a_ptr, a_rs, a_cs,
                 b_ptr, b_rs, b_cs );
  }
  else if ( dt == FLA_COMPLEX )
  {
    scomplex *alpha_ptr = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
    scomplex *a_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *b_ptr     = ( scomplex * ) FLA_COMPLEX_PTR( B );

    bl1_caxpymt( trans_bl1, n_rows, n_cols,
                 alpha_ptr,
                 a_ptr, a_rs, a_cs,
                 b_ptr, b_rs, b_cs );
  }
  else if ( dt == FLA_DOUBLE_COMPLEX )
  {
    dcomplex *alpha_ptr = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    dcomplex *a_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *b_ptr     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

    bl1_zaxpymt( trans_bl1, n_rows, n_cols,
                 alpha_ptr,
                 a_ptr, a_rs, a_cs,
                 b_ptr, b_rs, b_cs );
  }

  return FLA_SUCCESS;
}
Пример #15
0
/*
 * FLA_Hemm_external_gpu
 *
 * Calls the cuBLAS routine matching the datatype of A on the device buffers
 * A_gpu, B_gpu and C_gpu:
 *   - FLA_FLOAT / FLA_DOUBLE            -> cublasSsymm / cublasDsymm
 *   - FLA_COMPLEX / FLA_DOUBLE_COMPLEX  -> cublasChemm / cublasZhemm
 *
 * alpha and beta are read from the host-side objects and passed by value.
 * The dimensions come from C; each leading dimension is the length of the
 * corresponding host object (A, B, C).
 *
 * Returns FLA_SUCCESS; returns early (no cuBLAS call) when C has a zero
 * dimension.
 */
FLA_Error FLA_Hemm_external_gpu( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu, FLA_Obj beta, FLA_Obj C, void* C_gpu )
{
  FLA_Datatype dt;
  int          n_rows, n_cols;
  int          lda;
  int          ldb;
  int          ldc;
  char         side_ch;
  char         uplo_ch;

  /* Parameter validation only runs at the full error-checking level. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Hemm_check( side, uplo, alpha, A, B, beta, C );

  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  dt     = FLA_Obj_datatype( A );

  lda    = FLA_Obj_length( A );

  ldb    = FLA_Obj_length( B );

  n_rows = FLA_Obj_length( C );
  n_cols = FLA_Obj_width( C );
  ldc    = FLA_Obj_length( C );

  FLA_Param_map_flame_to_netlib_side( side, &side_ch );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *alpha_ptr = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_ptr  = ( float * ) FLA_FLOAT_PTR( beta );

      cublasSsymm( side_ch, uplo_ch, n_rows, n_cols,
                   *alpha_ptr,
                   ( float * ) A_gpu, lda,
                   ( float * ) B_gpu, ldb,
                   *beta_ptr,
                   ( float * ) C_gpu, ldc );
      break;
    }

    case FLA_DOUBLE:
    {
      double *alpha_ptr = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_ptr  = ( double * ) FLA_DOUBLE_PTR( beta );

      cublasDsymm( side_ch, uplo_ch, n_rows, n_cols,
                   *alpha_ptr,
                   ( double * ) A_gpu, lda,
                   ( double * ) B_gpu, ldb,
                   *beta_ptr,
                   ( double * ) C_gpu, ldc );
      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex *alpha_ptr = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );
      cuComplex *beta_ptr  = ( cuComplex * ) FLA_COMPLEX_PTR( beta );

      cublasChemm( side_ch, uplo_ch, n_rows, n_cols,
                   *alpha_ptr,
                   ( cuComplex * ) A_gpu, lda,
                   ( cuComplex * ) B_gpu, ldb,
                   *beta_ptr,
                   ( cuComplex * ) C_gpu, ldc );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex *alpha_ptr = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex *beta_ptr  = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      cublasZhemm( side_ch, uplo_ch, n_rows, n_cols,
                   *alpha_ptr,
                   ( cuDoubleComplex * ) A_gpu, lda,
                   ( cuDoubleComplex * ) B_gpu, ldb,
                   *beta_ptr,
                   ( cuDoubleComplex * ) C_gpu, ldc );
      break;
    }
  }

  return FLA_SUCCESS;
}
Пример #16
0
/*
 * FLA_Svdd_external
 *
 * Wrapper around the LAPACK routines ?gesdd, selected by the datatype of A.
 * jobz is mapped to its netlib character code; A, s, U and V are passed by
 * buffer pointer with the column strides of A, U and V as leading dimensions.
 *
 * Workspaces:
 *   - iwork: 8 * min(m,n) integers, always allocated.
 *   - rwork: allocated only when A is complex. It is sized to cover the
 *            requirement documented for c/zgesdd across LAPACK releases
 *            (see the comment at the allocation).
 *   - work:  sized by a workspace query (first call with lwork = -1), then
 *            re-allocated to the reported size for the second call.
 *
 * Returns the LAPACK info value of the last call made (FLA_SUCCESS on the
 * early exit for a zero-dimension A). If the workspace query itself reports
 * a nonzero info, the second call is skipped and that info is returned.
 *
 * When FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES is not defined, the routine only
 * raises FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED via FLA_Check_error_code.
 */
FLA_Error FLA_Svdd_external( FLA_Svd_type jobz, FLA_Obj A, FLA_Obj s, FLA_Obj U, FLA_Obj V )
{
  int          info = 0;
#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES
  FLA_Datatype datatype;
  FLA_Datatype dt_real;
  FLA_Datatype dt_int;
  int          m_A, n_A, cs_A;
  int          cs_U;
  int          cs_V;
  int          min_m_n;
  int          max_m_n;
  int          lwork, lrwork, liwork;
  FLA_Obj      work, rwork, iwork;
  char         blas_jobz;
  int          i;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Svdd_check( jobz, A, s, U, V );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );
  dt_real  = FLA_Obj_datatype_proj_to_real( A );
  dt_int   = FLA_INT;

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  cs_A     = FLA_Obj_col_stride( A );

  cs_U     = FLA_Obj_col_stride( U );

  cs_V     = FLA_Obj_col_stride( V );

  min_m_n  = min( m_A, n_A );
  max_m_n  = max( m_A, n_A );

  // Allocate the rwork and iwork arrays up front.
  //
  // rwork sizing follows the c/zgesdd documentation (mn = min, mx = max):
  //   JOBZ = 'N' : 5*mn, but LAPACK <= 3.6 needs 7*mn
  //   otherwise  : max( 5*mn*mn + 5*mn, 2*mx*mn + 2*mn*mn + mn )
  // The previous sizes (5*mn and 5*mn*mn + 7*mn) fall short of these bounds
  // for JOBZ = 'N' on older LAPACK and for moderately rectangular matrices.
  // The 5*mn*mn + 7*mn term is kept so the buffer never shrinks relative to
  // the earlier allocation.
  if ( jobz == FLA_SVD_VECTORS_NONE ) lrwork   = 7 * min_m_n;
  else                                lrwork   = max( 5 * min_m_n * min_m_n +
                                                      7 * min_m_n,
                                                      2 * max_m_n * min_m_n +
                                                      2 * min_m_n * min_m_n +
                                                      min_m_n );
  liwork = 8 * min_m_n;

  FLA_Obj_create( dt_int,  liwork, 1, 0, 0, &iwork );
  if ( FLA_Obj_is_complex( A ) )
    FLA_Obj_create( dt_real, lrwork, 1, 0, 0, &rwork );

  FLA_Param_map_flame_to_netlib_svd_type( jobz, &blas_jobz );

  // Make a workspace query the first time through. This will provide us with
  // an ideal workspace size based on an internal block size.
  lwork = -1;
  FLA_Obj_create( datatype, 1, 1, 0, 0, &work );

  for ( i = 0; i < 2; ++i )
  {
    if ( i == 1 )
    {
      // If the query reported an error, work[0] does not hold a valid size;
      // skip the real call and hand the query's info back to the caller.
      if ( info != 0 ) break;

      // Grab the queried ideal workspace size from the work array, free the
      // work object, and then re-allocate the workspace with the ideal size.
      // For complex types the size is read from the leading real component.
      if      ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX )
        lwork = ( int ) *FLA_FLOAT_PTR( work );
      else if ( datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX )
        lwork = ( int ) *FLA_DOUBLE_PTR( work );

      FLA_Obj_free( &work );
      FLA_Obj_create( datatype, lwork, 1, 0, 0, &work );
    }

    switch( datatype ) {

    case FLA_FLOAT:
    {
      float*    buff_A     = ( float*    ) FLA_FLOAT_PTR( A );
      float*    buff_s     = ( float*    ) FLA_FLOAT_PTR( s );
      float*    buff_U     = ( float*    ) FLA_FLOAT_PTR( U );
      float*    buff_V     = ( float*    ) FLA_FLOAT_PTR( V );
      float*    buff_work  = ( float*    ) FLA_FLOAT_PTR( work );
      int*      buff_iwork = ( int*      ) FLA_INT_PTR( iwork );
  
      F77_sgesdd( &blas_jobz,
                  &m_A,
                  &n_A,
                  buff_A,    &cs_A,
                  buff_s,
                  buff_U,    &cs_U,
                  buff_V,    &cs_V,
                  buff_work, &lwork,
                  buff_iwork,
                  &info );

      break;
    }

    case FLA_DOUBLE:
    {
      double*   buff_A     = ( double*   ) FLA_DOUBLE_PTR( A );
      double*   buff_s     = ( double*   ) FLA_DOUBLE_PTR( s );
      double*   buff_U     = ( double*   ) FLA_DOUBLE_PTR( U );
      double*   buff_V     = ( double*   ) FLA_DOUBLE_PTR( V );
      double*   buff_work  = ( double*   ) FLA_DOUBLE_PTR( work );
      int*      buff_iwork = ( int*      ) FLA_INT_PTR( iwork );
  
      F77_dgesdd( &blas_jobz,
                  &m_A,
                  &n_A,
                  buff_A,    &cs_A,
                  buff_s,
                  buff_U,    &cs_U,
                  buff_V,    &cs_V,
                  buff_work, &lwork,
                  buff_iwork,
                  &info );
  
      break;
    } 
  
    case FLA_COMPLEX:
    {
      // The singular values are real, so s is accessed as a float buffer.
      scomplex* buff_A     = ( scomplex* ) FLA_COMPLEX_PTR( A );
      float*    buff_s     = ( float*    ) FLA_FLOAT_PTR( s );
      scomplex* buff_U     = ( scomplex* ) FLA_COMPLEX_PTR( U );
      scomplex* buff_V     = ( scomplex* ) FLA_COMPLEX_PTR( V );
      scomplex* buff_work  = ( scomplex* ) FLA_COMPLEX_PTR( work );
      float*    buff_rwork = ( float*    ) FLA_FLOAT_PTR( rwork );
      int*      buff_iwork = ( int*      ) FLA_INT_PTR( iwork );
  
      F77_cgesdd( &blas_jobz,
                  &m_A,
                  &n_A,
                  buff_A,    &cs_A,
                  buff_s,
                  buff_U,    &cs_U,
                  buff_V,    &cs_V,
                  buff_work, &lwork,
                  buff_rwork,
                  buff_iwork,
                  &info );
  
      break;
    } 
  
    case FLA_DOUBLE_COMPLEX:
    {
      // The singular values are real, so s is accessed as a double buffer.
      dcomplex* buff_A     = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
      double*   buff_s     = ( double*   ) FLA_DOUBLE_PTR( s );
      dcomplex* buff_U     = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( U );
      dcomplex* buff_V     = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( V );
      dcomplex* buff_work  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( work );
      double*   buff_rwork = ( double*   ) FLA_DOUBLE_PTR( rwork );
      int*      buff_iwork = ( int*      ) FLA_INT_PTR( iwork );
  
      F77_zgesdd( &blas_jobz,
                  &m_A,
                  &n_A,
                  buff_A,    &cs_A,
                  buff_s,
                  buff_U,    &cs_U,
                  buff_V,    &cs_V,
                  buff_work, &lwork,
                  buff_rwork,
                  buff_iwork,
                  &info );
  
      break;
    } 

    }
  }

  FLA_Obj_free( &work );
  FLA_Obj_free( &iwork );
  if ( FLA_Obj_is_complex( A ) )
    FLA_Obj_free( &rwork );
#else
  FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED );
#endif

  return info;
}
Пример #17
0
/*
 * FLA_Gemv_external
 *
 * Forwards a general matrix-vector operation to the BLIS wrapper that
 * matches the datatype of A (bli_sgemv / bli_dgemv / bli_cgemv / bli_zgemv).
 * Presumably the usual GEMV update y := beta * y + alpha * op(A) * x;
 * confirm against the bli_?gemv wrappers.
 *
 * Parameters:
 *   transa - transposition selector for A, mapped to a BLIS trans_t.
 *   alpha  - scalar object handed to bli_?gemv ahead of A.
 *   A      - matrix object; its datatype selects the kernel and its
 *            length/width/strides are passed through unchanged.
 *   x      - input vector object, always passed with FLA_NO_CONJUGATE.
 *   beta   - scalar object handed to bli_?gemv ahead of y.
 *   y      - vector object passed as the last operand.
 *
 * Returns:
 *   FLA_SUCCESS on every path visible here, including the case where the
 *   datatype of A is none of the four handled below (nothing is computed
 *   in that case).
 */
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Datatype dt;
  int          n_rows, n_cols;
  int          row_stride, col_stride;
  int          x_inc;
  int          y_inc;
  trans_t      a_trans;
  conj_t       x_conj;

  /* Argument validation only runs at the full error-checking level. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Gemv_check( transa, alpha, A, x, beta, y );
  }

  /* A has a zero dimension: only FLA_Scal_external( beta, y ) is invoked */
  /* (presumably y := beta * y) and the matrix-vector kernel is skipped.  */
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    FLA_Scal_external( beta, y );
    return FLA_SUCCESS;
  }

  /* Query the matrix and vector layout. */
  dt         = FLA_Obj_datatype( A );

  n_rows     = FLA_Obj_length( A );
  n_cols     = FLA_Obj_width( A );
  row_stride = FLA_Obj_row_stride( A );
  col_stride = FLA_Obj_col_stride( A );

  x_inc      = FLA_Obj_vector_inc( x );
  y_inc      = FLA_Obj_vector_inc( y );

  /* Translate FLAME parameters to BLIS ones; x is never conjugated. */
  FLA_Param_map_flame_to_blis_trans( transa, &a_trans );
  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &x_conj );

  /* One case per supported datatype; all cases share the same call shape. */
  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float *a_buf     = ( float * ) FLA_FLOAT_PTR( A );
      float *x_buf     = ( float * ) FLA_FLOAT_PTR( x );
      float *y_buf     = ( float * ) FLA_FLOAT_PTR( y );
      float *alpha_buf = ( float * ) FLA_FLOAT_PTR( alpha );
      float *beta_buf  = ( float * ) FLA_FLOAT_PTR( beta );

      bli_sgemv( a_trans, x_conj,
                 n_rows, n_cols,
                 alpha_buf,
                 a_buf, row_stride, col_stride,
                 x_buf, x_inc,
                 beta_buf,
                 y_buf, y_inc );
      break;
    }

    case FLA_DOUBLE:
    {
      double *a_buf     = ( double * ) FLA_DOUBLE_PTR( A );
      double *x_buf     = ( double * ) FLA_DOUBLE_PTR( x );
      double *y_buf     = ( double * ) FLA_DOUBLE_PTR( y );
      double *alpha_buf = ( double * ) FLA_DOUBLE_PTR( alpha );
      double *beta_buf  = ( double * ) FLA_DOUBLE_PTR( beta );

      bli_dgemv( a_trans, x_conj,
                 n_rows, n_cols,
                 alpha_buf,
                 a_buf, row_stride, col_stride,
                 x_buf, x_inc,
                 beta_buf,
                 y_buf, y_inc );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *a_buf     = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *x_buf     = ( scomplex * ) FLA_COMPLEX_PTR( x );
      scomplex *y_buf     = ( scomplex * ) FLA_COMPLEX_PTR( y );
      scomplex *alpha_buf = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
      scomplex *beta_buf  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

      bli_cgemv( a_trans, x_conj,
                 n_rows, n_cols,
                 alpha_buf,
                 a_buf, row_stride, col_stride,
                 x_buf, x_inc,
                 beta_buf,
                 y_buf, y_inc );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *a_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *x_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
      dcomplex *y_buf     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y );
      dcomplex *alpha_buf = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      dcomplex *beta_buf  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

      bli_zgemv( a_trans, x_conj,
                 n_rows, n_cols,
                 alpha_buf,
                 a_buf, row_stride, col_stride,
                 x_buf, x_inc,
                 beta_buf,
                 y_buf, y_inc );
      break;
    }

    default:
      /* Any other datatype is silently ignored. */
      break;
  }

  return FLA_SUCCESS;
}
Пример #18
0
/*
 * FLA_Trsv_external_gpu
 *
 * Dispatches a triangular solve with a single vector to the cuBLAS routine
 * matching the datatype of A (cublasStrsv / cublasDtrsv / cublasCtrsv /
 * cublasZtrsv), operating on the raw buffers A_gpu and x_gpu.
 *
 * Parameters:
 *   uplo, trans, diag - FLAME selectors, each mapped to its netlib
 *                       character code before the cuBLAS call.
 *   A                 - matrix object; supplies the datatype, the problem
 *                       size and the leading dimension.
 *   A_gpu             - buffer passed to cuBLAS as the matrix operand,
 *                       cast to the element type of A.
 *                       NOTE(review): presumably a device-resident copy of
 *                       A - confirm with callers.
 *   x                 - vector object; here it is only passed to
 *                       FLA_Trsv_check.
 *   x_gpu             - buffer passed to cuBLAS as the vector operand,
 *                       cast to the element type of A.
 *
 * Returns:
 *   FLA_SUCCESS on every path visible here, including when A has a zero
 *   dimension or a datatype not handled by the switch (no cuBLAS call is
 *   made in either case).
 */
FLA_Error FLA_Trsv_external_gpu( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, void* A_gpu, FLA_Obj x, void* x_gpu )
{
  FLA_Datatype dt;
  int          len_A;
  int          lda;
  int          x_inc;
  char         uplo_ch;
  char         trans_ch;
  char         diag_ch;

  /* Argument validation only runs at the full error-checking level. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Trsv_check( uplo, trans, diag, A, x );
  }

  /* Empty problem: nothing is sent to cuBLAS. */
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    return FLA_SUCCESS;
  }

  dt    = FLA_Obj_datatype( A );

  /* The length of A serves both as the problem size and as the leading */
  /* dimension of A_gpu; the stride of x_gpu is fixed at one.           */
  len_A = FLA_Obj_length( A );
  lda   = FLA_Obj_length( A );

  x_inc = 1;

  /* Translate the FLAME selectors to netlib character codes. */
  FLA_Param_map_flame_to_netlib_uplo( uplo, &uplo_ch );
  FLA_Param_map_flame_to_netlib_trans( trans, &trans_ch );
  FLA_Param_map_flame_to_netlib_diag( diag, &diag_ch );

  /* The cases differ only in the routine and the element-type cast. */
  switch ( dt )
  {
    case FLA_FLOAT:
      cublasStrsv( uplo_ch, trans_ch, diag_ch,
                   len_A,
                   ( float * ) A_gpu, lda,
                   ( float * ) x_gpu, x_inc );
      break;

    case FLA_DOUBLE:
      cublasDtrsv( uplo_ch, trans_ch, diag_ch,
                   len_A,
                   ( double * ) A_gpu, lda,
                   ( double * ) x_gpu, x_inc );
      break;

    case FLA_COMPLEX:
      cublasCtrsv( uplo_ch, trans_ch, diag_ch,
                   len_A,
                   ( cuComplex * ) A_gpu, lda,
                   ( cuComplex * ) x_gpu, x_inc );
      break;

    case FLA_DOUBLE_COMPLEX:
      cublasZtrsv( uplo_ch, trans_ch, diag_ch,
                   len_A,
                   ( cuDoubleComplex * ) A_gpu, lda,
                   ( cuDoubleComplex * ) x_gpu, x_inc );
      break;

    default:
      /* Any other datatype is silently ignored. */
      break;
  }

  return FLA_SUCCESS;
}