FLA_Error FLA_Tevd_eigval_v_opt_var1( FLA_Obj G, FLA_Obj d, FLA_Obj e, FLA_Obj k )
{
	FLA_Datatype datatype;
	int          m_A, n_G;
	int          rs_G, cs_G;
	int          inc_d;
	int          inc_e;

	datatype = FLA_Obj_datatype( d );

	m_A      = FLA_Obj_vector_dim( d );
	n_G      = FLA_Obj_width( G );

	rs_G     = FLA_Obj_row_stride( G );
	cs_G     = FLA_Obj_col_stride( G );

	inc_d    = FLA_Obj_vector_inc( d );
	inc_e    = FLA_Obj_vector_inc( e );
	

	switch ( datatype )
	{
		case FLA_FLOAT:
		{
			scomplex* buff_G = FLA_COMPLEX_PTR( G );
			float*    buff_d = FLA_FLOAT_PTR( d );
			float*    buff_e = FLA_FLOAT_PTR( e );
			int*      buff_k = FLA_INT_PTR( k );

			FLA_Tevd_eigval_v_ops_var1( m_A,
			                            n_G,
			                            buff_G, rs_G, cs_G,
			                            buff_d, inc_d,
			                            buff_e, inc_e,
			                            buff_k );

			break;
		}

		case FLA_DOUBLE:
		{
			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
			double*   buff_d = FLA_DOUBLE_PTR( d );
			double*   buff_e = FLA_DOUBLE_PTR( e );
			int*      buff_k = FLA_INT_PTR( k );

			FLA_Tevd_eigval_v_opd_var1( m_A,
			                            n_G,
			                            buff_G, rs_G, cs_G,
			                            buff_d, inc_d,
			                            buff_e, inc_e,
			                            buff_k );

			break;
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Trinv_lu_ops_var2( int mn_A,
                                 float* buff_A, int rs_A, int cs_A )
{
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  int       i;

  for ( i = 0; i < mn_A; ++i )
  {
    float*    a21       = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A22       = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    int       mn_ahead  = mn_A - i - 1;

    /*------------------------------------------------------------*/

    // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A22, a21 );
    bl1_strsv( BLIS1_LOWER_TRIANGULAR,
               BLIS1_NO_TRANSPOSE,
               BLIS1_UNIT_DIAG,
               mn_ahead,
               A22, rs_A, cs_A,
               a21, rs_A );

    // FLA_Scal_external( FLA_MINUS_ONE, a21 );
    bl1_sscalv( BLIS1_NO_CONJUGATE,
                mn_ahead,
                buff_m1,
                a21, rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
示例#3
0
FLA_Error FLA_Obj_create_complex_constant( double const_real, double const_imag, FLA_Obj *obj )
{
  int*      temp_i;
  float*    temp_s;
  double*   temp_d;
  scomplex* temp_c;
  dcomplex* temp_z;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_create_complex_constant_check( const_real, const_imag, obj );

  FLA_Obj_create( FLA_CONSTANT, 1, 1, 0, 0, obj );

#ifdef FLA_ENABLE_SCC
  if ( !FLA_is_owner() )
    return FLA_SUCCESS;
#endif

  temp_i       = FLA_INT_PTR( *obj );
  temp_s       = FLA_FLOAT_PTR( *obj );
  temp_d       = FLA_DOUBLE_PTR( *obj );
  temp_c       = FLA_COMPLEX_PTR( *obj );
  temp_z       = FLA_DOUBLE_COMPLEX_PTR( *obj );

  *temp_i      = ( int   ) const_real;
  *temp_s      = ( float ) const_real;
  *temp_d      =           const_real;
  temp_c->real = ( float ) const_real;
  temp_c->imag = ( float ) const_imag;
  temp_z->real =           const_real;
  temp_z->imag =           const_imag;

  return FLA_SUCCESS;
}
示例#4
0
FLA_Error FLA_Chol_u_opt_var3( FLA_Obj A )
{
  FLA_Error    r_val = FLA_SUCCESS;
  FLA_Datatype datatype;
  int          mn_A;
  int          rs_A, cs_A;

  datatype = FLA_Obj_datatype( A );

  mn_A     = FLA_Obj_length( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );
  

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );

      r_val = FLA_Chol_u_ops_var3( mn_A,
                                   buff_A, rs_A, cs_A );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );

      r_val = FLA_Chol_u_opd_var3( mn_A,
                                   buff_A, rs_A, cs_A );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );

      r_val = FLA_Chol_u_opc_var3( mn_A,
                                   buff_A, rs_A, cs_A );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );

      r_val = FLA_Chol_u_opz_var3( mn_A,
                                   buff_A, rs_A, cs_A );

      break;
    }
  }

  return r_val;
}
示例#5
0
FLA_Error FLA_Absolute_value( FLA_Obj alpha )
{
  FLA_Datatype datatype;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Absolute_value_check( alpha );

  datatype = FLA_Obj_datatype( alpha );
  
  switch ( datatype ){

  case FLA_FLOAT:
  {
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );

    *buff_alpha = ( float ) fabs( ( double ) *buff_alpha );

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );

    *buff_alpha = fabs( *buff_alpha );

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

    buff_alpha->real = ( float ) sqrt( ( double ) buff_alpha->real * buff_alpha->real + 
                                                  buff_alpha->imag * buff_alpha->imag );
    buff_alpha->imag = 0.0F; 


    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

    buff_alpha->real = sqrt( buff_alpha->real * buff_alpha->real + 
                             buff_alpha->imag * buff_alpha->imag );
    buff_alpha->imag = 0.0; 

    break;
  }

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Ahx_Ax_ops_var1( int m_A,
                                     int n_A,
                                     float* buff_A, int rs_A, int cs_A, 
                                     float* buff_x, int inc_x, 
                                     float* buff_v, int inc_v, 
                                     float* buff_w, int inc_w )
{
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  int       i;

  bl1_ssetv( m_A,
             buff_0,
             buff_w, inc_w );

  for ( i = 0; i < n_A; ++i )
  {
    float*    a1       = buff_A + (i  )*cs_A + (0  )*rs_A;
    float*    nu1      = buff_v + (i  )*inc_v;
    float*    x        = buff_x;
    float*    chi1     = buff_x + (i  )*inc_x;
    float*    w        = buff_w;

    /*------------------------------------------------------------*/

    bl1_sdot( BLIS1_CONJUGATE,
              m_A,
              a1, rs_A,
              x,  inc_x,
              nu1 );
/*
    *nu1 = F77_sdot( &m_A,
                     a1, &rs_A,
                     x,  &inc_x );
*/

    bl1_saxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                chi1,
                a1, rs_A,
                w,  inc_w );
/*
    F77_saxpy( &m_A,
               chi1,
               a1, &rs_A,
               w,  &inc_w );
*/

    /*------------------------------------------------------------*/

  }


  return FLA_SUCCESS;
}
FLA_Error FLA_Trinv_lu_ops_var3( int mn_A,
                                 float* buff_A, int rs_A, int cs_A )
{
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  int       i;

  for ( i = 0; i < mn_A; ++i )
  {
    float*    a10t      = buff_A + (0  )*cs_A + (i  )*rs_A;
    float*    A20       = buff_A + (0  )*cs_A + (i+1)*rs_A;
    float*    a21       = buff_A + (i  )*cs_A + (i+1)*rs_A;

    int       mn_ahead  = mn_A - i - 1;
    int       mn_behind = i;

    /*------------------------------------------------------------*/

    // FLA_Scal_external( FLA_MINUS_ONE, a21 );
    bl1_sscalv( BLIS1_NO_CONJUGATE,
                mn_ahead,
                buff_m1,
                a21, rs_A );

    // FLA_Ger_external( FLA_ONE, a21, a10t, A20 );
    bl1_sger( BLIS1_NO_CONJUGATE,
              BLIS1_NO_CONJUGATE,
              mn_ahead,
              mn_behind,
              buff_1,
              a21, rs_A,
              a10t, cs_A,
              A20, rs_A, cs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
示例#8
0
FLA_Error FLA_Chol_u_opc_var3( int mn_A,
                               scomplex* buff_A, int rs_A, int cs_A )
{
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  int       i;
  FLA_Error e_val;

  for ( i = 0; i < mn_A; ++i )
  {
    scomplex* alpha11   = buff_A + (i  )*cs_A + (i  )*rs_A;
    scomplex* a12t      = buff_A + (i+1)*cs_A + (i  )*rs_A;
    scomplex* A22       = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    int       mn_ahead  = mn_A - i - 1;
    int       mn_behind = i;

    /*------------------------------------------------------------*/

    // r_val = FLA_Sqrt( alpha11 );
    // if ( r_val != FLA_SUCCESS )
    //   return ( FLA_Obj_length( A00 ) + 1 );
    bl1_csqrte( alpha11, &e_val );
    if ( e_val != FLA_SUCCESS ) return mn_behind;

    // FLA_Inv_scal_external( alpha11, a12t );
    bl1_cinvscalv( BLIS1_NO_CONJUGATE,
                   mn_ahead,
                   alpha11,
                   a12t, cs_A );

    // FLA_Herc_external( FLA_UPPER_TRIANGULAR, FLA_MINUS_ONE, a12t, A22 );
    bl1_cher( BLIS1_UPPER_TRIANGULAR,
              BLIS1_CONJUGATE,
              mn_ahead,
              buff_m1,
              a12t, cs_A,
              A22, rs_A, cs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
示例#9
0
FLA_Error FLA_Ttmm_l_ops_var3( int mn_A,
                               float* buff_A, int rs_A, int cs_A )
{
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  int       i;

  for ( i = 0; i < mn_A; ++i )
  {
    float*    alpha11   = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A22       = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    int       mn_ahead  = mn_A - i - 1;

    /*------------------------------------------------------------*/

    // FLA_Absolute_square( alpha11 );
    bl1_sabsqr( alpha11 );

    // FLA_Dotcs_external( FLA_CONJUGATE, FLA_ONE, a21, a21, FLA_ONE, alpha11 );
    bl1_sdots( BLIS1_CONJUGATE,
               mn_ahead,
               buff_1,
               a21, rs_A,
               a21, rs_A,
               buff_1,
               alpha11 );

    // FLA_Trmv_external( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG, A22, a21 );
    bl1_strmv( BLIS1_LOWER_TRIANGULAR,
               BLIS1_CONJ_TRANSPOSE,
               BLIS1_NONUNIT_DIAG,
               mn_ahead,
               A22, rs_A, cs_A,
               a21, rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
示例#10
0
FLA_Error FLA_Copy_external( FLA_Obj A, FLA_Obj B )
{
  FLA_Datatype dt_A;
  FLA_Datatype dt_B;
  int          m_B, n_B;
  int          rs_A, cs_A;
  int          rs_B, cs_B;
  trans_t      blis_trans;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Copy_check( A, B );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  dt_A     = FLA_Obj_datatype( A );
  dt_B     = FLA_Obj_datatype( B );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  m_B      = FLA_Obj_length( B );
  n_B      = FLA_Obj_width( B );
  rs_B     = FLA_Obj_row_stride( B );
  cs_B     = FLA_Obj_col_stride( B );

  if ( FLA_Obj_is_conformal_to( FLA_NO_TRANSPOSE, A, B ) )
    FLA_Param_map_flame_to_blis_trans( FLA_NO_TRANSPOSE, &blis_trans );
  else // if ( FLA_Obj_is_conformal_to( FLA_TRANSPOSE, A, B ) )
    FLA_Param_map_flame_to_blis_trans( FLA_TRANSPOSE, &blis_trans );

  // If A is of type FLA_CONSTANT, then we have to proceed based on the
  // datatype of B.
  if      ( dt_A == FLA_CONSTANT )
  {
    if      ( dt_B == FLA_FLOAT )
    {
      float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
      
      bli_scopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE )
    {
      double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
      
      bli_dcopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_COMPLEX )
    {
      scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      
      bli_ccopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE_COMPLEX )
    {
      dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      
      bli_zcopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
  }
  else if ( dt_A == FLA_INT )
  {
    int*      buff_A = ( int * ) FLA_INT_PTR( A );
    int*      buff_B = ( int * ) FLA_INT_PTR( B );

    bli_icopymt( blis_trans,
                 m_B,
                 n_B,
                 buff_A, rs_A, cs_A,
                 buff_B, rs_B, cs_B );
  }
  else if ( dt_A == FLA_FLOAT )
  {
    float *buff_A = ( float * ) FLA_FLOAT_PTR( A );

    if      ( dt_B == FLA_FLOAT )
    {
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
      
      bli_scopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE )
    {
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
      
      bli_sdcopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_COMPLEX )
    {
      scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      
      bli_sccopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE_COMPLEX )
    {
      dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      
      bli_szcopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );

    if      ( dt_B == FLA_FLOAT )
    {
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
      
      bli_dscopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE )
    {
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
      
      bli_dcopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_COMPLEX )
    {
      scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      
      bli_dccopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE_COMPLEX )
    {
      dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      
      bli_dzcopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );

    if      ( dt_B == FLA_FLOAT )
    {
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
      
      bli_cscopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE )
    {
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
      
      bli_cdcopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_COMPLEX )
    {
      scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      
      bli_ccopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE_COMPLEX )
    {
      dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      
      bli_czcopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );

    if      ( dt_B == FLA_FLOAT )
    {
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );
      
      bli_zscopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE )
    {
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );
      
      bli_zdcopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_COMPLEX )
    {
      scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      
      bli_zccopymt( blis_trans,
                    m_B,
                    n_B,
                    buff_A, rs_A, cs_A,
                    buff_B, rs_B, cs_B );
    }
    else if ( dt_B == FLA_DOUBLE_COMPLEX )
    {
      dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
      
      bli_zcopymt( blis_trans,
                   m_B,
                   n_B,
                   buff_A, rs_A, cs_A,
                   buff_B, rs_B, cs_B );
    }
  }
  
  return FLA_SUCCESS;
}
示例#11
0
FLA_Error FLA_LU_piv_ops_var5( int m_A,
                               int n_A,
                               float*    buff_A, int rs_A, int cs_A,
                               int*      buff_p, int inc_p )
{
  FLA_Error r_val   = FLA_SUCCESS;
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  int       min_m_n = min( m_A, n_A );
  int       i;

  for ( i = 0; i < min_m_n; ++i )
  {
    float     pivot_val = fzero;
    float*    a10t      = buff_A + (0  )*cs_A + (i  )*rs_A;
    float*    alpha11   = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21       = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    a12t      = buff_A + (i+1)*cs_A + (i  )*rs_A;
    float*    A22       = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    int*      pi1       = buff_p + i*inc_p;

    int       m_ahead   = m_A - i - 1;
    int       n_ahead   = n_A - i - 1;

    /*------------------------------------------------------------*/

    // FLA_Merge_2x1( alpha11,
    //                    a21,      &aB1 );

    // FLA_Amax_external( aB1, pi1 );
    bl1_samax( m_ahead + 1,
               alpha11, rs_A,
               pi1 );

    // If a null pivot is encountered, return the index. 
    pivot_val = *(alpha11 + *pi1);
    if ( pivot_val == fzero )  r_val = ( r_val == FLA_SUCCESS ? i : r_val );
    else
    {
      // FLA_Merge_1x2( ABL, ABR,      &AB );
      
      // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
      FLA_Apply_pivots_ln_ops_var1( n_A,
                                    a10t, rs_A, cs_A,
                                    0,
                                    0,
                                    pi1, inc_p );
      
      // FLA_Inv_scal_external( alpha11, a21 );
      bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                     m_ahead,
                     alpha11,
                     a21, rs_A );
    }

    // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
    bl1_sger( BLIS1_NO_CONJUGATE,
              BLIS1_NO_CONJUGATE,
              m_ahead,
              n_ahead,
              buff_m1,
              a21, rs_A,
              a12t, cs_A,
              A22, rs_A, cs_A );

    /*------------------------------------------------------------*/

  }

  return r_val;
}
示例#12
0
FLA_Error FLA_LU_piv_opt_var5( FLA_Obj A, FLA_Obj p )
{
  FLA_Error    r_val = FLA_SUCCESS;
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_p;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_p    = FLA_Obj_vector_inc( p );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      int*   buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_ops_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );
      
      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      int*    buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_opd_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_opc_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_opz_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );
      
      break;
    }
  }

  return r_val;
}
示例#13
0
文件: FLA_Sort.c 项目: pgawron/tlash
FLA_Error FLA_Sort( FLA_Direct direct, FLA_Obj x )
{
	FLA_Datatype datatype;
	FLA_Obj      x_use;
	dim_t        m_x;
	dim_t        inc_x;

	if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
		FLA_Sort_check( direct, x );

	datatype = FLA_Obj_datatype( x );

	m_x      = FLA_Obj_vector_dim( x );
	inc_x    = FLA_Obj_vector_inc( x );

	// If the vector does not have unit stride, copy it to a temporary vector
	// that does have unit stride.
	if ( inc_x != 1 )
	{
		FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, x, &x_use );
		inc_x = FLA_Obj_vector_inc( x_use );
	}
	else
	{
		x_use = x;
	}

	switch ( datatype )
	{
		case FLA_FLOAT:
		{
			float* x_p = ( float* ) FLA_FLOAT_PTR( x_use );

			if ( direct == FLA_FORWARD )
				FLA_Sort_f_ops( m_x,
				                x_p, inc_x );
			else // if ( direct == FLA_BACKWARD )
				FLA_Sort_b_ops( m_x,
				                x_p, inc_x );

			break;
		}

		case FLA_DOUBLE:
		{
			double* x_p = ( double* ) FLA_DOUBLE_PTR( x_use );

			if ( direct == FLA_FORWARD )
				FLA_Sort_f_opd( m_x,
				                x_p, inc_x );
			else // if ( direct == FLA_BACKWARD )
				FLA_Sort_b_opd( m_x,
				                x_p, inc_x );

			break;
		}

	}

	if ( inc_x != 1 )
	{
		FLA_Copy( x_use, x );
		FLA_Obj_free( &x_use );
	}

	return FLA_SUCCESS;
}
示例#14
0
FLA_Error FLA_Hess_UT_step_ops_var4( int m_A,
                                     int m_T,
                                     float* buff_A, int rs_A, int cs_A, 
                                     float* buff_Y, int rs_Y, int cs_Y,
                                     float* buff_Z, int rs_Z, int cs_Z,
                                     float* buff_T, int rs_T, int cs_T )
{
  float*    buff_2  = FLA_FLOAT_PTR( FLA_TWO );
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     first_elem, last_elem;
  float     dot_product;
  float     beta, conj_beta;
  float     inv_tau11;
  float     minus_inv_tau11;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  float*    buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_d  = 1;
  int       inc_e  = 1;
  int       inc_f  = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_ssetm( m_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_ssetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    float*    a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    float*    A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    float*    a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    float*    A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    float*    y10t     = buff_Y + (0  )*cs_Y + (i  )*rs_Y;
    float*    Y20      = buff_Y + (0  )*cs_Y + (i+1)*rs_Y;
    float*    y21      = buff_Y + (i  )*cs_Y + (i+1)*rs_Y;

    float*    z10t     = buff_Z + (0  )*cs_Z + (i  )*rs_Z;
    float*    Z20      = buff_Z + (0  )*cs_Z + (i+1)*rs_Z;
    float*    z21      = buff_Z + (i  )*cs_Z + (i+1)*rs_Z;

    float*    t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    float*    tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    float*    d0       = buff_d + (0  )*inc_d;

    float*    e0       = buff_e + (0  )*inc_e;

    float*    f0       = buff_f + (0  )*inc_f;

    float*    a10t_r   = a10t   + (i-1)*cs_A + (0  )*rs_A;

    float*    a21_t    = a21    + (0  )*cs_A + (0  )*rs_A;
    float*    a21_b    = a21    + (0  )*cs_A + (1  )*rs_A;

    float*    ABL      = a10t;
    float*    ZBL      = z10t;

    float*    a2       = alpha11;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = m_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( a10t_r, last_elem );
      // FLA_Set( FLA_ONE, a10t_r );
      last_elem = *a10t_r;
      *a10t_r = *buff_1;
    }

    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_sgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL,  rs_Z, cs_Z,
               a10t, cs_A,
               buff_1,
               a2,   rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
    bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_m1,
               A20,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a10t_r );
      *a10t_r = last_elem;
    }

    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_ops( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Set( FLA_ONE, inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
      // FLA_Copy( inv_tau11, minus_inv_tau11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
      bl1_sdiv3( buff_1, tau11, &inv_tau11 );
      bl1_sneg2( &inv_tau11, &minus_inv_tau11 );

      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t = *buff_1;

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 y21, rs_Y );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 z21, rs_Z );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 d0,  inc_d );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Y20, rs_Y, cs_Y,
                 a21, rs_A,
                 buff_0,
                 e0,  inc_e );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 Z20, rs_Z, cs_Z,
                 a21, rs_A,
                 buff_0,
                 f0,  inc_f );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Y20, rs_Y, cs_Y,
                 d0,  inc_d,
                 buff_1,
                 y21, rs_Y );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 f0,  inc_f,
                 buff_1,
                 y21, rs_Y );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 A20, rs_A, cs_A,
                 e0,  inc_e,
                 buff_1,
                 z21, rs_Z );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_m1,
                 Z20, rs_Z, cs_Z,
                 d0,  inc_d,
                 buff_1,
                 z21, rs_Z );

      // FLA_Copy( d0, t01 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  n_behind,
                  d0,  inc_d,
                  t01, rs_T );

      // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
      // FLA_Inv_scal( FLA_TWO, beta );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
      bl1_sdot( BLIS1_CONJUGATE,
                m_ahead,
                a21, rs_A,
                z21, rs_Z,
                &beta );
      bl1_sinvscals( buff_2, &beta );
      bl1_scopyconj( &beta, &conj_beta );

      // FLA_Scal( minus_inv_tau11, conj_beta );
      // FLA_Axpy( conj_beta, a21, y21 );
      // FLA_Scal( inv_tau11, y21 );
      bl1_sscals( &minus_inv_tau11, &conj_beta );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &conj_beta,
                  a21, rs_A,
                  y21, rs_Y );
      bl1_sscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  y21, rs_Y );

      // FLA_Scal( minus_inv_tau11, beta );
      // FLA_Axpy( beta, a21, z21 );
      // FLA_Scal( inv_tau11, z21 );
      bl1_sscals( &minus_inv_tau11, &beta );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  a21, rs_A,
                  z21, rs_Z );
      bl1_sscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  z21, rs_Z );

      // FLA_Dot( a12t, a21, dot_product );
      // FLA_Scal( minus_inv_tau11, dot_product );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
      bl1_sdot( BLIS1_NO_CONJUGATE,
                m_ahead,
                a12t, cs_A,
                a21,  rs_A,
                &dot_product );
      bl1_sscals( &minus_inv_tau11, &dot_product );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &dot_product,
                  a21,  rs_A,
                  a12t, cs_A );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
      bl1_sgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 e0,  inc_e );
      bl1_sger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_behind,
                n_ahead,
                &minus_inv_tau11,
                e0,  inc_e,
                a21, rs_A,
                A02, rs_A, cs_A );

      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &d );
  // FLA_Obj_free( &e );
  // FLA_Obj_free( &f );
  FLA_free( buff_d );
  FLA_free( buff_e );
  FLA_free( buff_f );

  return FLA_SUCCESS;
}
示例#15
0
FLA_Error FLA_Herc_external( FLA_Uplo uplo, FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj A )
{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_x; 
  uplo_t       blis_uplo;
  conj_t       blis_conj;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Herc_check( uplo, conj, alpha, x, A );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );

  FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );
  FLA_Param_map_flame_to_blis_conj( conj, &blis_conj );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_x     = ( float * ) FLA_FLOAT_PTR( x );
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );

    bli_ssyr( blis_uplo,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 
    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_x     = ( double * ) FLA_DOUBLE_PTR( x );
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );

    bli_dsyr( blis_uplo,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 
    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_x     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    float    *buff_alpha = ( float    * ) FLA_FLOAT_PTR( alpha );

    bli_cher( blis_uplo,
              blis_conj,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_x     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    double   *buff_alpha = ( double   * ) FLA_DOUBLE_PTR( alpha );

    bli_zher( blis_uplo,
              blis_conj,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 

    break;
  }

  }

  return FLA_SUCCESS;
}
示例#16
0
FLA_Error FLA_Symm_external( FLA_Side side, FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C )
{
  FLA_Datatype datatype;
  int          m_C, n_C;
  int          rs_A, cs_A;
  int          rs_B, cs_B;
  int          rs_C, cs_C;
  side_t       blis_side;
  uplo_t       blis_uplo; 

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Symm_check( side, uplo, alpha, A, B, beta, C );

  if ( FLA_Obj_has_zero_dim( C ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  rs_B     = FLA_Obj_row_stride( B );
  cs_B     = FLA_Obj_col_stride( B );

  m_C      = FLA_Obj_length( C );
  n_C      = FLA_Obj_width( C );
  rs_C     = FLA_Obj_row_stride( C );
  cs_C     = FLA_Obj_col_stride( C );

  FLA_Param_map_flame_to_blis_side( side, &blis_side );
  FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_B     = ( float * ) FLA_FLOAT_PTR( B );
    float *buff_C     = ( float * ) FLA_FLOAT_PTR( C );
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
    float *buff_beta  = ( float * ) FLA_FLOAT_PTR( beta );

    bli_ssymm( blis_side, 
               blis_uplo, 
               m_C, 
               n_C, 
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_B, rs_B, cs_B,
               buff_beta,  
               buff_C, rs_C, cs_C );

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_B     = ( double * ) FLA_DOUBLE_PTR( B );
    double *buff_C     = ( double * ) FLA_DOUBLE_PTR( C );
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
    double *buff_beta  = ( double * ) FLA_DOUBLE_PTR( beta );

    bli_dsymm( blis_side, 
               blis_uplo, 
               m_C, 
               n_C, 
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_B, rs_B, cs_B,
               buff_beta,  
               buff_C, rs_C, cs_C );

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_B     = ( scomplex * ) FLA_COMPLEX_PTR( B );
    scomplex *buff_C     = ( scomplex * ) FLA_COMPLEX_PTR( C );
    scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
    scomplex *buff_beta  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

    bli_csymm( blis_side, 
               blis_uplo, 
               m_C, 
               n_C, 
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_B, rs_B, cs_B,
               buff_beta,  
               buff_C, rs_C, cs_C );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_B     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );
    dcomplex *buff_C     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
    dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    dcomplex *buff_beta  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

    bli_zsymm( blis_side, 
               blis_uplo, 
               m_C, 
               n_C, 
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_B, rs_B, cs_B,
               buff_beta,  
               buff_C, rs_C, cs_C );

    break;
  }

  }
  
  return FLA_SUCCESS;
}
示例#17
0
FLA_Bool FLA_Obj_equals( FLA_Obj A, FLA_Obj B )
{
  FLA_Datatype datatype_A;
  FLA_Datatype datatype_B;
  FLA_Datatype datatype;
  dim_t        m, n;
  dim_t        rs_A, cs_A;
  dim_t        rs_B, cs_B;
  dim_t        i, j;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_equals_check( A, B );

  m      = FLA_Obj_length( A );
  n      = FLA_Obj_width( A );
  rs_A   = FLA_Obj_row_stride( A );
  cs_A   = FLA_Obj_col_stride( A );
  rs_B   = FLA_Obj_row_stride( B );
  cs_B   = FLA_Obj_col_stride( B );

  datatype_A = FLA_Obj_datatype( A );
  datatype_B = FLA_Obj_datatype( B );

  // If A is a non-FLA_CONSTANT object, then we should proceed based on the
  // value of datatype_A. In such a situation, either datatype_B is an exact
  // match and we're fine, or datatype_B is FLA_CONSTANT, in which case we're
  // also covered since FLA_CONSTANT encompassas all numerical types.
  // If A is an FLA_CONSTANT object, then we should proceed based on the value
  // of datatype_B. In this case, datatype_B is either a non-FLA_CONSTANT type,
  // which mirrors the second sub-case above, or datatype_B is FLA_CONSTANT,
  // in which case both types are FLA_CONSTANT and therefore we have to handle
  // that case. Only if both are FLA_CONSTANTs does the FLA_CONSTANT case
  // statement below execute.
  if ( datatype_A != FLA_CONSTANT )
    datatype = datatype_A;
  else
    datatype = datatype_B;

  switch ( datatype )
  {
    case FLA_CONSTANT:
    {
      // We require ALL floating-point fields to be the same.
      float*    buffs_A = ( float    * ) FLA_FLOAT_PTR( A );
      float*    buffs_B = ( float    * ) FLA_FLOAT_PTR( B );
      double*   buffd_A = ( double   * ) FLA_DOUBLE_PTR( A );
      double*   buffd_B = ( double   * ) FLA_DOUBLE_PTR( B );
      scomplex* buffc_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex* buffc_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      dcomplex* buffz_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buffz_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

      if ( *buffs_A != *buffs_B ||
           *buffd_A != *buffd_B ||
           buffc_A->real != buffc_B->real ||
           buffc_A->imag != buffc_B->imag ||
           buffz_A->real != buffz_B->real ||
           buffz_A->imag != buffz_B->imag )
      {
        return FALSE;
      }

      break;
    }

    case FLA_INT:
    {
      int *buff_A = ( int * ) FLA_INT_PTR( A );
      int *buff_B = ( int * ) FLA_INT_PTR( B );

      for ( j = 0; j < n; j++ )
        for ( i = 0; i < m; i++ )
          if ( buff_A[ j * cs_A + i * rs_A ] != 
               buff_B[ j * cs_B + i * rs_B ] )
          {
            return FALSE;
          }

      break;
    }

    case FLA_FLOAT:
    {
      float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );

      for ( j = 0; j < n; j++ )
        for ( i = 0; i < m; i++ )
          if ( buff_A[ j * cs_A + i * rs_A ] != 
               buff_B[ j * cs_B + i * rs_B ] )
          {
            return FALSE;
          }

      break;
    }

    case FLA_DOUBLE:
    {
      double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );

      for ( j = 0; j < n; j++ )
        for ( i = 0; i < m; i++ )
          if ( buff_A[ j * cs_A + i * rs_A ] != 
               buff_B[ j * cs_B + i * rs_B ] )
          {
            return FALSE;
          }

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex *buff_B = ( scomplex * ) FLA_COMPLEX_PTR( B );

      for ( j = 0; j < n; j++ )
        for ( i = 0; i < m; i++ )
          if ( buff_A[ j * cs_A + i * rs_A ].real != buff_B[ j * cs_B + i * rs_B ].real ||
               buff_A[ j * cs_A + i * rs_A ].imag != buff_B[ j * cs_B + i * rs_B ].imag )
          {
            return FALSE;
          }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex *buff_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

      for ( j = 0; j < n; j++ )
        for ( i = 0; i < m; i++ )
          if ( buff_A[ j * cs_A + i * rs_A ].real != buff_B[ j * cs_B + i * rs_B ].real ||
               buff_A[ j * cs_A + i * rs_A ].imag != buff_B[ j * cs_B + i * rs_B ].imag )
          {
            return FALSE;
          }

      break;
    }
  }

  return TRUE;
}
FLA_Error FLA_Tridiag_UT_l_step_ops_var2( int m_A,
                                          int m_T,
                                          float* buff_A, int rs_A, int cs_A, 
                                          float* buff_T, int rs_T, int cs_T )
{
  float*    buff_2  = FLA_FLOAT_PTR( FLA_TWO );
  float*    buff_1  = FLA_FLOAT_PTR( FLA_ONE );
  float*    buff_0  = FLA_FLOAT_PTR( FLA_ZERO );
  float*    buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );

  float     first_elem;
  float     beta;
  float     inv_tau11;
  float     minus_inv_tau11;
  float     minus_upsilon11, minus_conj_upsilon11;
  float     minus_zeta11, minus_conj_zeta11;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  float*    buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  float*    buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u  = 1;
  int       inc_z  = 1;
  int       inc_w  = 1;

  // Initialize some variables (only to prevent compiler warnings).
  first_elem      = *buff_0;
  minus_inv_tau11 = *buff_0;

  for ( i = 0; i < b_alg; ++i )
  {
    float*    A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    float*    a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    float*    A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    float*    t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    float*    tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    float*    upsilon11= buff_u + (i  )*inc_u;
    float*    u21      = buff_u + (i+1)*inc_u;

    float*    zeta11   = buff_z + (i  )*inc_z;
    float*    z21      = buff_z + (i+1)*inc_z;

    float*    w21      = buff_w + (i+1)*inc_w;

    float*    a21_t    = a21    + (0  )*cs_A + (0  )*rs_A;
    float*    a21_b    = a21    + (0  )*cs_A + (1  )*rs_A;

    int       m_ahead  = m_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon11, minus_upsilon11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
      // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
      bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
      bl1_scopyconj( &minus_upsilon11, &minus_conj_upsilon11 );

      // FLA_Copy( zeta11, minus_zeta11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
      // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
      bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
      bl1_scopyconj( &minus_zeta11, &minus_conj_zeta11 );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11,    alpha11 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11,    upsilon11, alpha11 );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  1,
                  &minus_upsilon11,
                  zeta11,  1,
                  alpha11, 1 );
      bl1_saxpyv( BLIS1_CONJUGATE,
                  1,
                  &minus_zeta11,
                  upsilon11, 1,
                  alpha11,  1 );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11,    u21, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_zeta11,
                  u21, inc_u,
                  a21, rs_A );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_upsilon11,
                  z21, inc_z,
                  a21, rs_A );
    }

    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_ops( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Set( FLA_ONE, inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
      // FLA_Copy( inv_tau11, minus_inv_tau11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
      bl1_sdiv3( buff_1, tau11, &inv_tau11 );
      bl1_sneg2( &inv_tau11, &minus_inv_tau11 );

      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t = *buff_1;
    }

    if ( m_behind > 0 )
    {
      // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
      bl1_ssyr2( BLIS1_LOWER_TRIANGULAR,
                 m_ahead,
                 buff_m1,
                 u21, inc_u,
                 z21, inc_z,
                 A22, rs_A, cs_A );
    }

    if ( m_ahead > 0 )
    {
      // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
      bl1_ssymv( BLIS1_LOWER_TRIANGULAR,
                 m_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 w21, inc_w );

      // FLA_Copy( a21, u21 );
      // FLA_Copy( w21, z21 );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  u21, inc_u );
      bl1_scopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w21, inc_w,
                  z21, inc_z );

      // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
      // FLA_Inv_scal( FLA_TWO, beta );
      bl1_sdot( BLIS1_CONJUGATE,
                m_ahead,
                a21, rs_A,
                z21, inc_z,
                &beta );
      bl1_sinvscals( buff_2, &beta );

      // FLA_Scal( minus_inv_tau11, beta );
      // FLA_Axpy( beta, a21, z21 );
      // FLA_Scal( inv_tau11, z21 );
      bl1_sscals( &minus_inv_tau11, &beta );
      bl1_saxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  a21, rs_A,
                  z21, inc_z );
      bl1_sscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  z21, inc_z );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
      bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 t01, rs_T );

      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    if ( m_behind + 1 == b_alg && m_ahead > 0 )
    {
      // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
      bl1_ssyr2( BLIS1_LOWER_TRIANGULAR,
                 m_ahead,
                 buff_m1,
                 u21, inc_u,
                 z21, inc_z,
                 A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &u );
  // FLA_Obj_free( &z );
  // FLA_Obj_free( &w );
  FLA_free( buff_u );
  FLA_free( buff_z );
  FLA_free( buff_w );

  return FLA_SUCCESS;
}
示例#19
0
FLA_Error FLA_Househ3UD_UT( FLA_Obj chi_0, FLA_Obj x1, FLA_Obj y2, FLA_Obj tau )
/*
  Compute an up-and-downdating UT Householder transformation

          / / 1 0 0 \            / 1 0  0 \  / 1  \ ( 1  u1' v2' ) \
    H  =  | | 0 I 0 | - inv(tau) | 0 I  0 |  | u1 |                |
          \ \ 0 0 I /            \ 0 0 -I /  \ v2 /                /

  by computing tau, u1, and v2 such that the following is satisfied:

      / chi_0 \   / alpha \
    H |  x1   | = |   0   |
      \  y2   /   \   0   /

  where

    alpha  = - lambda * chi_0 / | chi_0 |

    lambda =  sqrt( conj(chi0) chi0 + x1' x1 - y2' y2 )

              / chi_0 \
    x      =  |  x1   |
              \  y2   /

    tau    =  ( 1 + u1' u1 - v2' v2 ) / 2

    u1     =  x1 / ( chi_0 - alpha )

    v2     =  -y2 / ( chi_0 - alpha )

  Upon completion, alpha, u1, and v2 have overwritten objects chi_0, x1,
  and y2, respectively.

  -FGVZ
*/
{
  FLA_Datatype datatype;
  int          m_x1;
  int          m_y2;
  int          inc_x1;
  int          inc_y2;

  datatype = FLA_Obj_datatype( x1 );

  m_x1     = FLA_Obj_vector_dim( x1 );
  m_y2     = FLA_Obj_vector_dim( y2 );
  inc_x1   = FLA_Obj_vector_inc( x1 );
  inc_y2   = FLA_Obj_vector_inc( y2 );

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Househ3UD_UT_check( chi_0, x1, y2, tau );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* chi_0_p = ( float* ) FLA_FLOAT_PTR( chi_0 );
      float* x1_p    = ( float* ) FLA_FLOAT_PTR( x1 );
      float* y2_p    = ( float* ) FLA_FLOAT_PTR( y2 );
      float* tau_p   = ( float* ) FLA_FLOAT_PTR( tau );

      FLA_Househ3UD_UT_ops( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }

    case FLA_DOUBLE:
    {
      double* chi_0_p = ( double* ) FLA_DOUBLE_PTR( chi_0 );
      double* x1_p    = ( double* ) FLA_DOUBLE_PTR( x1 );
      double* y2_p    = ( double* ) FLA_DOUBLE_PTR( y2 );
      double* tau_p   = ( double* ) FLA_DOUBLE_PTR( tau );

      FLA_Househ3UD_UT_opd( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* chi_0_p = ( scomplex* ) FLA_COMPLEX_PTR( chi_0 );
      scomplex* x1_p    = ( scomplex* ) FLA_COMPLEX_PTR( x1 );
      scomplex* y2_p    = ( scomplex* ) FLA_COMPLEX_PTR( y2 );
      scomplex* tau_p   = ( scomplex* ) FLA_COMPLEX_PTR( tau );

      FLA_Househ3UD_UT_opc( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* chi_0_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( chi_0 );
      dcomplex* x1_p    = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( x1 );
      dcomplex* y2_p    = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( y2 );
      dcomplex* tau_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );

      FLA_Househ3UD_UT_opz( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }
  }

  return FLA_SUCCESS;
}
示例#20
0
FLA_Error FLA_Househ3UD_UT_ops( int       m_x1,
                                int       m_y2,
                                float*    chi_0,
                                float*    x1, int inc_x1,
                                float*    y2, int inc_y2,
                                float*    tau )
{
  float    one_half = *FLA_FLOAT_PTR( FLA_ONE_HALF );
  float    alpha;
  float    chi_0_minus_alpha;
  float    neg_chi_0_minus_alpha;
  float    abs_chi_0;
  float    norm_x_1;
  float    norm_y_2;
  float    lambda;
  float    abs_sq_chi_0_minus_alpha;
  int      i_one = 1;

  //
  // Compute the 2-norms of x_1 and y_2:
  //
  //   norm_x_1 := || x_1 ||_2
  //   norm_y_2 := || y_2 ||_2
  //

  bl1_snrm2( m_x1,
             x1, inc_x1,
             &norm_x_1 );

  bl1_snrm2( m_y2,
             y2, inc_y2,
             &norm_y_2 );

  //
  // If 2-norms of x_1, y_2 are zero, then return with trivial tau, chi_0 values.
  //

  if ( norm_x_1 == 0.0F && 
       norm_y_2 == 0.0F )
  {
    *chi_0 = -(*chi_0);
    *tau   = one_half;

    return FLA_SUCCESS;
  }

  //
  // Compute the absolute value (magnitude) of chi_0, which equals the 2-norm
  // of chi_0:
  //
  //   abs_chi_0 :=  | chi_0 |  =  || chi_0 ||_2
  //

  bl1_snrm2( i_one,
             chi_0, i_one,
             &abs_chi_0 );

  //
  // Compute lambda:
  //
  //   lambda := sqrt( conj(chi0) chi0 + x1' x1 - y2' y2 )
  //

  lambda = ( float ) sqrt( abs_chi_0 * abs_chi_0 + 
                           norm_x_1  * norm_x_1  -
                           norm_y_2  * norm_y_2 );

  // Compute alpha:
  //
  //   alpha := - lambda * chi_0 / | chi_0 |
  //          = -sign( chi_0 ) * lambda
  //

  alpha = -ssign( *chi_0 ) * lambda;


  //
  // Overwrite x_1 and y_2 with u_1 and v_2, respectively:
  //
  //   x_1 := x_1 / ( chi_0 - alpha )
  //   y_2 := y_2 / -( chi_0 - alpha )
  //

  chi_0_minus_alpha = (*chi_0) - alpha;

  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                 m_x1,
                 &chi_0_minus_alpha,
                 x1, inc_x1 );

  neg_chi_0_minus_alpha = -chi_0_minus_alpha;

  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
                 m_y2,
                 &neg_chi_0_minus_alpha,
                 y2, inc_y2 );

  //
  // Compute tau:
  //
  //   tau := ( 1 + u_1' * u_1 - v_2' * v_2 ) / 2
  //        = ( ( chi_1 - alpha ) * conj( chi_1 - alpha ) + x_1' * x_1 - y_2' * y_2 ) /
  //          ( 2 * ( chi_1 - alpha ) * conj( chi_1 - alpha ) )
  //        = ( | chi_1 - alpha |^2 + || x_2 ||_2^2 - || y_2 ||_2^2 ) /
  //          ( 2 * | chi_1 - alpha |^2 )
  //

  abs_sq_chi_0_minus_alpha = chi_0_minus_alpha * chi_0_minus_alpha;

  *tau = ( abs_sq_chi_0_minus_alpha +
           norm_x_1 * norm_x_1 -
           norm_y_2 * norm_y_2 ) /
         ( 2.0F * abs_sq_chi_0_minus_alpha );

  //
  // Overwrite chi_0 with alpha:
  //
  //   chi_0 := alpha
  //

  *chi_0 = alpha;

  return FLA_SUCCESS;
}
示例#21
0
FLA_Bool FLA_Obj_has_nan( FLA_Obj A )
{
  FLA_Datatype datatype;
  dim_t        i, j, m, n, cs, rs;
  
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_has_nan_check( A );

  datatype = FLA_Obj_datatype( A );
  m        = FLA_Obj_length( A );
  n        = FLA_Obj_width( A );
  cs       = FLA_Obj_col_stride( A );
  rs       = FLA_Obj_row_stride( A );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float *buff = ( float * ) FLA_FLOAT_PTR( A );

      for ( j=0; j<n; ++j )
        for ( i=0; i<m; ++i ) 
        {
          float val = buff[i*cs + j*rs];
          if ( val != val ) return TRUE;
        }
      break;
    }
    case FLA_DOUBLE:
    {
      double *buff = ( double * ) FLA_DOUBLE_PTR( A );

      for ( j=0; j<n; ++j )
        for ( i=0; i<m; ++i ) 
        {
          double val = buff[i*cs + j*rs];
          if ( val != val ) return TRUE;
        }
      break;
    }
    case FLA_COMPLEX:
    {
      scomplex *buff = ( scomplex * ) FLA_COMPLEX_PTR( A );

      for ( j=0; j<n; ++j )
        for ( i=0; i<m; ++i ) 
        {
          scomplex val = buff[i*cs + j*rs];
          if ( val.real != val.real || val.imag != val.imag ) return TRUE;
        }
      break;
    }
    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex *buff = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );

      for ( j=0; j<n; ++j )
        for ( i=0; i<m; ++i ) 
        {
          dcomplex val = buff[i*cs + j*rs];
          if ( val.real != val.real || val.imag != val.imag ) return TRUE;
        }
      break;
    }
  }

  return FALSE;
}
FLA_Error FLA_Bidiag_blk_external( FLA_Obj A, FLA_Obj tu, FLA_Obj tv )
{
  int          info = 0;
#ifdef FLA_ENABLE_EXTERNAL_LAPACK_INTERFACES
  FLA_Datatype datatype;
  int          m_A, n_A, cs_A;
  int          min_m_n, max_m_n;
  int          lwork;
  FLA_Obj      d, e, work_obj;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
    FLA_Bidiag_check( A, tu, tv );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  min_m_n  = FLA_Obj_min_dim( A );
  max_m_n  = FLA_Obj_max_dim( A );
  cs_A     = FLA_Obj_col_stride( A );

  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), min_m_n,     1, 0, 0, &d );
  FLA_Obj_create( FLA_Obj_datatype_proj_to_real( A ), min_m_n - 1, 1, 0, 0, &e );

  lwork    = (m_A + n_A) * FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN );
  FLA_Obj_create( datatype, lwork, 1, 0, 0, &work_obj );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float* buff_A    = ( float * ) FLA_FLOAT_PTR( A );
    float* buff_d    = ( float * ) FLA_FLOAT_PTR( d );
    float* buff_e    = ( float * ) FLA_FLOAT_PTR( e );
    float* buff_tu   = ( float * ) FLA_FLOAT_PTR( tu );
    float* buff_tv   = ( float * ) FLA_FLOAT_PTR( tv );
    float* buff_work = ( float * ) FLA_FLOAT_PTR( work_obj );

    F77_sgebrd( &m_A,
                &n_A,
                buff_A, &cs_A,
                buff_d,
                buff_e,
                buff_tu,
                buff_tv,
                buff_work,
                &lwork,
                &info );

    break;
  }

  case FLA_DOUBLE:
  {
    double* buff_A    = ( double * ) FLA_DOUBLE_PTR( A );
    double* buff_d    = ( double * ) FLA_DOUBLE_PTR( d );
    double* buff_e    = ( double * ) FLA_DOUBLE_PTR( e );
    double* buff_tu   = ( double * ) FLA_DOUBLE_PTR( tu );
    double* buff_tv   = ( double * ) FLA_DOUBLE_PTR( tv );
    double* buff_work = ( double * ) FLA_DOUBLE_PTR( work_obj );

    F77_dgebrd( &m_A,
                &n_A,
                buff_A, &cs_A,
                buff_d,
                buff_e,
                buff_tu,
                buff_tv,
                buff_work,
                &lwork,
                &info );

    break;
  } 

  case FLA_COMPLEX:
  {
    scomplex* buff_A    = ( scomplex * ) FLA_COMPLEX_PTR( A );
    float*    buff_d    = ( float    * ) FLA_FLOAT_PTR( d );
    float*    buff_e    = ( float    * ) FLA_FLOAT_PTR( e );
    scomplex* buff_tu   = ( scomplex * ) FLA_COMPLEX_PTR( tu );
    scomplex* buff_tv   = ( scomplex * ) FLA_COMPLEX_PTR( tv );
    scomplex* buff_work = ( scomplex * ) FLA_COMPLEX_PTR( work_obj );

    F77_cgebrd( &m_A,
                &n_A,
                buff_A, &cs_A,
                buff_d,
                buff_e,
                buff_tu,
                buff_tv,
                buff_work,
                &lwork,
                &info );

    break;
  } 

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex* buff_A    = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    double*   buff_d    = ( double   * ) FLA_DOUBLE_PTR( d );
    double*   buff_e    = ( double   * ) FLA_DOUBLE_PTR( e );
    dcomplex* buff_tu   = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( tu );
    dcomplex* buff_tv   = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( tv );
    dcomplex* buff_work = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( work_obj );

    F77_zgebrd( &m_A,
                &n_A,
                buff_A, &cs_A,
                buff_d,
                buff_e,
                buff_tu,
                buff_tv,
                buff_work,
                &lwork,
                &info );

    break;
  } 

  }

  FLA_Obj_free( &d );
  FLA_Obj_free( &e );
  FLA_Obj_free( &work_obj );
#else
  FLA_Check_error_code( FLA_EXTERNAL_LAPACK_NOT_IMPLEMENTED );
#endif

  return info;
}
示例#23
0
FLA_Bool FLA_Obj_le( FLA_Obj A, FLA_Obj B )
{
  FLA_Datatype datatype_A;
  FLA_Datatype datatype_B;
  FLA_Datatype datatype;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Obj_le_check( A, B );

  datatype_A = FLA_Obj_datatype( A );
  datatype_B = FLA_Obj_datatype( B );

  if ( datatype_A != FLA_CONSTANT ) datatype = datatype_A;
  else                              datatype = datatype_B;

  switch ( datatype )
  {
    case FLA_CONSTANT:
    {
      // We require ALL floating-point fields to be the same.
      float*    buffs_A = ( float    * ) FLA_FLOAT_PTR( A );
      float*    buffs_B = ( float    * ) FLA_FLOAT_PTR( B );
      double*   buffd_A = ( double   * ) FLA_DOUBLE_PTR( A );
      double*   buffd_B = ( double   * ) FLA_DOUBLE_PTR( B );
      scomplex* buffc_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
      scomplex* buffc_B = ( scomplex * ) FLA_COMPLEX_PTR( B );
      dcomplex* buffz_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buffz_B = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( B );

      if ( !( *buffs_A      <= *buffs_B      &&
              *buffd_A      <= *buffd_B      &&
              buffc_A->real <= buffc_B->real &&
              buffc_A->imag <= buffc_B->imag &&
              buffz_A->real <= buffz_B->real &&
              buffz_A->imag <= buffz_B->imag ) )
      {
        return FALSE;
      }

      break;
    }

    case FLA_INT:
    {
      int *buff_A = ( int * ) FLA_INT_PTR( A );
      int *buff_B = ( int * ) FLA_INT_PTR( B );

      if ( !( *buff_A <= *buff_B ) ) return FALSE;

      break;
    }

    case FLA_FLOAT:
    {
      float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
      float *buff_B = ( float * ) FLA_FLOAT_PTR( B );

      if ( !( *buff_A <= *buff_B ) ) return FALSE;

      break;
    }

    case FLA_DOUBLE:
    {
      double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
      double *buff_B = ( double * ) FLA_DOUBLE_PTR( B );

      if ( !( *buff_A <= *buff_B ) ) return FALSE;

      break;
    }

  }

  return TRUE;
}
FLA_Error FLA_Apply_G_lf_blk_var3( FLA_Obj G, FLA_Obj A, dim_t b_alg )
{
    FLA_Datatype datatype;
    int          k_G, m_A, n_A;
    int          rs_G, cs_G;
    int          rs_A, cs_A;

    datatype = FLA_Obj_datatype( A );

    k_G      = FLA_Obj_width( G );
    rs_G     = FLA_Obj_row_stride( G );
    cs_G     = FLA_Obj_col_stride( G );

    n_A      = FLA_Obj_length( A );
    m_A      = FLA_Obj_width( A );
    cs_A     = FLA_Obj_row_stride( A );
    rs_A     = FLA_Obj_col_stride( A );

    switch ( datatype )
    {
    case FLA_FLOAT:
    {
        scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_bls_var3( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A,
                                 b_alg );

        break;
    }

    case FLA_DOUBLE:
    {
        dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_bld_var3( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A,
                                 b_alg );

        break;
    }

    case FLA_COMPLEX:
    {
        scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_blc_var3( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A,
                                 b_alg );

        break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
        dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_blz_var3( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A,
                                 b_alg );

        break;
    }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var8( FLA_Obj G, FLA_Obj A )
/*
  Apply k sets of Givens rotations to a matrix A from the right,
  where each set takes the form:

    A := A ( G(n-1,k) ... G(1,k) G(0,k) )'
       = A G(0,k)' G(1,k)' ... G(n-1,k)'

  where Gik is the ith Givens rotation formed from the kth set,
  stored in the (i,k) entries of of G:

    Gik  =  / gamma_ik  -sigma_ik \
            \ sigma_ik   gamma_ik /

  This variant iterates in pipelined, overlapping fashion and
  applies rotations to four columns at a time.

  -FGVZ
*/
{
    FLA_Datatype datatype;
    int          k_G, m_A, n_A;
    int          rs_G, cs_G;
    int          rs_A, cs_A;

    datatype = FLA_Obj_datatype( A );

    k_G      = FLA_Obj_width( G );
    m_A      = FLA_Obj_length( A );
    n_A      = FLA_Obj_width( A );

    rs_G     = FLA_Obj_row_stride( G );
    cs_G     = FLA_Obj_col_stride( G );

    rs_A     = FLA_Obj_row_stride( A );
    cs_A     = FLA_Obj_col_stride( A );

    switch ( datatype )
    {
    case FLA_FLOAT:
    {
        scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var8( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );

        break;
    }

    case FLA_DOUBLE:
    {
        dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var8( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );

        break;
    }

    case FLA_COMPLEX:
    {
        scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var8( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );

        break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
        dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var8( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );

        break;
    }
    }

    return FLA_SUCCESS;
}
示例#26
0
FLA_Error FLA_Tevd_v_opt_var2( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg )
{
	FLA_Error    r_val = FLA_SUCCESS;
	FLA_Datatype datatype;
	int          m_A, m_U, n_G;
	int          inc_d;
	int          inc_e;
	int          rs_G, cs_G;
	int          rs_R, cs_R;
	int          rs_U, cs_U;
	int          rs_W, cs_W;

	datatype = FLA_Obj_datatype( U );

	m_A       = FLA_Obj_vector_dim( d );
	m_U       = FLA_Obj_length( U );
	n_G       = FLA_Obj_width( G );

	inc_d     = FLA_Obj_vector_inc( d );
	inc_e     = FLA_Obj_vector_inc( e );
	
	rs_G      = FLA_Obj_row_stride( G );
	cs_G      = FLA_Obj_col_stride( G );

	rs_R      = FLA_Obj_row_stride( R );
	cs_R      = FLA_Obj_col_stride( R );

	rs_W      = FLA_Obj_row_stride( W );
	cs_W      = FLA_Obj_col_stride( W );

	rs_U      = FLA_Obj_row_stride( U );
	cs_U      = FLA_Obj_col_stride( U );


	switch ( datatype )
	{
		case FLA_FLOAT:
		{
			float*    buff_d = FLA_FLOAT_PTR( d );
			float*    buff_e = FLA_FLOAT_PTR( e );
			scomplex* buff_G = FLA_COMPLEX_PTR( G );
			float*    buff_R = FLA_FLOAT_PTR( R );
			float*    buff_W = FLA_FLOAT_PTR( W );
			float*    buff_U = FLA_FLOAT_PTR( U );

			r_val = FLA_Tevd_v_ops_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}

		case FLA_DOUBLE:
		{
			double*   buff_d = FLA_DOUBLE_PTR( d );
			double*   buff_e = FLA_DOUBLE_PTR( e );
			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
			double*   buff_R = FLA_DOUBLE_PTR( R );
			double*   buff_W = FLA_DOUBLE_PTR( W );
			double*   buff_U = FLA_DOUBLE_PTR( U );

			r_val = FLA_Tevd_v_opd_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}

		case FLA_COMPLEX:
		{
			float*    buff_d = FLA_FLOAT_PTR( d );
			float*    buff_e = FLA_FLOAT_PTR( e );
			scomplex* buff_G = FLA_COMPLEX_PTR( G );
			float*    buff_R = FLA_FLOAT_PTR( R );
			scomplex* buff_W = FLA_COMPLEX_PTR( W );
			scomplex* buff_U = FLA_COMPLEX_PTR( U );

			r_val = FLA_Tevd_v_opc_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}

		case FLA_DOUBLE_COMPLEX:
		{
			double*   buff_d = FLA_DOUBLE_PTR( d );
			double*   buff_e = FLA_DOUBLE_PTR( e );
			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
			double*   buff_R = FLA_DOUBLE_PTR( R );
			dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
			dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );

			r_val = FLA_Tevd_v_opz_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}
	}

	return r_val;
}
示例#27
0
FLA_Error FLA_Add_to_diag( void* diag_value, FLA_Obj A )
{
  FLA_Datatype datatype;
  dim_t        j, min_m_n;
  dim_t        rs, cs;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Add_to_diag_check( diag_value, A );

  datatype = FLA_Obj_datatype( A );
  min_m_n  = FLA_Obj_min_dim( A );
  rs       = FLA_Obj_row_stride( A );
  cs       = FLA_Obj_col_stride( A );

  switch ( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A    = ( float * ) FLA_FLOAT_PTR( A );
    float *value_ptr = ( float * ) diag_value;

    for ( j = 0; j < min_m_n; j++ )
      buff_A[ j*cs + j*rs ] += *value_ptr;

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A    = ( double * ) FLA_DOUBLE_PTR( A );
    double *value_ptr = ( double * ) diag_value;

    for ( j = 0; j < min_m_n; j++ )
      buff_A[ j*cs + j*rs ] += *value_ptr;

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A    = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *value_ptr = ( scomplex * ) diag_value;

    for ( j = 0; j < min_m_n; j++ )
    {
      buff_A[ j*cs + j*rs ].real += value_ptr->real;
      buff_A[ j*cs + j*rs ].imag += value_ptr->imag;
    }

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A    = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *value_ptr = ( dcomplex * ) diag_value;

    for ( j = 0; j < min_m_n; j++ )
    {
      buff_A[ j*cs + j*rs ].real += value_ptr->real;
      buff_A[ j*cs + j*rs ].imag += value_ptr->imag;
    }

    break;
  }

  }

  return FLA_SUCCESS;
}
示例#28
0
// According to the sorted order of a given vector s,
// U and V are reordered in columns while C is reordered
// in rows when they need to be applied.
FLA_Error FLA_Sort_bsvd_ext( FLA_Direct direct, FLA_Obj s,
                             FLA_Bool apply_U, FLA_Obj U,
                             FLA_Bool apply_V, FLA_Obj V,
                             FLA_Bool apply_C, FLA_Obj C )
{
    FLA_Datatype datatype;
    dim_t        m_U, rs_U, cs_U;
    dim_t        m_V, rs_V, cs_V;
    dim_t        n_C, rs_C, cs_C;
    dim_t        m_s, inc_s;

    //if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    //	FLA_Sort_bsvd_check( direct, s,
    //                         apply_U, U,
    //                         apply_V, V,
    //                         apply_C, C );

    // Sort singular values only; quick sort
    if ( apply_U == FALSE && apply_V == FALSE  )
        return FLA_Sort( direct, s );

    // s dimensions must be provided.
    m_s      = FLA_Obj_vector_dim( s );
    inc_s    = FLA_Obj_vector_inc( s );

    // Datatype of U, V and C must be consistent and must be defined from one of them.
    FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( U, apply_U, datatype, m_U, FLA_Obj_length, rs_U, cs_U );
    FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( V, apply_V, datatype, m_V, FLA_Obj_length, rs_V, cs_V );
    FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( C, apply_C, datatype, n_C, FLA_Obj_width,  rs_C, cs_C );
    
    switch ( datatype )
    {
    case FLA_FLOAT:
    {
        float* s_p = ( float* ) FLA_FLOAT_PTR( s );
        float* U_p = ( apply_U == TRUE ? ( float* ) FLA_FLOAT_PTR( U ) : NULL );
        float* V_p = ( apply_V == TRUE ? ( float* ) FLA_FLOAT_PTR( V ) : NULL );
        float* C_p = ( apply_C == TRUE ? ( float* ) FLA_FLOAT_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_ops( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_ops( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    case FLA_DOUBLE:
    {
        double* s_p = ( double* ) FLA_DOUBLE_PTR( s );
        double* U_p = ( apply_U == TRUE ? ( double* ) FLA_DOUBLE_PTR( U ) : NULL );
        double* V_p = ( apply_V == TRUE ? ( double* ) FLA_DOUBLE_PTR( V ) : NULL );
        double* C_p = ( apply_C == TRUE ? ( double* ) FLA_DOUBLE_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_opd( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_opd( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    case FLA_COMPLEX:
    {
        float*    s_p = ( float*    ) FLA_FLOAT_PTR( s );
        scomplex* U_p = ( apply_U == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( U ) : NULL );
        scomplex* V_p = ( apply_V == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( V ) : NULL );
        scomplex* C_p = ( apply_C == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_opc( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_opc( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    case FLA_DOUBLE_COMPLEX:
    {
        double*   s_p = ( double*   ) FLA_DOUBLE_PTR( s );
        dcomplex* U_p = ( apply_U == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( U ) : NULL );
        dcomplex* V_p = ( apply_V == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( V ) : NULL );
        dcomplex* C_p = ( apply_C == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_opz( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_opz( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    }
    return FLA_SUCCESS;
}
示例#29
0
FLA_Error FLA_Hess_UT_step_opt_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T )
{
  FLA_Datatype datatype;
  int          m_A, m_T;
  int          rs_A, cs_A;
  int          rs_Y, cs_Y;
  int          rs_Z, cs_Z;
  int          rs_T, cs_T;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  m_T      = FLA_Obj_length( T );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  rs_Y     = FLA_Obj_row_stride( Y );
  cs_Y     = FLA_Obj_col_stride( Y );

  rs_Z     = FLA_Obj_row_stride( Z );
  cs_Z     = FLA_Obj_col_stride( Z );

  rs_T     = FLA_Obj_row_stride( T );
  cs_T     = FLA_Obj_col_stride( T );
  

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_Y = FLA_FLOAT_PTR( Y );
      float* buff_Z = FLA_FLOAT_PTR( Z );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_Y = FLA_DOUBLE_PTR( Y );
      double* buff_Z = FLA_DOUBLE_PTR( Z );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
      scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
      dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
示例#30
0
FLA_Error FLA_Trmm_external_gpu( FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu )
{
  FLA_Datatype datatype;
  int          m_B, n_B;
  int          ldim_A;
  int          ldim_B;
  char         blas_side; 
  char         blas_uplo;
  char         blas_trans;
  char         blas_diag;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Trmm_check( side, uplo, trans, diag, alpha, A, B );

  if ( FLA_Obj_has_zero_dim( B ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  ldim_A   = FLA_Obj_length( A );

  m_B      = FLA_Obj_length( B );
  n_B      = FLA_Obj_width( B );
  ldim_B   = FLA_Obj_length( B );

  FLA_Param_map_flame_to_netlib_side( side, &blas_side );
  FLA_Param_map_flame_to_netlib_uplo( uplo, &blas_uplo );
  FLA_Param_map_flame_to_netlib_trans( trans, &blas_trans );
  FLA_Param_map_flame_to_netlib_diag( diag, &blas_diag );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );

    cublasStrmm( blas_side,
                 blas_uplo, 
                 blas_trans,
                 blas_diag,
                 m_B,
                 n_B,
                 *buff_alpha,
                 ( float * ) A_gpu, ldim_A,
                 ( float * ) B_gpu, ldim_B );
    
    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );

    cublasDtrmm( blas_side,
                 blas_uplo, 
                 blas_trans,
                 blas_diag,
                 m_B,
                 n_B,
                 *buff_alpha,
                 ( double * ) A_gpu, ldim_A,
                 ( double * ) B_gpu, ldim_B );

    break;
  }

  case FLA_COMPLEX:
  {
    cuComplex *buff_alpha = ( cuComplex * ) FLA_COMPLEX_PTR( alpha );

    cublasCtrmm( blas_side,
                 blas_uplo, 
                 blas_trans,
                 blas_diag,
                 m_B,
                 n_B,
                 *buff_alpha,
                 ( cuComplex * ) A_gpu, ldim_A,
                 ( cuComplex * ) B_gpu, ldim_B );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    cuDoubleComplex *buff_alpha = ( cuDoubleComplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

    cublasZtrmm( blas_side,
                 blas_uplo, 
                 blas_trans,
                 blas_diag,
                 m_B,
                 n_B,
                 *buff_alpha,
                 ( cuDoubleComplex * ) A_gpu, ldim_A,
                 ( cuDoubleComplex * ) B_gpu, ldim_B );

    break;
  }

  }

  return FLA_SUCCESS;
}