コード例 #1
0
FLA_Error FLA_Tevd_eigval_v_opt_var1( FLA_Obj G, FLA_Obj d, FLA_Obj e, FLA_Obj k )
{
	FLA_Datatype datatype;
	int          m_A, n_G;
	int          rs_G, cs_G;
	int          inc_d;
	int          inc_e;

	datatype = FLA_Obj_datatype( d );

	m_A      = FLA_Obj_vector_dim( d );
	n_G      = FLA_Obj_width( G );

	rs_G     = FLA_Obj_row_stride( G );
	cs_G     = FLA_Obj_col_stride( G );

	inc_d    = FLA_Obj_vector_inc( d );
	inc_e    = FLA_Obj_vector_inc( e );
	

	switch ( datatype )
	{
		case FLA_FLOAT:
		{
			scomplex* buff_G = FLA_COMPLEX_PTR( G );
			float*    buff_d = FLA_FLOAT_PTR( d );
			float*    buff_e = FLA_FLOAT_PTR( e );
			int*      buff_k = FLA_INT_PTR( k );

			FLA_Tevd_eigval_v_ops_var1( m_A,
			                            n_G,
			                            buff_G, rs_G, cs_G,
			                            buff_d, inc_d,
			                            buff_e, inc_e,
			                            buff_k );

			break;
		}

		case FLA_DOUBLE:
		{
			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
			double*   buff_d = FLA_DOUBLE_PTR( d );
			double*   buff_e = FLA_DOUBLE_PTR( e );
			int*      buff_k = FLA_INT_PTR( k );

			FLA_Tevd_eigval_v_opd_var1( m_A,
			                            n_G,
			                            buff_G, rs_G, cs_G,
			                            buff_d, inc_d,
			                            buff_e, inc_e,
			                            buff_k );

			break;
		}
	}

	return FLA_SUCCESS;
}
コード例 #2
0
int FLAME_invert_ztau( FLA_Obj t )
{
    dim_t     m    = FLA_Obj_vector_dim( t );
    dim_t     inc  = FLA_Obj_vector_inc( t );
    dcomplex* buff = FLA_Obj_buffer_at_view( t );
    double    one  = 1.0;
    double    conjsign = one; // if conjugate -one;
    double    zero = 0.0;
    double    temp, s, xr_s, xi_s;
    dcomplex* chi;
    int       i;

    for ( i = 0; i < m; ++i )
    {
        chi  = buff + i*inc;
        s    = bl1_fmaxabs( chi->real, chi->imag );
        if ( s != zero )
        {
            xr_s = chi->real / s;
            xi_s = chi->imag / s;
            temp = xr_s * chi->real + xi_s * chi->imag;
            chi->real =            xr_s / temp;
            chi->imag = conjsign * xi_s / temp;
        }
    }
    return 0;
}
コード例 #3
0
// Transform tau.
int FLAME_invert_stau( FLA_Obj t )
{
    dim_t  m    = FLA_Obj_vector_dim( t );
    dim_t  inc  = FLA_Obj_vector_inc( t );
    float* buff = FLA_Obj_buffer_at_view( t );
    float  one  = 1.0F;
    float  zero = 0.0F;
    float* chi;
    int    i;

    for ( i = 0; i < m; ++i )
    {
        chi = buff + i*inc;
        if ( *chi != zero )
            *chi = ( one / *chi );
    }
    return 0;
}
コード例 #4
0
FLA_Error FLA_Fused_Gerc2_opt_var1( FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A )
{
/*
   Effective computation:
   A = A + alpha * ( u * y' + z * v' );
*/
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_u, inc_y, inc_z, inc_v;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_u    = FLA_Obj_vector_inc( u );
  inc_y    = FLA_Obj_vector_inc( y );
  inc_z    = FLA_Obj_vector_inc( z );
  inc_v    = FLA_Obj_vector_inc( v );
  

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_u = FLA_FLOAT_PTR( u );
      float* buff_y = FLA_FLOAT_PTR( y );
      float* buff_z = FLA_FLOAT_PTR( z );
      float* buff_v = FLA_FLOAT_PTR( v );
      float* buff_alpha = FLA_FLOAT_PTR( alpha );

      FLA_Fused_Gerc2_ops_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_u = FLA_DOUBLE_PTR( u );
      double* buff_y = FLA_DOUBLE_PTR( y );
      double* buff_z = FLA_DOUBLE_PTR( z );
      double* buff_v = FLA_DOUBLE_PTR( v );
      double* buff_alpha = FLA_DOUBLE_PTR( alpha );

      FLA_Fused_Gerc2_opd_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_u = FLA_COMPLEX_PTR( u );
      scomplex* buff_y = FLA_COMPLEX_PTR( y );
      scomplex* buff_z = FLA_COMPLEX_PTR( z );
      scomplex* buff_v = FLA_COMPLEX_PTR( v );
      scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_opc_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
      dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
      dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
      dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_opz_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #5
0
FLA_Error FLA_Apply_pivots_rt_opt_var1( FLA_Obj p, FLA_Obj A )
{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_p;
  int          k1_0, k2_0;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  // Swap the stride; FLA_Apply_pivots_ln_ops_var1 already consider the memory access pattern.
  cs_A     = FLA_Obj_row_stride( A );
  rs_A     = FLA_Obj_col_stride( A );

  // Use minus increment of the ln version.
  inc_p    = FLA_Obj_vector_inc( p );

  // Use zero-based indices.
  k1_0     = 0;
  k2_0     = ( int ) FLA_Obj_vector_dim( p ) - 1;

  switch ( datatype )
  {
    case FLA_INT:
    {
      int*   buff_A = FLA_INT_PTR( A );
      int*   buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opi_var1( m_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      int*   buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_ops_var1( m_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      int*    buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opd_var1( m_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opc_var1( m_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opz_var1( m_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #6
0
FLA_Error FLA_Eig_gest_nl_opt_var5( FLA_Obj A, FLA_Obj Y, FLA_Obj B )
{
  FLA_Datatype datatype;
  int          m_AB;
  int          rs_A, cs_A;
  int          rs_B, cs_B;
  int          inc_y;
  FLA_Obj      yT, yB;

  datatype = FLA_Obj_datatype( A );

  m_AB     = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  rs_B     = FLA_Obj_row_stride( B );
  cs_B     = FLA_Obj_col_stride( B );
 
  FLA_Part_2x1( Y,    &yT,
                      &yB,     1, FLA_TOP );

  inc_y    = FLA_Obj_vector_inc( yT );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_y = FLA_FLOAT_PTR( yT );
      float* buff_B = FLA_FLOAT_PTR( B );

      FLA_Eig_gest_nl_ops_var5( m_AB,
                                buff_A, rs_A, cs_A,
                                buff_y, inc_y,
                                buff_B, rs_B, cs_B );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_y = FLA_DOUBLE_PTR( yT );
      double* buff_B = FLA_DOUBLE_PTR( B );

      FLA_Eig_gest_nl_opd_var5( m_AB,
                                buff_A, rs_A, cs_A,
                                buff_y, inc_y,
                                buff_B, rs_B, cs_B );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_y = FLA_COMPLEX_PTR( yT );
      scomplex* buff_B = FLA_COMPLEX_PTR( B );

      FLA_Eig_gest_nl_opc_var5( m_AB,
                                buff_A, rs_A, cs_A,
                                buff_y, inc_y,
                                buff_B, rs_B, cs_B );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( yT );
      dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );

      FLA_Eig_gest_nl_opz_var5( m_AB,
                                buff_A, rs_A, cs_A,
                                buff_y, inc_y,
                                buff_B, rs_B, cs_B );

      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #7
0
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opt_var1( FLA_Obj A, FLA_Obj u, FLA_Obj tau, FLA_Obj a, FLA_Obj beta, FLA_Obj y, FLA_Obj w )
{
/*
   Effective computation:
   y = beta * y + A' * u;
   a = a - conj(y) / tau;
   w = A * conj(a);
*/
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_u, inc_a, inc_y, inc_w;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_u    = FLA_Obj_vector_inc( u );

  inc_a    = FLA_Obj_vector_inc( a );

  inc_y    = FLA_Obj_vector_inc( y );

  inc_w    = FLA_Obj_vector_inc( w );
  

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A   = FLA_FLOAT_PTR( A );
      float* buff_u   = FLA_FLOAT_PTR( u );
      float* buff_a   = FLA_FLOAT_PTR( a );
      float* buff_y   = FLA_FLOAT_PTR( y );
      float* buff_w   = FLA_FLOAT_PTR( w );
      float* buff_tau = FLA_FLOAT_PTR( tau );
      float* buff_beta = FLA_FLOAT_PTR( beta );

      FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_A,
                                      n_A,
                                      buff_tau,
                                      buff_beta,
                                      buff_A, rs_A, cs_A,
                                      buff_u, inc_u,
                                      buff_a, inc_a,
                                      buff_y, inc_y,
                                      buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A   = FLA_DOUBLE_PTR( A );
      double* buff_u   = FLA_DOUBLE_PTR( u );
      double* buff_a   = FLA_DOUBLE_PTR( a );
      double* buff_y   = FLA_DOUBLE_PTR( y );
      double* buff_w   = FLA_DOUBLE_PTR( w );
      double* buff_tau = FLA_DOUBLE_PTR( tau );
      double* buff_beta = FLA_DOUBLE_PTR( beta );

      FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_A,
                                      n_A,
                                      buff_tau,
                                      buff_beta,
                                      buff_A, rs_A, cs_A,
                                      buff_u, inc_u,
                                      buff_a, inc_a,
                                      buff_y, inc_y,
                                      buff_w, inc_w );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A   = FLA_COMPLEX_PTR( A );
      scomplex* buff_u   = FLA_COMPLEX_PTR( u );
      scomplex* buff_a   = FLA_COMPLEX_PTR( a );
      scomplex* buff_y   = FLA_COMPLEX_PTR( y );
      scomplex* buff_w   = FLA_COMPLEX_PTR( w );
      scomplex* buff_tau = FLA_COMPLEX_PTR( tau );
      scomplex* buff_beta = FLA_COMPLEX_PTR( beta );

      FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_A,
                                      n_A,
                                      buff_tau,
                                      buff_beta,
                                      buff_A, rs_A, cs_A,
                                      buff_u, inc_u,
                                      buff_a, inc_a,
                                      buff_y, inc_y,
                                      buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A   = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_u   = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_a   = FLA_DOUBLE_COMPLEX_PTR( a );
      dcomplex* buff_y   = FLA_DOUBLE_COMPLEX_PTR( y );
      dcomplex* buff_w   = FLA_DOUBLE_COMPLEX_PTR( w );
      dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau );
      dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta );

      FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_A,
                                      n_A,
                                      buff_tau,
                                      buff_beta,
                                      buff_A, rs_A, cs_A,
                                      buff_u, inc_u,
                                      buff_a, inc_a,
                                      buff_y, inc_y,
                                      buff_w, inc_w );

      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #8
0
FLA_Error FLA_LU_piv_opt_var5( FLA_Obj A, FLA_Obj p )
{
  FLA_Error    r_val = FLA_SUCCESS;
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_p;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_p    = FLA_Obj_vector_inc( p );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      int*   buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_ops_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );
      
      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      int*    buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_opd_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_opc_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      r_val = FLA_LU_piv_opz_var5( m_A,
                                   n_A,
                                   buff_A, rs_A, cs_A,
                                   buff_p, inc_p );
      
      break;
    }
  }

  return r_val;
}
コード例 #9
0
// According to the sorted order of a given vector s,
// U and V are reordered in columns while C is reordered
// in rows when they need to be applied.
FLA_Error FLA_Sort_bsvd_ext( FLA_Direct direct, FLA_Obj s,
                             FLA_Bool apply_U, FLA_Obj U,
                             FLA_Bool apply_V, FLA_Obj V,
                             FLA_Bool apply_C, FLA_Obj C )
{
    FLA_Datatype datatype;
    dim_t        m_U, rs_U, cs_U;
    dim_t        m_V, rs_V, cs_V;
    dim_t        n_C, rs_C, cs_C;
    dim_t        m_s, inc_s;

    //if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    //	FLA_Sort_bsvd_check( direct, s,
    //                         apply_U, U,
    //                         apply_V, V,
    //                         apply_C, C );

    // Sort singular values only; quick sort
    if ( apply_U == FALSE && apply_V == FALSE  )
        return FLA_Sort( direct, s );

    // s dimensions must be provided.
    m_s      = FLA_Obj_vector_dim( s );
    inc_s    = FLA_Obj_vector_inc( s );

    // Datatype of U, V and C must be consistent and must be defined from one of them.
    FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( U, apply_U, datatype, m_U, FLA_Obj_length, rs_U, cs_U );
    FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( V, apply_V, datatype, m_V, FLA_Obj_length, rs_V, cs_V );
    FLA_SORT_BSVD_EXT_DEFINE_OBJ_VARIABLES( C, apply_C, datatype, n_C, FLA_Obj_width,  rs_C, cs_C );
    
    switch ( datatype )
    {
    case FLA_FLOAT:
    {
        float* s_p = ( float* ) FLA_FLOAT_PTR( s );
        float* U_p = ( apply_U == TRUE ? ( float* ) FLA_FLOAT_PTR( U ) : NULL );
        float* V_p = ( apply_V == TRUE ? ( float* ) FLA_FLOAT_PTR( V ) : NULL );
        float* C_p = ( apply_C == TRUE ? ( float* ) FLA_FLOAT_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_ops( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_ops( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    case FLA_DOUBLE:
    {
        double* s_p = ( double* ) FLA_DOUBLE_PTR( s );
        double* U_p = ( apply_U == TRUE ? ( double* ) FLA_DOUBLE_PTR( U ) : NULL );
        double* V_p = ( apply_V == TRUE ? ( double* ) FLA_DOUBLE_PTR( V ) : NULL );
        double* C_p = ( apply_C == TRUE ? ( double* ) FLA_DOUBLE_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_opd( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_opd( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    case FLA_COMPLEX:
    {
        float*    s_p = ( float*    ) FLA_FLOAT_PTR( s );
        scomplex* U_p = ( apply_U == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( U ) : NULL );
        scomplex* V_p = ( apply_V == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( V ) : NULL );
        scomplex* C_p = ( apply_C == TRUE ? ( scomplex* ) FLA_COMPLEX_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_opc( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_opc( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    case FLA_DOUBLE_COMPLEX:
    {
        double*   s_p = ( double*   ) FLA_DOUBLE_PTR( s );
        dcomplex* U_p = ( apply_U == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( U ) : NULL );
        dcomplex* V_p = ( apply_V == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( V ) : NULL );
        dcomplex* C_p = ( apply_C == TRUE ? ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C ) : NULL );

        if ( direct == FLA_FORWARD )
            FLA_Sort_bsvd_ext_f_opz( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        else // if ( direct == FLA_BACKWARD )
            FLA_Sort_bsvd_ext_b_opz( m_s, s_p, inc_s,
                                     m_U, U_p, rs_U, cs_U,
                                     m_V, V_p, rs_V, cs_V,
                                     n_C, C_p, rs_C, cs_C );
        break;
    }
    }
    return FLA_SUCCESS;
}
コード例 #10
0
FLA_Error FLA_Bidiag_UT_u_extract_real_diagonals( FLA_Obj A, FLA_Obj d, FLA_Obj e )
{
  FLA_Datatype datatype;
  int          n_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          inc_e;
  int          i;

  datatype = FLA_Obj_datatype( A );

  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  if ( n_A != 1 )
    inc_e  = FLA_Obj_vector_inc( e );
  else
    inc_e  = 0;

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float*    buff_A = FLA_FLOAT_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );

      for ( i = 0; i < n_A; ++i )
      {
        float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        float*    a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = *a12t_l;
      }

      break;
    }

    case FLA_DOUBLE:
    {
      double*   buff_A = FLA_DOUBLE_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );

      for ( i = 0; i < n_A; ++i )
      {
        double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        double*   a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = *a12t_l;
      }

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = ( n_A != 1 ? FLA_FLOAT_PTR( e ) : NULL );

      for ( i = 0; i < n_A; ++i )
      {
        scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        scomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = a12t_l->real;
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = ( n_A != 1 ? FLA_DOUBLE_PTR( e ) : NULL );

      for ( i = 0; i < n_A; ++i )
      {
        dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        dcomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = a12t_l->real;
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #11
0
ファイル: FLA_Ger_external.c プロジェクト: anaptyxis/libflame
FLA_Error FLA_Ger_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A )
{
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_x;
  int          inc_y;
  conj1_t       blis_conjx;
  conj1_t       blis_conjy;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Ger_check( alpha, x, y, A );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );
  inc_y    = FLA_Obj_vector_inc( y );

  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx );
  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjy );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_x     = ( float * ) FLA_FLOAT_PTR( x );
    float *buff_y     = ( float * ) FLA_FLOAT_PTR( y );
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );

    bl1_sger( blis_conjx,
              blis_conjy,
              m_A,
              n_A,
              buff_alpha,
              buff_x, inc_x,
              buff_y, inc_y,
              buff_A, rs_A, cs_A ); 

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_x     = ( double * ) FLA_DOUBLE_PTR( x );
    double *buff_y     = ( double * ) FLA_DOUBLE_PTR( y );
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );

    bl1_dger( blis_conjx,
              blis_conjy,
              m_A,
              n_A,
              buff_alpha,
              buff_x, inc_x,
              buff_y, inc_y,
              buff_A, rs_A, cs_A ); 

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_x     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    scomplex *buff_y     = ( scomplex * ) FLA_COMPLEX_PTR( y );
    scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );

    bl1_cger( blis_conjx,
              blis_conjy,
              m_A,
              n_A,
              buff_alpha,
              buff_x, inc_x,
              buff_y, inc_y,
              buff_A, rs_A, cs_A ); 

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_x     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    dcomplex *buff_y     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y );
    dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );

    bl1_zger( blis_conjx,
              blis_conjy,
              m_A,
              n_A,
              buff_alpha,
              buff_x, inc_x,
              buff_y, inc_y,
              buff_A, rs_A, cs_A ); 

    break;
  }

  }
  
  return FLA_SUCCESS;
}
コード例 #12
0
FLA_Error FLA_Bidiag_UT_realify_diagonals_opt( FLA_Obj a, FLA_Obj b, FLA_Obj d, FLA_Obj e ) 
{
  FLA_Datatype datatype;
  int          i, m, inc_a, inc_b, inc_d, inc_e;

  datatype = FLA_Obj_datatype( a );

  m        = FLA_Obj_vector_dim( a );  

  inc_a    = FLA_Obj_vector_inc( a );
  inc_b    = ( m > 1 ? FLA_Obj_vector_inc( b ) : 0 );

  inc_d    = FLA_Obj_vector_inc( d );
  inc_e    = FLA_Obj_vector_inc( e );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_e = FLA_FLOAT_PTR( e );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
      
      bl1_ssetv( m, 
                 buff_1,
                 buff_d, inc_d );

      bl1_ssetv( m,
                 buff_1,
                 buff_e, inc_e );

      break;
    }
    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_e = FLA_DOUBLE_PTR( e );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bl1_dsetv( m,
                 buff_1,
                 buff_d, inc_d );

      bl1_dsetv( m,
                 buff_1,
                 buff_e, inc_e );

      break;
    }
    case FLA_COMPLEX:
    {
      scomplex* buff_a = FLA_COMPLEX_PTR( a );    
      scomplex* buff_b = ( m > 1 ? FLA_COMPLEX_PTR( b ) : NULL );    
      scomplex* buff_d = FLA_COMPLEX_PTR( d );    
      scomplex* buff_e = FLA_COMPLEX_PTR( e );    
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
      float*    buff_0 = FLA_FLOAT_PTR( FLA_ZERO ); 

      for ( i = 0; i < m; ++i )
      {
        scomplex* alpha1   = buff_a + (i  )*inc_a;
        scomplex* delta1   = buff_d + (i  )*inc_d;
        scomplex* epsilon1 = buff_e + (i  )*inc_e;

        scomplex  absv;

        if ( i == 0 )
        {
          *delta1 = *buff_1;
        }
        else
        {
          scomplex* beta1 = buff_b + (i-1)*inc_b;
          if ( beta1->imag == 0.0F )
            *delta1 = *buff_1;
          else
          {
            bl1_ccopys( BLIS1_CONJUGATE, beta1, delta1 );
            bl1_cabsval2( beta1, &absv );
            bl1_cinvscals( &absv, delta1 );

            bl1_cscals( delta1, beta1 );
            beta1->imag = *buff_0;

            bl1_cscals( delta1, alpha1 );
          }
        }

        if ( alpha1->imag == 0.0F )
          *epsilon1 = *buff_1;          
        else
        {
          bl1_ccopys( BLIS1_CONJUGATE, alpha1, epsilon1 );
          bl1_cabsval2( alpha1, &absv );
          bl1_cinvscals( &absv, epsilon1 );
          
          bl1_cscals( epsilon1, alpha1 );
          alpha1->imag = *buff_0;
        }

        if ( i < ( m - 1 ) )
        {
          scomplex* beta2 = buff_b + (i )*inc_b;
          bl1_cscals( epsilon1, beta2 );
        }
      }
      break;
    }
    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
      dcomplex* buff_b = ( m > 1 ? FLA_DOUBLE_COMPLEX_PTR( b ) : NULL );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
      double*   buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );

      for ( i = 0; i < m; ++i )
      {
        dcomplex* alpha1   = buff_a + (i  )*inc_a;
        dcomplex* delta1   = buff_d + (i  )*inc_d;
        dcomplex* epsilon1 = buff_e + (i  )*inc_e;

        dcomplex  absv;

        if ( i == 0 )
        {
          *delta1 = *buff_1;
        }
        else
        {
          dcomplex* beta1    = buff_b + (i-1)*inc_b;
          bl1_zcopys( BLIS1_CONJUGATE, beta1, delta1 );
          bl1_zabsval2( beta1, &absv );
          bl1_zinvscals( &absv, delta1 );

          bl1_zscals( delta1, beta1 );
          beta1->imag = *buff_0;

          bl1_zscals( delta1, alpha1 );
        }

        bl1_zcopys( BLIS1_CONJUGATE, alpha1, epsilon1 );
        bl1_zabsval2( alpha1, &absv );
        bl1_zinvscals( &absv, epsilon1 );

        bl1_zscals( epsilon1, alpha1 );
        alpha1->imag = *buff_0;

        if ( i < ( m - 1 ) )
        {
          dcomplex* beta2 = buff_b + (i  )*inc_b;
          bl1_zscals( epsilon1, beta2 );
        }
      }
      break;
    }
  }
  return FLA_SUCCESS;
}
コード例 #13
0
ファイル: FLA_Nrm2_external.c プロジェクト: pgawron/tlash
FLA_Error FLA_Nrm2_external( FLA_Obj x, FLA_Obj norm_x )
{
  FLA_Datatype datatype;
  int          num_elem;
  int          inc_x;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Nrm2_check( x, norm_x );

  if ( FLA_Obj_has_zero_dim( x ) )
  {
    FLA_Set( FLA_ZERO, norm_x );
    return FLA_SUCCESS;
  }

  datatype = FLA_Obj_datatype( x );

  inc_x    = FLA_Obj_vector_inc( x );
  num_elem = FLA_Obj_vector_dim( x );


  switch ( datatype ){

  case FLA_FLOAT:
  {
    float *buff_x      = ( float * ) FLA_FLOAT_PTR( x );
    float *buff_norm_x = ( float * ) FLA_FLOAT_PTR( norm_x );

    bli_snrm2( num_elem,
               buff_x, inc_x,
               buff_norm_x );

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_x      = ( double * ) FLA_DOUBLE_PTR( x );
    double *buff_norm_x = ( double * ) FLA_DOUBLE_PTR( norm_x );

    bli_dnrm2( num_elem,
               buff_x, inc_x,
               buff_norm_x );

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_x      = ( scomplex * ) FLA_COMPLEX_PTR( x );
    float    *buff_norm_x = ( float    * ) FLA_COMPLEX_PTR( norm_x );

    bli_cnrm2( num_elem,
               buff_x, inc_x,
               buff_norm_x );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_x      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    double   *buff_norm_x = ( double   * ) FLA_DOUBLE_COMPLEX_PTR( norm_x );

    bli_znrm2( num_elem,
               buff_x, inc_x,
               buff_norm_x );

    break;
  }

  }
  
  return FLA_SUCCESS;
}
コード例 #14
0
FLA_Error FLA_Bsvd_sinval_v_opt_var1( FLA_Obj tol, FLA_Obj thresh, 
                                      FLA_Obj G, FLA_Obj H, 
                                      FLA_Obj d, FLA_Obj e, 
                                      FLA_Obj k )
{
    FLA_Datatype datatype;
    int          m_A, n_GH;
    int          rs_G, cs_G;
    int          rs_H, cs_H;
    int          inc_d;
    int          inc_e;

    datatype = FLA_Obj_datatype( d );

    m_A      = FLA_Obj_vector_dim( d );
    n_GH     = FLA_Obj_width( G );

    rs_G     = FLA_Obj_row_stride( G );
    cs_G     = FLA_Obj_col_stride( G );

    rs_H     = FLA_Obj_row_stride( H );
    cs_H     = FLA_Obj_col_stride( H );

    inc_d    = FLA_Obj_vector_inc( d );
    inc_e    = FLA_Obj_vector_inc( e );


    switch ( datatype )
    {
    case FLA_FLOAT:
    {
        float*    buff_tol    = FLA_FLOAT_PTR( tol );
        float*    buff_thresh = FLA_FLOAT_PTR( thresh );
        scomplex* buff_G      = FLA_COMPLEX_PTR( G );
        scomplex* buff_H      = FLA_COMPLEX_PTR( H );
        float*    buff_d      = FLA_FLOAT_PTR( d );
        float*    buff_e      = FLA_FLOAT_PTR( e );
        int*      buff_k      = FLA_INT_PTR( k );

        FLA_Bsvd_sinval_v_ops_var1( m_A,
                                    n_GH,
                                    9,
                                    *buff_tol,
                                    *buff_thresh,
                                    buff_G, rs_G, cs_G,
                                    buff_H, rs_H, cs_H,
                                    buff_d, inc_d,
                                    buff_e, inc_e,
                                    buff_k );

        break;
    }

    case FLA_DOUBLE:
    {
        double*   buff_tol    = FLA_DOUBLE_PTR( tol );
        double*   buff_thresh = FLA_DOUBLE_PTR( thresh );
        dcomplex* buff_G      = FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* buff_H      = FLA_DOUBLE_COMPLEX_PTR( H );
        double*   buff_d      = FLA_DOUBLE_PTR( d );
        double*   buff_e      = FLA_DOUBLE_PTR( e );
        int*      buff_k      = FLA_INT_PTR( k );

        FLA_Bsvd_sinval_v_opd_var1( m_A,
                                    n_GH,
                                    9,
                                    *buff_tol,
                                    *buff_thresh,
                                    buff_G, rs_G, cs_G,
                                    buff_H, rs_H, cs_H,
                                    buff_d, inc_d,
                                    buff_e, inc_e,
                                    buff_k );

        break;
    }
    }

    return FLA_SUCCESS;
}
コード例 #15
0
ファイル: FLA_Househ3UD_UT.c プロジェクト: anaptyxis/libflame
FLA_Error FLA_Househ3UD_UT( FLA_Obj chi_0, FLA_Obj x1, FLA_Obj y2, FLA_Obj tau )
/*
  Compute an up-and-downdating UT Householder transformation

          / / 1 0 0 \            / 1 0  0 \  / 1  \ ( 1  u1' v2' ) \
    H  =  | | 0 I 0 | - inv(tau) | 0 I  0 |  | u1 |                |
          \ \ 0 0 I /            \ 0 0 -I /  \ v2 /                /

  by computing tau, u1, and v2 such that the following is satisfied:

      / chi_0 \   / alpha \
    H |  x1   | = |   0   |
      \  y2   /   \   0   /

  where

    alpha  = - lambda * chi_0 / | chi_0 |

    lambda =  sqrt( conj(chi0) chi0 + x1' x1 - y2' y2 )

              / chi_0 \
    x      =  |  x1   |
              \  y2   /

    tau    =  ( 1 + u1' u1 - v2' v2 ) / 2

    u1     =  x1 / ( chi_0 - alpha )

    v2     =  -y2 / ( chi_0 - alpha )

  Upon completion, alpha, u1, and v2 have overwritten objects chi_0, x1,
  and y2, respectively.

  -FGVZ
*/
{
  FLA_Datatype datatype;
  int          m_x1;
  int          m_y2;
  int          inc_x1;
  int          inc_y2;

  datatype = FLA_Obj_datatype( x1 );

  m_x1     = FLA_Obj_vector_dim( x1 );
  m_y2     = FLA_Obj_vector_dim( y2 );
  inc_x1   = FLA_Obj_vector_inc( x1 );
  inc_y2   = FLA_Obj_vector_inc( y2 );

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Househ3UD_UT_check( chi_0, x1, y2, tau );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* chi_0_p = ( float* ) FLA_FLOAT_PTR( chi_0 );
      float* x1_p    = ( float* ) FLA_FLOAT_PTR( x1 );
      float* y2_p    = ( float* ) FLA_FLOAT_PTR( y2 );
      float* tau_p   = ( float* ) FLA_FLOAT_PTR( tau );

      FLA_Househ3UD_UT_ops( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }

    case FLA_DOUBLE:
    {
      double* chi_0_p = ( double* ) FLA_DOUBLE_PTR( chi_0 );
      double* x1_p    = ( double* ) FLA_DOUBLE_PTR( x1 );
      double* y2_p    = ( double* ) FLA_DOUBLE_PTR( y2 );
      double* tau_p   = ( double* ) FLA_DOUBLE_PTR( tau );

      FLA_Househ3UD_UT_opd( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* chi_0_p = ( scomplex* ) FLA_COMPLEX_PTR( chi_0 );
      scomplex* x1_p    = ( scomplex* ) FLA_COMPLEX_PTR( x1 );
      scomplex* y2_p    = ( scomplex* ) FLA_COMPLEX_PTR( y2 );
      scomplex* tau_p   = ( scomplex* ) FLA_COMPLEX_PTR( tau );

      FLA_Househ3UD_UT_opc( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* chi_0_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( chi_0 );
      dcomplex* x1_p    = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( x1 );
      dcomplex* y2_p    = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( y2 );
      dcomplex* tau_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );

      FLA_Househ3UD_UT_opz( m_x1,
                            m_y2,
                            chi_0_p,
                            x1_p, inc_x1,
                            y2_p, inc_y2,
                            tau_p );
      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #16
0
ファイル: FLA_Gemv_external.c プロジェクト: pgawron/tlash
FLA_Error FLA_Gemv_external( FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_x;
  int          inc_y;
  trans_t      blis_transa;
  conj_t       blis_conjx;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Gemv_check( transa, alpha, A, x, beta, y );

  if ( FLA_Obj_has_zero_dim( A ) )
  {
    FLA_Scal_external( beta, y );
    return FLA_SUCCESS;
  }

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );
  inc_y    = FLA_Obj_vector_inc( y );

  FLA_Param_map_flame_to_blis_trans( transa, &blis_transa );
  FLA_Param_map_flame_to_blis_conj( FLA_NO_CONJUGATE, &blis_conjx );


  switch( datatype ){
  
  case FLA_FLOAT:
  {
    float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_x     = ( float * ) FLA_FLOAT_PTR( x );
    float *buff_y     = ( float * ) FLA_FLOAT_PTR( y );
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
    float *buff_beta  = ( float * ) FLA_FLOAT_PTR( beta );

    bli_sgemv( blis_transa,
               blis_conjx,
               m_A,
               n_A, 
               buff_alpha,  
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,  
               buff_y, inc_y );

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_x     = ( double * ) FLA_DOUBLE_PTR( x );
    double *buff_y     = ( double * ) FLA_DOUBLE_PTR( y );
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
    double *buff_beta  = ( double * ) FLA_DOUBLE_PTR( beta );

    bli_dgemv( blis_transa,
               blis_conjx,
               m_A,
               n_A, 
               buff_alpha,  
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,  
               buff_y, inc_y );

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_x     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    scomplex *buff_y     = ( scomplex * ) FLA_COMPLEX_PTR( y );
    scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
    scomplex *buff_beta  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

    bli_cgemv( blis_transa,
               blis_conjx,
               m_A,
               n_A, 
               buff_alpha,  
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,  
               buff_y, inc_y );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_x     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    dcomplex *buff_y     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y );
    dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    dcomplex *buff_beta  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

    bli_zgemv( blis_transa,
               blis_conjx,
               m_A,
               n_A, 
               buff_alpha,  
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,  
               buff_y, inc_y );

    break;
  }

  }
  
  return FLA_SUCCESS;
}
コード例 #17
0
FLA_Error FLA_Apply_HUD_UT_l_opt_var1( FLA_Obj tau, FLA_Obj w12t,
                                                    FLA_Obj r12t,
                                       FLA_Obj u1,  FLA_Obj C2,
                                       FLA_Obj v1,  FLA_Obj D2 )
{
  FLA_Datatype datatype;
  int          m_u1_C2;
  int          m_v1_D2;
  int          n_r12t;
  int          inc_u1;
  int          inc_v1;
  int          inc_w12t;
  int          inc_r12t;
  int          rs_C2, cs_C2;
  int          rs_D2, cs_D2;

  if ( FLA_Obj_has_zero_dim( r12t ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( C2 );

  m_u1_C2  = FLA_Obj_length( u1 );
  m_v1_D2  = FLA_Obj_length( v1 );
  n_r12t   = FLA_Obj_width( r12t );
  inc_w12t = FLA_Obj_vector_inc( w12t );
  inc_r12t = FLA_Obj_vector_inc( r12t );
  inc_u1   = FLA_Obj_vector_inc( u1 );
  rs_C2    = FLA_Obj_row_stride( C2 );
  cs_C2    = FLA_Obj_col_stride( C2 );
  inc_v1   = FLA_Obj_vector_inc( v1 );
  rs_D2    = FLA_Obj_row_stride( D2 );
  cs_D2    = FLA_Obj_col_stride( D2 );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* tau_p  = ( float* ) FLA_FLOAT_PTR( tau );
      float* w12t_p = ( float* ) FLA_FLOAT_PTR( w12t );
      float* r12t_p = ( float* ) FLA_FLOAT_PTR( r12t );
      float* u1_p   = ( float* ) FLA_FLOAT_PTR( u1 );
      float* C2_p   = ( float* ) FLA_FLOAT_PTR( C2 );
      float* v1_p   = ( float* ) FLA_FLOAT_PTR( v1 );
      float* D2_p   = ( float* ) FLA_FLOAT_PTR( D2 );

      FLA_Apply_HUD_UT_l_ops_var1( m_u1_C2,
                                   m_v1_D2,
                                   n_r12t,
                                   tau_p,
                                   w12t_p, inc_w12t,
                                   r12t_p, inc_r12t,
                                   u1_p, inc_u1,
                                   C2_p, rs_C2, cs_C2,
                                   v1_p, inc_v1,
                                   D2_p, rs_D2, cs_D2 );
      break;
    }

    case FLA_DOUBLE:
    {
      double* tau_p  = ( double* ) FLA_DOUBLE_PTR( tau );
      double* w12t_p = ( double* ) FLA_DOUBLE_PTR( w12t );
      double* r12t_p = ( double* ) FLA_DOUBLE_PTR( r12t );
      double* u1_p   = ( double* ) FLA_DOUBLE_PTR( u1 );
      double* C2_p   = ( double* ) FLA_DOUBLE_PTR( C2 );
      double* v1_p   = ( double* ) FLA_DOUBLE_PTR( v1 );
      double* D2_p   = ( double* ) FLA_DOUBLE_PTR( D2 );

      FLA_Apply_HUD_UT_l_opd_var1( m_u1_C2,
                                   m_v1_D2,
                                   n_r12t,
                                   tau_p,
                                   w12t_p, inc_w12t,
                                   r12t_p, inc_r12t,
                                   u1_p, inc_u1,
                                   C2_p, rs_C2, cs_C2,
                                   v1_p, inc_v1,
                                   D2_p, rs_D2, cs_D2 );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* tau_p  = ( scomplex* ) FLA_COMPLEX_PTR( tau );
      scomplex* w12t_p = ( scomplex* ) FLA_COMPLEX_PTR( w12t );
      scomplex* r12t_p = ( scomplex* ) FLA_COMPLEX_PTR( r12t );
      scomplex* u1_p   = ( scomplex* ) FLA_COMPLEX_PTR( u1 );
      scomplex* C2_p   = ( scomplex* ) FLA_COMPLEX_PTR( C2 );
      scomplex* v1_p   = ( scomplex* ) FLA_COMPLEX_PTR( v1 );
      scomplex* D2_p   = ( scomplex* ) FLA_COMPLEX_PTR( D2 );

      FLA_Apply_HUD_UT_l_opc_var1( m_u1_C2,
                                   m_v1_D2,
                                   n_r12t,
                                   tau_p,
                                   w12t_p, inc_w12t,
                                   r12t_p, inc_r12t,
                                   u1_p, inc_u1,
                                   C2_p, rs_C2, cs_C2,
                                   v1_p, inc_v1,
                                   D2_p, rs_D2, cs_D2 );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* tau_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );
      dcomplex* w12t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( w12t );
      dcomplex* r12t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( r12t );
      dcomplex* u1_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u1 );
      dcomplex* C2_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( C2 );
      dcomplex* v1_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( v1 );
      dcomplex* D2_p   = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( D2 );

      FLA_Apply_HUD_UT_l_opz_var1( m_u1_C2,
                                   m_v1_D2,
                                   n_r12t,
                                   tau_p,
                                   w12t_p, inc_w12t,
                                   r12t_p, inc_r12t,
                                   u1_p, inc_u1,
                                   C2_p, rs_C2, cs_C2,
                                   v1_p, inc_v1,
                                   D2_p, rs_D2, cs_D2 );
      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #18
0
FLA_Error FLA_Tevd_v_opt_var2( dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg )
{
	FLA_Error    r_val = FLA_SUCCESS;
	FLA_Datatype datatype;
	int          m_A, m_U, n_G;
	int          inc_d;
	int          inc_e;
	int          rs_G, cs_G;
	int          rs_R, cs_R;
	int          rs_U, cs_U;
	int          rs_W, cs_W;

	datatype = FLA_Obj_datatype( U );

	m_A       = FLA_Obj_vector_dim( d );
	m_U       = FLA_Obj_length( U );
	n_G       = FLA_Obj_width( G );

	inc_d     = FLA_Obj_vector_inc( d );
	inc_e     = FLA_Obj_vector_inc( e );
	
	rs_G      = FLA_Obj_row_stride( G );
	cs_G      = FLA_Obj_col_stride( G );

	rs_R      = FLA_Obj_row_stride( R );
	cs_R      = FLA_Obj_col_stride( R );

	rs_W      = FLA_Obj_row_stride( W );
	cs_W      = FLA_Obj_col_stride( W );

	rs_U      = FLA_Obj_row_stride( U );
	cs_U      = FLA_Obj_col_stride( U );


	switch ( datatype )
	{
		case FLA_FLOAT:
		{
			float*    buff_d = FLA_FLOAT_PTR( d );
			float*    buff_e = FLA_FLOAT_PTR( e );
			scomplex* buff_G = FLA_COMPLEX_PTR( G );
			float*    buff_R = FLA_FLOAT_PTR( R );
			float*    buff_W = FLA_FLOAT_PTR( W );
			float*    buff_U = FLA_FLOAT_PTR( U );

			r_val = FLA_Tevd_v_ops_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}

		case FLA_DOUBLE:
		{
			double*   buff_d = FLA_DOUBLE_PTR( d );
			double*   buff_e = FLA_DOUBLE_PTR( e );
			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
			double*   buff_R = FLA_DOUBLE_PTR( R );
			double*   buff_W = FLA_DOUBLE_PTR( W );
			double*   buff_U = FLA_DOUBLE_PTR( U );

			r_val = FLA_Tevd_v_opd_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}

		case FLA_COMPLEX:
		{
			float*    buff_d = FLA_FLOAT_PTR( d );
			float*    buff_e = FLA_FLOAT_PTR( e );
			scomplex* buff_G = FLA_COMPLEX_PTR( G );
			float*    buff_R = FLA_FLOAT_PTR( R );
			scomplex* buff_W = FLA_COMPLEX_PTR( W );
			scomplex* buff_U = FLA_COMPLEX_PTR( U );

			r_val = FLA_Tevd_v_opc_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}

		case FLA_DOUBLE_COMPLEX:
		{
			double*   buff_d = FLA_DOUBLE_PTR( d );
			double*   buff_e = FLA_DOUBLE_PTR( e );
			dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
			double*   buff_R = FLA_DOUBLE_PTR( R );
			dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
			dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );

			r_val = FLA_Tevd_v_opz_var2( m_A,
			                             m_U,
			                             n_G,
			                             n_iter_max,
			                             buff_d, inc_d,
			                             buff_e, inc_e,
			                             buff_G, rs_G, cs_G,
			                             buff_R, rs_R, cs_R,
			                             buff_W, rs_W, cs_W,
			                             buff_U, rs_U, cs_U,
			                             b_alg );

			break;
		}
	}

	return r_val;
}
コード例 #19
0
FLA_Error FLA_Fused_Ahx_Ax_opt_var1( FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w )
{
/*
   Effective computation:
   v = A' * x;
   w = A  * x;
*/
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_x, inc_v, inc_w;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );

  inc_v    = FLA_Obj_vector_inc( v );

  inc_w    = FLA_Obj_vector_inc( w );
  

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_x = FLA_FLOAT_PTR( x );
      float* buff_v = FLA_FLOAT_PTR( v );
      float* buff_w = FLA_FLOAT_PTR( w );

      FLA_Fused_Ahx_Ax_ops_var1( m_A,
                                 n_A,
                                 buff_A, rs_A, cs_A,
                                 buff_x, inc_x,
                                 buff_v, inc_v,
                                 buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_x = FLA_DOUBLE_PTR( x );
      double* buff_v = FLA_DOUBLE_PTR( v );
      double* buff_w = FLA_DOUBLE_PTR( w );

      FLA_Fused_Ahx_Ax_opd_var1( m_A,
                                 n_A,
                                 buff_A, rs_A, cs_A,
                                 buff_x, inc_x,
                                 buff_v, inc_v,
                                 buff_w, inc_w );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_x = FLA_COMPLEX_PTR( x );
      scomplex* buff_v = FLA_COMPLEX_PTR( v );
      scomplex* buff_w = FLA_COMPLEX_PTR( w );

      FLA_Fused_Ahx_Ax_opc_var1( m_A,
                                 n_A,
                                 buff_A, rs_A, cs_A,
                                 buff_x, inc_x,
                                 buff_v, inc_v,
                                 buff_w, inc_w );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x );
      dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
      dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );

      FLA_Fused_Ahx_Ax_opz_var1( m_A,
                                 n_A,
                                 buff_A, rs_A, cs_A,
                                 buff_x, inc_x,
                                 buff_v, inc_v,
                                 buff_w, inc_w );

      break;
    }
  }

  return FLA_SUCCESS;
}
コード例 #20
0
ファイル: FLA_Sort.c プロジェクト: pgawron/tlash
FLA_Error FLA_Sort( FLA_Direct direct, FLA_Obj x )
{
	FLA_Datatype datatype;
	FLA_Obj      x_use;
	dim_t        m_x;
	dim_t        inc_x;

	if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
		FLA_Sort_check( direct, x );

	datatype = FLA_Obj_datatype( x );

	m_x      = FLA_Obj_vector_dim( x );
	inc_x    = FLA_Obj_vector_inc( x );

	// If the vector does not have unit stride, copy it to a temporary vector
	// that does have unit stride.
	if ( inc_x != 1 )
	{
		FLA_Obj_create_copy_of( FLA_NO_TRANSPOSE, x, &x_use );
		inc_x = FLA_Obj_vector_inc( x_use );
	}
	else
	{
		x_use = x;
	}

	switch ( datatype )
	{
		case FLA_FLOAT:
		{
			float* x_p = ( float* ) FLA_FLOAT_PTR( x_use );

			if ( direct == FLA_FORWARD )
				FLA_Sort_f_ops( m_x,
				                x_p, inc_x );
			else // if ( direct == FLA_BACKWARD )
				FLA_Sort_b_ops( m_x,
				                x_p, inc_x );

			break;
		}

		case FLA_DOUBLE:
		{
			double* x_p = ( double* ) FLA_DOUBLE_PTR( x_use );

			if ( direct == FLA_FORWARD )
				FLA_Sort_f_opd( m_x,
				                x_p, inc_x );
			else // if ( direct == FLA_BACKWARD )
				FLA_Sort_b_opd( m_x,
				                x_p, inc_x );

			break;
		}

	}

	if ( inc_x != 1 )
	{
		FLA_Copy( x_use, x );
		FLA_Obj_free( &x_use );
	}

	return FLA_SUCCESS;
}
コード例 #21
0
FLA_Error FLA_Trmv_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj A, FLA_Obj x ) 
{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_x;
  uplo1_t       blis_uplo;
  trans1_t      blis_trans;
  diag1_t       blis_diag;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Trmv_check( uplo, trans, diag, A, x );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );

  FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );
  FLA_Param_map_flame_to_blis_trans( trans, &blis_trans );
  FLA_Param_map_flame_to_blis_diag( diag, &blis_diag );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_x = ( float * ) FLA_FLOAT_PTR( x );

    bl1_strmv( blis_uplo,
               blis_trans,
               blis_diag,
               m_A,
               buff_A, rs_A, cs_A,
               buff_x, inc_x );

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_x = ( double * ) FLA_DOUBLE_PTR( x );

    bl1_dtrmv( blis_uplo,
               blis_trans,
               blis_diag,
               m_A,
               buff_A, rs_A, cs_A,
               buff_x, inc_x );

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_x = ( scomplex * ) FLA_COMPLEX_PTR( x );

    bl1_ctrmv( blis_uplo,
               blis_trans,
               blis_diag,
               m_A,
               buff_A, rs_A, cs_A,
               buff_x, inc_x );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_x = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );

    bl1_ztrmv( blis_uplo,
               blis_trans,
               blis_diag,
               m_A,
               buff_A, rs_A, cs_A,
               buff_x, inc_x );

    break;
  }

  }

  return FLA_SUCCESS;
}
コード例 #22
0
ファイル: FLA_Herc_external.c プロジェクト: pgawron/tlash
FLA_Error FLA_Herc_external( FLA_Uplo uplo, FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj A )
{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_x; 
  uplo_t       blis_uplo;
  conj_t       blis_conj;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Herc_check( uplo, conj, alpha, x, A );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );

  FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );
  FLA_Param_map_flame_to_blis_conj( conj, &blis_conj );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_x     = ( float * ) FLA_FLOAT_PTR( x );
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );

    bli_ssyr( blis_uplo,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 
    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_x     = ( double * ) FLA_DOUBLE_PTR( x );
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );

    bli_dsyr( blis_uplo,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 
    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_x     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    float    *buff_alpha = ( float    * ) FLA_FLOAT_PTR( alpha );

    bli_cher( blis_uplo,
              blis_conj,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_x     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    double   *buff_alpha = ( double   * ) FLA_DOUBLE_PTR( alpha );

    bli_zher( blis_uplo,
              blis_conj,
              m_A,
              buff_alpha,
              buff_x, inc_x,
              buff_A, rs_A, cs_A ); 

    break;
  }

  }

  return FLA_SUCCESS;
}
コード例 #23
0
FLA_Error FLA_Symv_external( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y )
{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_x;
  int          inc_y;
  uplo1_t       blis_uplo;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING ) 
    FLA_Symv_check( uplo, alpha, A, x, beta, y );

  if ( FLA_Obj_has_zero_dim( A ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_x    = FLA_Obj_vector_inc( x );
  inc_y    = FLA_Obj_vector_inc( y );

  FLA_Param_map_flame_to_blis_uplo( uplo, &blis_uplo );


  switch( datatype ){

  case FLA_FLOAT:
  {
    float *buff_A     = ( float * ) FLA_FLOAT_PTR( A );
    float *buff_x     = ( float * ) FLA_FLOAT_PTR( x );
    float *buff_y     = ( float * ) FLA_FLOAT_PTR( y );
    float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha );
    float *buff_beta  = ( float * ) FLA_FLOAT_PTR( beta );

    bl1_ssymv( blis_uplo,
               m_A,
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,
               buff_y, inc_y );

    break;
  }

  case FLA_DOUBLE:
  {
    double *buff_A     = ( double * ) FLA_DOUBLE_PTR( A );
    double *buff_x     = ( double * ) FLA_DOUBLE_PTR( x );
    double *buff_y     = ( double * ) FLA_DOUBLE_PTR( y );
    double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha );
    double *buff_beta  = ( double * ) FLA_DOUBLE_PTR( beta );

    bl1_dsymv( blis_uplo,
               m_A,
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,
               buff_y, inc_y );

    break;
  }

  case FLA_COMPLEX:
  {
    scomplex *buff_A     = ( scomplex * ) FLA_COMPLEX_PTR( A );
    scomplex *buff_x     = ( scomplex * ) FLA_COMPLEX_PTR( x );
    scomplex *buff_y     = ( scomplex * ) FLA_COMPLEX_PTR( y );
    scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha );
    scomplex *buff_beta  = ( scomplex * ) FLA_COMPLEX_PTR( beta );

    bl1_csymv( blis_uplo,
               m_A,
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,
               buff_y, inc_y );

    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex *buff_A     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( A );
    dcomplex *buff_x     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( x );
    dcomplex *buff_y     = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( y );
    dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha );
    dcomplex *buff_beta  = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( beta );

    bl1_zsymv( blis_uplo,
               m_A,
               buff_alpha,
               buff_A, rs_A, cs_A, 
               buff_x, inc_x,
               buff_beta,
               buff_y, inc_y );

    break;
  }

  }

  return FLA_SUCCESS;
}