Exemple #1
0
 /// Natural logarithm of a scalar float.
 ///
 /// Special inputs are answered before any arithmetic: +Inf is returned
 /// unchanged, zero yields Minf, and NaN or negative input yields Nan.
 /// All other inputs are decomposed by kernel_log and the result is rebuilt
 /// from the pieces it returns.
 inline float log(const float& a0)
 {
   typedef float A0;

   // Inputs that never reach the kernel.
   if (a0 == Inf<A0>()) return a0;
   if (iseqz(a0)) return Minf<A0>();
   if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>();

   // reduced: x from the kernel, expo: exponent as a float,
   // reduced_sq: x squared, series: polynomial term.
   float reduced, expo, reduced_sq, series;
   kernel_log(a0, expo, reduced, reduced_sq, series);

   // The exponent contributes through two constants; the small one is folded
   // into the series first, followed by Mhalf * x^2.
   // NOTE(review): 0xb95e8083 / 0x3f318000 look like a low/high split of
   // ln(2) (0x3f318000 decodes to 0.693359375f) - confirm.
   series = fma(expo, Const<float, 0xb95e8083>(), series);
   series = fma(Mhalf<A0>(), reduced_sq, series);

   const A0 partial = reduced + series;
   return fma(Const<float, 0x3f318000>(), expo, partial);
 }
Exemple #2
0
/*
 * Perform routine: for every input sample, reads cos_table with linear
 * interpolation between two adjacent entries, multiplies the result by the
 * matching sample of a multiplier vector and adds a constant.
 *
 *   w[1]  t_operatord object; invals[0].vec is the multiplier vector,
 *         invals[1].val the additive constant
 *   w[2]  input samples
 *   w[3]  output samples
 *   w[4]  sample count n
 * Returns w+5.
 *
 * The loop is staggered: the table address for a sample is computed one step
 * before that sample is interpolated and written. The first address is
 * therefore set up ahead of the loop and the last output is produced after it.
 *
 * NOTE(review): `while (--n)` assumes n >= 1; with n == 0 the counter becomes
 * negative and the loop keeps running - confirm callers never pass 0.
 * NOTE(review): addr[1] is read for every masked index, so the table
 * presumably holds COSTABSIZE + 1 entries - confirm.
 */
static t_int *op_perf1(t_int *w) {
	t_operatord *x = (t_operatord *)(w[1]);
	t_sample *in = (t_sample *)(w[2]);
	t_sample *out = (t_sample *)(w[3]);
	int n = (int)(w[4]);
	t_sample *mul = x->invals[0].vec;
	t_float add = x->invals[1].val;
	float *tab = cos_table, *addr, f1, f2, frac;
	
    double dphase;
    int normhipart;
    union tabfudge tf;
    
    /* remember the HIOFFSET word of UNITBIT32 so it can be written back later */
    tf.tf_d = UNITBIT32;
    normhipart = tf.tf_i[HIOFFSET];


        /* prologue: set up the lookup for the first sample */
        dphase = (double)(*in++ * (float)(COSTABSIZE)) + UNITBIT32;
        tf.tf_d = dphase;
        /* table index comes from the HIOFFSET word, wrapped by the mask */
        addr = tab + (tf.tf_i[HIOFFSET] & (COSTABSIZE-1));
        /* put the saved word back; tf_d - UNITBIT32 is then used as frac */
        tf.tf_i[HIOFFSET] = normhipart;
    while (--n)
    {
        /* start the next sample while finishing the previous one */
        dphase = (double)(*in++ * (float)(COSTABSIZE)) + UNITBIT32;
            /* frac and addr still belong to the previous sample here */
            frac = tf.tf_d - UNITBIT32;
        tf.tf_d = dphase;
            f1 = addr[0];
            f2 = addr[1];
        addr = tab + (tf.tf_i[HIOFFSET] & (COSTABSIZE-1));
        /* dphase is reused as scratch for the interpolated table value */
        #ifdef FP_FAST_FMA
        	dphase = fma(frac, f2 - f1, f1);
            *out++ = fma(dphase, (*mul++), add);
        	#else
            dphase = f1 + frac * (f2 - f1);
            *out++ = dphase*(*mul++) + add;
            #endif
        tf.tf_i[HIOFFSET] = normhipart;
    }
            /* epilogue: interpolate and write the last pending sample */
            frac = tf.tf_d - UNITBIT32;
            f1 = addr[0];
            f2 = addr[1];
            #ifdef FP_FAST_FMA
        	dphase = fma(frac, f2 - f1, f1);
            *out++ = fma(dphase, (*mul++), add);
        	#else
            dphase = f1 + frac * (f2 - f1);
            *out++ = dphase*(*mul++) + add;
            #endif
    return (w+5);
}
Exemple #3
0
      // Decomposition and polynomial core for a scalar float logarithm.
      //
      // Input:   a0 - value to decompose with fast_frexp.
      // Outputs: fe - adjusted exponent converted to float
      //          x  - adjusted mantissa plus Const<float,0xbf800000>
      //               (presumably -1.0f - confirm)
      //          x2 - x squared
      //          y  - (x*y2 + y1) * x * x2, where y1 and y2 are two
      //               polynomials in x2 evaluated with fma steps
      inline void kernel_log(const float& a0,
			     float& fe,
			     float& x,
			     float& x2,
			     float& y)
      {
        typedef float A0;
        typedef meta::as_integer<A0, signed>::type int_type;
        int_type e;
        boost::fusion::tie(x, e) = fast_frexp(a0);
        // -1 when the mantissa is below the threshold constant, 0 otherwise
        // (0x3f3504f3 is presumably sqrt(1/2), going by the variable name).
        int_type x_lt_sqrthf = -(Const<float, 0x3f3504f3>() > x);
        e += x_lt_sqrthf;
        // Branch-free form of the two commented statements below: the masked
        // term adds x to itself only when the flag is set.
        //     if (x_lt_sqrthf) x+= x;
        //     x += Const<float, 0xbf800000>();
        x += b_and(x, genmask<float>(x_lt_sqrthf))+Const<float,0xbf800000>();
        x2 = sqr(x);
        // Two independent fma chains in x2, interleaved line by line.
        A0 y1 = fma(Const<float, 0x3d9021bb>() ,x2,Const<float, 0x3def251a>() );
        A0 y2 = fma(Const<float, 0xbdebd1b8>() ,x2,Const<float, 0xbdfe5d4f>() );
        y1 = fma(y1,x2,Const<float, 0x3e11e9bf>() );
        y2 = fma(y2,x2,Const<float, 0xbe2aae50>() );
        y1 = fma(y1,x2,Const<float, 0x3e4cceac>() );
        y2 = fma(y2,x2,Const<float, 0xbe7ffffc>() );
        y1 = fma(y1,x2,Const<float, 0x3eaaaaaa>() );
        // Combine the two chains and scale by x^3.
        y = fma(x,y2,y1)*x*x2;
        fe = tofloat(e);
      }
void Adaptive::updateProbsBasic(const double wgt)
{
    const double pStar = probability[currentIndex];
    const double alpha = fdim(1.0,pStar), beta = pStar;
    vector<double>::iterator it;
    const vector<double>::const_iterator itIndex = (probability.begin() + currentIndex);

    for (it = probability.begin(); it != probability.end(); it++)
    {
        if ( it == itIndex )
            *it *= fma(wgt,alpha,1.0);
        else
            *it *= fma(-wgt,beta,1.0);
    }
}
Exemple #5
0
 // Fused multiply-add for forward-mode variables.
 // Value:   fma(x1, x2, x3) on the val_ members.
 // Tangent: x1.d_ * x2.val_ + x2.d_ * x1.val_ + x3.d_
 //          (product rule on x1 * x2, plus the tangent of x3).
 inline
 fvar<typename stan::return_type<T1, T2, T3>::type>
 fma(const fvar<T1>& x1, const fvar<T2>& x2, const fvar<T3>& x3) {
   typedef typename stan::return_type<T1, T2, T3>::type promoted_type;
   typedef fvar<promoted_type> result_fvar;
   return result_fvar(fma(x1.val_, x2.val_, x3.val_),
                      x1.d_ * x2.val_ + x2.d_ * x1.val_ + x3.d_);
 }
Exemple #6
0
 // Evaluates a 'linear' interpolation and stores it in yi.
 //
 // inputs child 0: x  (sample positions, asserted sorted ascending)
 // inputs child 1: y  (sample values, asserted to have as many elements as x)
 // inputs child 2: xi (query positions)
 // Further options are decoded by choices() into `extrap` / `extrapval`.
 //
 // yi is resized to inputs.extent() and also returned. With fewer than two
 // sample points only the assertion fires; yi is returned as resized.
 result_type operator()(A0& yi, A1& inputs) const
 {
   yi.resize(inputs.extent());
   const child0 & x   =  boost::proto::child_c<0>(inputs);
   if (numel(x) <=  1)
     BOOST_ASSERT_MSG(numel(x) >  1, "Interpolation requires at least two sample points in each dimension.");
   else
   {
     BOOST_ASSERT_MSG(issorted(x, 'a'), "for 'linear' interpolation x values must be sorted in ascending order");
     const child1 & y   =  boost::proto::child_c<1>(inputs);
     BOOST_ASSERT_MSG(numel(x) == numel(y), "The grid vectors do not define a grid of points that match the given values.");
     const child2 & xi  =  boost::proto::child_c<2>(inputs);
     // Defaults: no extrapolation, out-of-range queries become NaN.
     bool extrap = false;
     value_type extrapval = Nan<value_type>();
     choices(inputs, extrap, extrapval, N1());
     // index: position in x found by bsearch for each query point.
     table<index_type>   index = bsearch (x, xi);
     // NOTE(review): dx is the raw distance xi - x(index); it is not divided
     // by x(index+1) - x(index), so the blend below is a true linear
     // interpolation only when neighbouring x values are 1 apart - confirm.
     table<value_type>  dx    =  xi-x(index);
     // (1 - dx) * y(index) + dx * y(index + 1)
     yi =  fma(oneminus(dx), y(index), dx*y(oneplus(index)));
     value_type  b =  value_type(x(begin_));
     value_type  e =  value_type(x(end_));
     // Without extrapolation, queries that are not >= b or not <= e
     // (which includes NaN queries) are replaced by extrapval.
     if (!extrap) yi = nt2::if_else(nt2::logical_or(boost::simd::is_nge(xi, b),
                                                    boost::simd::is_nle(xi, e)), extrapval, yi);
   }
   return yi;
 }
Exemple #7
0
 // Arctangent kernel working on |a0_n|; the sign of the input is not
 // reapplied here.
 //
 // The magnitude x is split into three ranges by two thresholds (named
 // tanpio8 and tan3pio8). Each range selects an offset yy and a reduced
 // argument xx; a rational polynomial in xx^2 is then evaluated:
 //   x <  tanpio8            : yy = 0,     xx = x
 //   tanpio8 <= x < tan3pio8 : yy = Pio_4, xx = (x - 1) / (x + 1)
 //   x >= tan3pio8           : yy = Pio_2, xx = -1 / x
 // (if_zero_else(c, v) is read here as "0 where c holds, v elsewhere".)
 static inline A0_n kernel_atan(const A0_n a0_n)
 {
   typedef typename meta::scalar_of<A0>::type sA0;
   const A0 tan3pio8  = double_constant<A0, 0x4003504f333f9de6ll>();
   const A0 tanpio8 = double_constant<A0, 0x3fda827999fcef31ll>();
   const A0 a0 = {a0_n};
   const A0 x =  nt2::abs(a0);
   // flag1: below the upper threshold; flag2: inside the middle range.
   const bA0 flag1 = lt(x,  tan3pio8);             
   const bA0 flag2 = logical_and(ge(x, tanpio8), flag1); 
   // Offset for each range.
   A0 yy = if_zero_else(flag1, Pio_2<A0>());
   yy = select(flag2, Pio_4<A0>(), yy);
   // Reduced argument for each range.
   A0 xx = select(flag1, x, -rec(x));
   xx = select(flag2, minusone(x)/oneplus(x),xx);
   A0 z = sqr(xx);
   // z * P(z) / Q(z) with a 5-coefficient numerator and 6-coefficient denominator.
   z = z*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                (0xbfec007fa1f72594ll,
                                 0xc03028545b6b807all,
                                 0xc052c08c36880273ll,
                                 0xc05eb8bf2d05ba25ll,
                                 0xc0503669fd28ec8ell)
                                )>(z)/
       horner< NT2_HORNER_COEFF_T(sA0, 6,
                                (0x3ff0000000000000ll,
                                 0x4038dbc45b14603cll,
                                 0x4064a0dd43b8fa25ll,
                                 0x407b0e18d2e2be3bll,
                                 0x407e563f13b049eall,
                                 0x4068519efbbd62ecll)
                                )>(z);
   // xx + xx * (z * P / Q)
   z = fma(xx, z, xx);
   // Small correction constant: half of it is added in the middle range,
   // all of it in the upper range, none in the lower range.
   // NOTE(review): presumably the low-order bits of the Pio_2 / Pio_4
   // offsets - confirm.
   const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();
   z = seladd(flag2, z, mul(Half<A0>(), morebits));
   z = z+if_zero_else(flag1, morebits);
   return yy + z;
 }
Exemple #8
0
/*
 * Scratch program for floating-point experiments:
 *   1. prints the bit patterns of 0.1*10-1 computed with two roundings and
 *      with a single fma,
 *   2. accumulates sin(left) + sin(right) over a tiny interval around
 *      `center` (the total is computed but its printout is commented out),
 *   3. prints sin((pi + offset) - offset) next to sin(pi).
 */
int main(void) {
	/* (1) separate multiply/subtract versus fused multiply-add */
	double separately_rounded = 0.1 * 10 - 1;
	double fused = fma(0.1, 10, -1);
	print_bin_double(separately_rounded);
	print_bin_double(fused);
	long int step;
//	printf("off;reg;fma;expect\n");
//	for(i = 1; i < 1L << 62; i *= 10) {
//		printf("%ld;", i);
//		printf("%.15G;", (i * M_PI + M_PI_2) - (i * M_PI - M_PI_2) - M_PI);
//		printf("%.15G;%.15G\n", (1* fma(i, M_PI, +M_PI_2) - fma(i, M_PI, -M_PI_2)) - M_PI, M_PI);
//	}

	/* (2) sum of sin at both ends of each of `pieces` sub-intervals */
	double center = M_PI*000;	/* multiplier 000 makes this 0 */
	double lower = center - 1e-20;
	double upper = center + 1e-20;
	long int pieces = 5;
	double width = (upper - lower) / pieces;
	long double total = 0;
	printf("dx=%.15G\nl, r, v\n", width);
	for(step = 0; step < pieces; step++) {
		double left = lower + width * step;
		double right = lower + width * (step + 1);
//		printf("%.15G %.15G %.15G\n", tl, tr, (sin(tl) + sin(tr)) / 2);
		total += (sin(left) + sin(right));
	}
	total *= width / 2;
//	printf("%.15G\n", (double)res);
//	printf("%.15G\n%.15G\n", (shift + M_PI) - shift, M_PI);
//	printf("%.15G\n", -cos(r)+cos(l));
//	printf("%.15G", sin(M_PI_2*100001));

	/* (3) effect of adding and removing a large offset before sin */
	long int big_offset = 1e9;
	printf("%.15G\n%.15G", sin((M_PI + big_offset) - big_offset), sin(M_PI));
	return 0;
}
//
// Albedo of an attenuated-sum reflector:
//
//     attenuation * albedo(attenuated_reflector) + albedo(added_reflector)
//
// The added reflector is queried first. If either query fails, its status is
// returned and the fused multiply-add is skipped.
//
static
ISTATUS
AttenuatedSumReflectorGetAlbedo(
    _In_ const void *context,
    _Out_ float_t *reflectance
    )
{
    PCATTENUATED_SUM_REFLECTOR sum_reflector =
        (PCATTENUATED_SUM_REFLECTOR)context;

    float_t albedo_of_added;
    ISTATUS result = ReflectorGetAlbedoInline(sum_reflector->added_reflector,
                                              &albedo_of_added);

    if (result == ISTATUS_SUCCESS)
    {
        // The attenuated albedo lands directly in the output slot.
        result = ReflectorGetAlbedoInline(sum_reflector->attenuated_reflector,
                                          reflectance);
    }

    if (result == ISTATUS_SUCCESS)
    {
        *reflectance = fma(sum_reflector->attenuation,
                           *reflectance,
                           albedo_of_added);
    }

    return result;
}
Exemple #10
0
 /// Base-2 logarithm of a scalar float.
 ///
 /// +Inf is returned unchanged, zero yields Minf, and NaN or negative input
 /// yields Nan. All other inputs are decomposed by kernel_log.
 inline float log2(const float& a0)
 {
   typedef float A0;

   // Inputs that never reach the kernel.
   if (a0 == Inf<A0>()) return a0;
   if (iseqz(a0)) return Minf<A0>();
   if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>();

   A0 reduced, expo, reduced_sq, series;
   kernel_log(a0, expo, reduced, reduced_sq, series);
   series = fma(Mhalf<A0>(), reduced_sq, series);

   // Scale the fractional part: the same constant multiplies both the
   // reduced argument and the series (0.44269504088896340735992).
   const A0 scaled_series = mul(series, Const<float, 0x3ee2a8ed>());
   const A0 scaled = fma(reduced, Const<float, 0x3ee2a8ed>(), scaled_series);

   // Summation order is kept as written: scaled part, series, reduced
   // argument, and the exponent last.
   return ((scaled+series)+reduced)+expo;
 }
Exemple #11
0
 // Arcsine. Works on x = |a0| and puts the sign of a0 back at the end by
 // xor-ing with bitofsign(a0).
 //
 // Three candidate results are prepared and one is selected per element:
 //   x <  Sqrteps           : x itself
 //   x >  ct1 (0.625)       : zz1, built from a rational polynomial in 1 - x
 //   otherwise              : zz2 = x + x * R(a0^2)
 // Where x > 1 the result is NaN (if_nan_else). Both polynomial branches are
 // evaluated unconditionally; select() only picks among the results.
 static inline A0_n asin(const A0_n a0_n)
 {
   const A0 a0 = { a0_n };
   typedef typename meta::scalar_of<A0>::type sA0;
   A0 x = nt2::abs(a0);
   const A0 pio4 =  Pio_4<A0>();
   const bA0 small=  lt(x, Sqrteps<A0>());
   // NOTE(review): morebits is presumably a low-order correction that goes
   // with the Pio_4 terms below - confirm.
   const A0 morebits = double_constant<A0, 0xbc91a62633145c07ll>();
   const A0 ct1      = double_constant<A0, 0x3fe4000000000000ll>();
   // --- branch for x > ct1: rational polynomial in zz1 = 1 - x ---
   A0 zz1 = oneminus(x);
   const A0 vp = zz1*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                                (0x3f684fc3988e9f08ll,
                                                 0xbfe2079259f9290fll,
                                                 0x401bdff5baf33e6all,
                                                 0xc03991aaac01ab68ll,
                                                 0x403c896240f3081dll)
                                                )>(zz1)/
                   horner< NT2_HORNER_COEFF_T(sA0, 5,
                                              (0x3ff0000000000000ll,
                                               0xc035f2a2b6bf5d8cll,
                                               0x40626219af6a7f42ll,
                                               0xc077fe08959063eell,
                                               0x40756709b0b644bell)
                                              )>(zz1);
   // zz1 becomes sqrt(2 * (1 - x)); the result is assembled as
   // (pio4 - zz1 - (zz1 * vp + morebits)) + pio4, adding Pio_4 in two steps.
   zz1 =  sqrt(zz1+zz1);
   A0 z = pio4-zz1;
   zz1 = fma(zz1, vp, morebits);
   z =  z-zz1;
   zz1 = z+pio4;
   // --- branch for the remaining x: rational polynomial in zz2 = a0^2 ---
   A0 zz2 = sqr(a0);
   z = zz2*horner< NT2_HORNER_COEFF_T(sA0, 6,
                                  (0x3f716b9b0bd48ad3ll,
                                   0xbfe34341333e5c16ll,
                                   0x4015c74b178a2dd9ll,
                                   0xc0304331de27907bll,
                                   0x40339007da779259ll,
                                   0xc020656c06ceafd5ll)
                                  )>(zz2)/
         horner< NT2_HORNER_COEFF_T(sA0, 6,
                                  (0x3ff0000000000000ll,
                                   0xc02d7b590b5e0eabll,
                                   0x40519fc025fe9054ll,
                                   0xc06265bb6d3576d7ll,
                                   0x4061705684ffbf9dll,
                                   0xc04898220a3607acll)
                                  )>(zz2);
   zz2 = x*z+x;
   // Pick the result per element, restore the sign, and mask |a0| > 1 to NaN.
   return if_nan_else( gt(x, One<A0>())
                     , b_xor ( select( small
                                     , x
                                     , select( gt(x, ct1)
                                             , zz1
                                             , zz2
                                             )
                                     )
                             , bitofsign(a0)
                             )
                     );
 }
Exemple #12
0
 // Computes a value and its lower/upper confidence bounds.
 //
 // a0 children: 0 = x, 1 = mu, 2 = sigma, 3 = pcov (bound to a 2D table).
 // a1 children: 0 = 0.5 * erfc(-Sqrt_2o_2 * z)
 //              1 = same expression at z - halfwidth
 //              2 = same expression at z + halfwidth
 // alpha: the half-width uses -norminv(alpha / 2).
 BOOST_FORCEINLINE static void conf_bounds(const A0& a0, A1& a1,
                                           const value_type& alpha )
 {
   typedef nt2::memory::container<tag::table_, value_type, nt2::_2D>  semantic;
   NT2_AS_TERMINAL_IN(semantic, pcov, boost::proto::child_c<3>(a0));
   const In0& x  = boost::proto::child_c<0>(a0);
   const In1& mu = boost::proto::child_c<1>(a0);
   const In2& sigma = boost::proto::child_c<2>(a0);
   // Standardised log of x; x is passed through if_zero_else(is_lez(x), x)
   // before the log (read here as "zero where x <= 0, x elsewhere").
   auto z = (log(if_zero_else(is_lez(x), x))-mu)/sigma;
   // this is [1, x0]*pcov*[1; x0]
   // i.e. pcov(2,2)*z^2 + 2*pcov(1,2)*z + pcov(1,1), as two nested fma steps
   auto zvar = fma(fma(pcov(2,2), z, Two<value_type>()*pcov(1,2)), z, pcov(1,1));
   BOOST_ASSERT_MSG(nt2::globalall(nt2::is_gez(zvar)), "Covariance matrix must be positive");
   value_type normz = -nt2::norminv(alpha*nt2::Half<value_type>());
   auto halfwidth =  normz*nt2::sqrt(zvar)/sigma;
   boost::proto::child_c<0>(a1) = Half<value_type>()*nt2::erfc(-Sqrt_2o_2<value_type>()*z);
   boost::proto::child_c<1>(a1) = Half<value_type>()*nt2::erfc(-Sqrt_2o_2<value_type>()*(z-halfwidth));
   boost::proto::child_c<2>(a1) = Half<value_type>()*nt2::erfc(-Sqrt_2o_2<value_type>()*(z+halfwidth));
 }
Exemple #13
0
 // Computes a value and its lower/upper confidence bounds.
 //
 // a0 children: 0 = p, 1 = mu, 2 = sigma, 3 = pcov (bound to a 2D table).
 // a1 children: 0 = exp(sigma * logx0 + mu)
 //              1 = child 0 * exp(-halfwidth)
 //              2 = child 0 / exp(-halfwidth)
 // alpha: the half-width uses -norminv(alpha / 2).
 BOOST_FORCEINLINE static void conf_bounds(const A0& a0, A1& a1,
                                           const value_type& alpha )
 {
   typedef nt2::memory::container<tag::table_, value_type, nt2::_2D>  semantic;
   NT2_AS_TERMINAL_IN(semantic, pcov, boost::proto::child_c<3>(a0));
   const In0& p  = boost::proto::child_c<0>(a0);
   const In1& mu = boost::proto::child_c<1>(a0);
   const In2& sigma = boost::proto::child_c<2>(a0);
   // NOTE(review): Sqrt_2 and Two are instantiated with A0 here, while the
   // rest of the function uses value_type - confirm this is intended.
   auto logx0 = -Sqrt_2<A0>()*erfcinv( nt2::Two<A0>()*p);
   // pcov(2,2)*logx0^2 + 2*pcov(1,2)*logx0 + pcov(1,1), as two nested fma steps
   auto xvar =   fma(fma(pcov(2,2), logx0, Two<value_type>()*pcov(1,2)), logx0, pcov(1,1));
   BOOST_ASSERT_MSG(nt2::globalall(nt2::is_nltz(xvar)), "Covariance matrix must be positive");
   value_type normz = -nt2::norminv(alpha*nt2::Half<value_type>());
   auto halfwidth = normz*nt2::sqrt(xvar);
   boost::proto::child_c<0>(a1) = exp(fma(sigma, logx0, mu));
   // The bounds are the central value scaled down and up by the same factor.
   auto coef = exp(-halfwidth);
   boost::proto::child_c<1>(a1) = boost::proto::child_c<0>(a1)*coef;
   boost::proto::child_c<2>(a1) = boost::proto::child_c<0>(a1)/coef;
 }
//begin private methods
// Magnitude of the two-component vector (gx, gy): sqrt(gx^2 + gy^2).
// The second square is accumulated onto the first with fma.
float ADXL335::geta2d(float gx, float gy)
{
  const float gx_squared = gx * gx;
  const float sum_of_squares = fma(gy,gy,gx_squared);

  return sqrt(sum_of_squares);
}
Exemple #15
0
// Magnitude of the two-component vector (gx, gy): sqrt(gx^2 + gy^2).
// The second square is accumulated onto the first with fma.
float MMA845XQ::geta2d(float gx, float gy)
{
  const float gx_squared = gx * gx;
  const float sum_of_squares = fma(gy,gy,gx_squared);

  return sqrt(sum_of_squares);
}
void Adaptive::updateProbsAdvanced(const double wgt, \
                                   const vector<Flashcard> & cards)
{   // Updates probabilities
    double probUnasked = 0.0;
    const double pStar = probability[currentIndex];
    int numOfNumAskedIs0 = 0;
    double alpha = fdim(1.0,pStar), beta;
    double gamma = 0.01, gamWeight = 1.0;   // Experiment with different gammas

    for (usInt ii = 0; ii < probability.size(); ii++)
    {
        if (cards[ii].data.getNumAsked() == 0 && ii != currentIndex)
        {
            probUnasked += probability[ii];
            numOfNumAskedIs0++;
        }
    }

    // Divide-by-zero guard
    if (numOfNumAskedIs0 < (probability.size() - 2))
    {
        gamma = 0.01;
        beta = (gamma * probUnasked / wgt + pStar * alpha) / (alpha - probUnasked);
        gamWeight = 1.0;
    }
    else
    {
        beta = pStar;
        gamWeight = -wgt;
        gamma = beta;
    }

    for (usInt ii = 0; ii < probability.size(); ii++)
    {
        if ( ii == currentIndex )
            probability[ii] *= fma(wgt,alpha,1.0);
        else if ( cards[ii].data.getNumAsked() != 0 )
            probability[ii] *= fma(-wgt,beta,1.0);
        else
            probability[ii] *= fma(gamWeight,gamma,1.0);
    }
}
Exemple #17
0
 // Sine kernel: returns x + x * z * P(z), where P is the 6-coefficient
 // polynomial below evaluated at z.
 // NOTE(review): z is presumably x squared, supplied by the caller - confirm.
 static inline A0_n sin_eval(const A0_n z_n, const A0& x)//, const A0&)
 {
   const A0 z = { z_n };
   const A0 poly = horner< NT2_HORNER_COEFF_T(stype, 6, (0x3de5d8fd1fcf0ec1ll,
                                                         0xbe5ae5e5a9291691ll,
                                                         0x3ec71de3567d4896ll,
                                                         0xbf2a01a019bfdf03ll,
                                                         0x3f8111111110f7d0ll,
                                                         0xbfc5555555555548ll) ) > (z);
   // (poly * z) * x + x in a single fused step.
   return fma(poly*z,x,x);
 }
Exemple #18
0
/*
 * Perform routine for an envelope that moves `state` towards 1 (attack,
 * target != 0) or towards 0 (release, target == 0).
 *
 *   w[1]  t_nearctl control block (state, target, attack/release stages)
 *   w[2]  sample count
 *   w[3]  output samples
 * Returns w+4.
 *
 * Each step writes the current state and then advances it with
 * state * stage.op + stage.base. Once the state reaches its limit it is
 * pinned there and the rest of the block is filled with the limit value.
 */
t_int *near_perform(t_int *w)
{
    t_nearctl *ctl = (t_nearctl *)(w[1]);
    int n = (int)(w[2]);
    t_float *out = (t_float *)(w[3]);
    t_float state = ctl->c_state;
    char target = ctl->c_target;
    t_stage stage;

    if (target) {
        /* attack */
        stage = ctl->c_attack;
        if (state == 1.0) {
            /* already at the top: constant output */
            while (n--) *out++ = 1.0;
        } else {
            while (n--) {
                *out++ = state;
                state = fma(state, stage.op, stage.base);
                if (state >= 1.0) {
                    state = 1.0;
                    /* fill the remainder; leaves n == 0 so the outer loop ends */
                    while (n) {
                        *out++ = state;
                        n--;
                    }
                }
            }
        }
    } else {
        /*release*/
        if (state == 0.0) {
            /* already at the bottom: constant output */
            while (n--) *out++ = 0.0;
        } else {
            stage = ctl->c_release;
            stage.base = ctl->c_linr;   /* release uses c_linr as its base */
            while (n--) {
                *out++ = state;
                state = fma(state, stage.op, stage.base);
                if (state <= 0.0) {
                    state = 0.0;
                    /* fill the remainder; leaves n == 0 so the outer loop ends */
                    while (n) {
                        *out++ = state;
                        n--;
                    }
                }
            }
        }
    }

    /* save state */
    ctl->c_state = IS_DENORMAL(state) ? 0 : state;
    ctl->c_target = target;
    return (w+4);
}
Exemple #19
0
/*++
Function:
    fma

See MSDN.

Thin wrapper: forwards x, y and z to fma() and returns its result unchanged.
The call is bracketed by the PERF_ENTRY/ENTRY and LOGEXIT/PERF_EXIT macros,
which receive the arguments and the result.
--*/
PALIMPORT double __cdecl PAL_fma(double x, double y, double z)
{
    double ret;
    PERF_ENTRY(fma);
    ENTRY("fma (x=%f, y=%f, z=%f)\n", x, y, z);

    ret = fma(x, y, z);

    LOGEXIT("fma returns double %f\n", ret);
    PERF_EXIT(fma);
    return ret;
}
Exemple #20
0
// fma(double, double, var): only the third argument is an autodiff variable,
// so the gradient has a single entry, d(a*b + c)/dc = 1.
TEST(AgradRev,fma_ddv_defaultpolicy) {
  double factor_a = 3.0;
  double factor_b = 5.0;
  AVAR addend = 7.0;

  AVAR result = fma(factor_a, factor_b, addend);
  EXPECT_FLOAT_EQ(3.0 * 5.0 + 7.0, result.val());

  AVEC independents = createAVEC(addend);
  VEC gradient;
  result.grad(independents, gradient);
  EXPECT_FLOAT_EQ(1.0, gradient[0]);
}
Exemple #21
0
/*
 * Times one call of the supplied vector routine.
 *
 *   a, b, c  forwarded unchanged to the routine
 *   fma      routine under test; its bool result is ignored
 *
 * Returns the difference between the rdtsc() readings taken right after and
 * right before the call, narrowed to int.
 *
 * The readings are kept in an unsigned 64-bit type and subtracted there, so
 * a counter wrap between the two readings stays well defined; subtracting
 * two already-narrowed ints could overflow a signed int.
 */
int perfTest(struct doubleVector *a,
             const struct doubleVector *b,
             const struct doubleVector *c,
             bool(*fma)(struct doubleVector *,
                        const struct doubleVector *,
                        const struct doubleVector *)) {

  const unsigned long long startCycles = rdtsc();
  fma(a, b, c);
  const unsigned long long endCycles = rdtsc();
  return (int)(endCycles - startCycles);
}
Exemple #22
0
// fma(var, var, double): the first two arguments are autodiff variables, so
// the gradient of a*b + c is (b, a) = (5, 3).
TEST(AgradRev,fma_vvd_defaultpolicy) {
  AVAR factor_a = 3.0;
  AVAR factor_b = 5.0;
  double addend = 7.0;

  AVAR result = fma(factor_a, factor_b, addend);
  EXPECT_FLOAT_EQ(3.0 * 5.0 + 7.0, result.val());

  AVEC independents = createAVEC(factor_a, factor_b);
  VEC gradient;
  result.grad(independents, gradient);
  EXPECT_FLOAT_EQ(5.0, gradient[0]);
  EXPECT_FLOAT_EQ(3.0, gradient[1]);
}
Exemple #23
0
// Calls fmaf, fma and fmal once each, passing the same argument in all three
// positions; the results are stored in otherwise unused locals.
// The comment lines below that start with an upper-case prefix are presumably
// directives for an output-matching tool and must be kept byte-for-byte; both
// prefixes expect the same intrinsic call for each of the three widths.
// CHECK-YES-LABEL: define void @test_fma
// CHECK-NO-LABEL: define void @test_fma
void test_fma(float a0, double a1, long double a2) {
    // CHECK-YES: call float @llvm.fma.f32
    // CHECK-NO: call float @llvm.fma.f32
    float l0 = fmaf(a0, a0, a0);

    // CHECK-YES: call double @llvm.fma.f64
    // CHECK-NO: call double @llvm.fma.f64
    double l1 = fma(a1, a1, a1);

    // CHECK-YES: call x86_fp80 @llvm.fma.f80
    // CHECK-NO: call x86_fp80 @llvm.fma.f80
    long double l2 = fmal(a2, a2, a2);
}
Exemple #24
0
// Advance the solution by one step.
// Updates x_k, r_k, s_k and residualMagSq in place and returns nothing;
// the early-exit convergence check in the body is commented out.
void cgSolver::iterate(void)
{
    // Step length: residualMagSq / (s_k . M s_k), with M s_k left in tmp.
    comm.matrixVectorProduct(s_k,tmp);
    const double directionDot = comm.dotProduct(s_k,tmp);
    const double stepLength = residualMagSq/directionDot;

    // x_k <- x_k + stepLength * s_k
    fma(n,x_k, x_k,stepLength,s_k);

    // r_k <- r_k - stepLength * tmp, then refresh its squared magnitude.
    const double previousMagSq = residualMagSq;
    fma(n,r_k, r_k,-stepLength,tmp);
    residualMagSq = comm.dotProduct(r_k,r_k);

    //printf("residualMagSq=%g\n",residualMagSq);
    //if (sqrt(fabs(residualMagSq))<=maxErr) return true; //We're done

    // s_k <- r_k + (new / old residual magnitude) * s_k
    const double directionMix = residualMagSq/previousMagSq;
    fma(n,s_k, r_k,directionMix,s_k);
}
Exemple #25
0
/*
 * Perform routine: accumulates a phase from the input samples and turns the
 * LOWOFFSET word of that phase into a value between -1 and 1, which is then
 * scaled by `mul` and offset by `add`.
 *
 *   w[1]  t_triangulator object (x_phase, x_conv, invals[0].val = mul,
 *         invals[1].val = add)
 *   w[2]  input samples
 *   w[3]  output samples
 *   w[4]  sample count
 * Returns w+5.
 *
 * Each output sample is derived from the phase as it was *before* the current
 * input sample was added: tf is refreshed only at the end of an iteration.
 */
static t_int *op_perf2(t_int *w) {
	t_triangulator *x = (t_triangulator *)(w[1]);
	t_sample *in = (t_sample *)(w[2]);
	t_sample *out = (t_sample *)(w[3]);
	int n = (int)(w[4]);
	t_float mul = x->invals[0].val;
	t_float add = x->invals[1].val;
	t_sample inter;
	double dphase = x->x_phase + (double)UNITBIT32;
    union tabfudge tf;
    uint32_t casto;
    
    float conv = x->x_conv;
    tf.tf_d = dphase;

    while (n--)
    {
        /* advance the phase by input * conv */
        #ifdef FP_FAST_FMA
        dphase = fma(*in++, conv, dphase);
        #else
        dphase += *in++ * conv;
        #endif
        /* fold the 32-bit word: values with bit 31 set are bit-inverted, so
           casto ends up in 0 .. 2^31-1 and rises then falls over one cycle */
        casto = (uint32_t)tf.tf_i[LOWOFFSET];
        if(casto & 2147483648) /* bit 31 */
        	casto = ~casto;
        /* 1073741823.5 is (2^31-1)/2, mapping 0 .. 2^31-1 onto -1 .. 1 */
        inter = (t_sample)casto/1073741823.5 - 1;
        #ifdef FP_FAST_FMA
        *out++ = fma(inter, mul, add);
        #else
        *out++ = inter*mul + add;
        #endif
        tf.tf_d = dphase;
    }
    /* store the phase for the next call: reset the HIOFFSET word, then
       remove the UNITBIT32 offset that was added on entry */
    tf.tf_i[HIOFFSET] = NORMHIPART;
    x->x_phase = tf.tf_d - UNITBIT32;
    return (w+5);
}
Exemple #26
0
 // Arccosine built on asin().
 //
 // For |a0| > 0.5 the argument is first replaced by sqrt(0.5 - 0.5 * |a0|)
 // and the asin result is doubled; for a0 < -0.5 that value is subtracted
 // from Pi. For |a0| <= 0.5 the result is Pio_2 - asin(a0).
 static inline A0_n acos(const A0_n a0_n)
 {
   // 2130706432 values computed.
   // 1968272987 values (92.38%) within 0.0 ULPs
   //  162433445 values (7.62%)  within 0.5 ULPs
   // 8.5 cycles/element SSE4.2 g++-4.8
   const A0 a0 = a0_n;
   A0 x = nt2::abs(a0);
   bA0 x_larger_05 = gt(x, nt2::Half<A0>());
   // large magnitudes: sqrt(Mhalf * x + Half); small ones keep the signed input
   x  = if_else(x_larger_05, nt2::sqrt(fma(nt2::Mhalf<A0>(), x, nt2::Half<A0>())), a0);
   x  = asin(x);
   // double the asin result where the magnitude was large
   // (seladd is read here as "add the third argument where the flag holds")
   x =  seladd(x_larger_05, x, x);
   // mirror for inputs below -0.5
   x  = nt2::if_else(lt(a0, nt2::Mhalf<A0>()), nt2::Pi<A0>()-x, x);
   // small magnitudes: Pio_2 - asin(a0)
   return nt2::if_else(x_larger_05, x, nt2::Pio_2<A0>()-x);
 }
Exemple #27
0
/*
 * Linearly interpolated read from one of the tables stored back to back in
 * rexptab (table `type` starts at rexptab + type * SHABLESIZE).
 *
 * index is multiplied by SHABLESIZE and clamped at zero from below. Positions
 * at or beyond SHABLESIZE - 1 return the last entry of the table; everything
 * else blends the two neighbouring entries.
 */
t_float readtab(t_tabtype type, t_float index) {
	t_float *table = rexptab + (type * SHABLESIZE);
	t_float position, weight, lower, rise;
	int cell;

	position = index * SHABLESIZE;
	position = fmax(position, 0);

	/* no right-hand neighbour at the end of the table */
	if (position >= SHABLESIZE - 1)
		return table[SHABLESIZE - 1];

	cell = position;            /* integer part */
	weight = position - cell;   /* fractional part */
	lower = table[cell];
	rise = table[cell + 1] - lower;
	return fma(weight, rise, lower);
}
Exemple #28
0
 // Shared kernel for tan/cot: returns x + x * (x^2 * N(x^2) / D(x^2)),
 // where N is evaluated with horn and D with horn1.
 static BOOST_FORCEINLINE A0 base_tancot_eval(const A0& x)
 {
   const A0 x_squared = sqr(x);

   // Numerator: three coefficients.
   const A0 numerator = horn<A0,
     0xc1711fead3299176ll,
     0x413199eca5fc9dddll,
     0xc0c992d8d24f3f38ll
     >(x_squared);

   // Denominator: four coefficients; the commented-out value is the bit
   // pattern of 1.0, which is not passed to horn1.
   const A0 denominator = horn1<A0,
     0xc189afe03cbe5a31ll,
     0x4177d98fc2ead8efll,
     0xc13427bc582abc96ll,
     0x40cab8a5eeb36572ll
     //    0x3ff0000000000000ll
     >(x_squared);

   return fma(x, (x_squared*(numerator/denominator)), x);
 }
Exemple #29
0
 // Digamma approximation; per the formula below it is written for arguments
 // between 1 and 2 (going by the function name - confirm).
 // The second parameter is unnamed and unused in the body.
 // NOTE(review): it presumably only selects this overload - confirm.
 static inline A digamma_imp_1_2(A x, double)
 {
 //
 // Now the approximation, we use the form:
 //
 // digamma(x) = (x - root) * (Y + R(x-1))
 //
 // Where root is the location of the positive root of digamma,
 // Y is a constant, and R is optimised for low nt2::absolute error
 // compared to Y.
 //
 // Maximum Deviation Found:              3.388e-010
 // At float precision, max error found:  2.008725e-008
 //
 typedef typename meta::scalar_of<A>::type sA; 
 static const A Y = splat<A>(0.99558162689208984);
 
 // The root is stored as three parts of decreasing magnitude, which are
 // subtracted from x one after another below.
 static const A root1 = splat<A>(1569415565.0 / 1073741824uL);
 static const A root2 = splat<A>((381566830.0 / 1073741824uL) / 1073741824uL);
 static const A root3 = splat<A>(double(0.9016312093258695918615325266959189453125e-19L));
 
 // Numerator coefficients of R.
 static const boost::array<sA, 6> P = {{    
     sA(0.25479851061131551L),
     sA(-0.32555031186804491L),
     sA(-0.65031853770896507L),
     sA(-0.28919126444774784L),
     sA(-0.045251321448739056L),
     sA(-0.0020713321167745952L)
   }};
 // Denominator coefficients of R.
 static const  boost::array<sA, 7> Q = {{
     sA(1L),
     sA(2.0767117023730469L),
     sA(1.4606242909763515L),
     sA(0.43593529692665969L),
     sA(0.054151797245674225L),
     sA(0.0021284987017821144L),
     sA(-0.55789841321675513e-6L)
   }};
   // g = x - root, removing the three parts in turn
   A g = x - root1;
   g -= root2;
   g -= root3; 
   // R is evaluated at x - 1
   x-= One<A>(); 
   A r = eval_poly<6>(x, P)/eval_poly<7>(x, Q);
   // g * Y + g * r, i.e. (x - root) * (Y + R(x - 1))
   A result = fma(g, Y, g * r);
   return result;
 }
  // Decomposition and polynomial core for a logarithm on A0 values.
  //
  // Input:   a0 - value to decompose with fast_frexp.
  // Outputs: fe - adjusted exponent converted with tofloat
  //          x  - adjusted mantissa plus Const<float,0xbf800000>
  //               (presumably -1.0f - confirm)
  //          x2 - x squared
  //          y  - (x*y2 + y1) * x * x2, where y1 and y2 are two
  //               polynomials in x2 evaluated with fma steps
  // The last parameter is unnamed and unused in the body.
  // NOTE(review): constants and genmask are instantiated with float although
  // the arguments are A0 - confirm this is intended.
  static inline void
  kernel_log(const A0& a0,A0& fe, A0& x,A0& x2, A0& y, const A0&)
  {
    int_type e;
    bf::tie(x, e) = fast_frexp(a0);
    // negated comparison result: non-zero where the mantissa is below the
    // threshold constant (presumably sqrt(1/2), going by the variable name)
    int_type x_lt_sqrthf = -(Const<float,0x3f3504f3>() > x);
    e += x_lt_sqrthf;
    // branch-free: the masked term adds x to itself only where the flag is set
    x += b_and(x, genmask<float>(x_lt_sqrthf))+Const<float,0xbf800000>();
    x2 = sqr(x);
    // two independent fma chains in x2, interleaved line by line
    A0 y1 = fma(Const<float, 0x3d9021bb>() ,x2,Const<float, 0x3def251a>() );
    A0 y2 = fma(Const<float, 0xbdebd1b8>() ,x2,Const<float, 0xbdfe5d4f>() );
    y1 = fma(y1,x2,Const<float, 0x3e11e9bf>() );
    y2 = fma(y2,x2,Const<float, 0xbe2aae50>() );
    y1 = fma(y1,x2,Const<float, 0x3e4cceac>() );
    y2 = fma(y2,x2,Const<float, 0xbe7ffffc>() );
    y1 = fma(y1,x2,Const<float, 0x3eaaaaaa>() );
    // combine the two chains and scale by x^3
    y  = fma(x,y2,y1)*x*x2;
    fe = tofloat(e);
 }