inline float log(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>(); float x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = fma(fe, Const<float, 0xb95e8083>(), y); y = fma(Mhalf<A0>(), x2, y); A0 z = x + y; return fma(Const<float, 0x3f318000>(), fe, z); }
/*
 * DSP perform routine: table lookup in cos_table with linear interpolation,
 * followed by a per-sample multiply and a constant add.
 *
 * Argument vector layout (as read below):
 *   w[1]  t_operatord object (supplies invals[0].vec and invals[1].val)
 *   w[2]  input signal
 *   w[3]  output signal
 *   w[4]  number of samples
 * Returns w+5, i.e. the slot following this routine's arguments.
 *
 * The lookup is software-pipelined: the table address for sample k is
 * computed one step before sample k is interpolated, which is why the loop
 * is written as while (--n) and the last sample is finished after the loop.
 * NOTE(review): the loop form means n must be at least 1; with n == 0 the
 * pre-decrement yields -1 and the loop does not stop at zero.
 */
static t_int *op_perf1(t_int *w)
{
    t_operatord *x = (t_operatord *)(w[1]);
    t_sample *in = (t_sample *)(w[2]);
    t_sample *out = (t_sample *)(w[3]);
    int n = (int)(w[4]);
    t_sample *mul = x->invals[0].vec;
    t_float add = x->invals[1].val;
    float *tab = cos_table, *addr, f1, f2, frac;
    double dphase;
    int normhipart;
    union tabfudge tf;

    /* remember the high word of UNITBIT32 so it can be restored later */
    tf.tf_d = UNITBIT32;
    normhipart = tf.tf_i[HIOFFSET];

    /* prologue: scale the first input sample to table units and derive its
       table address from the high word, wrapped by masking with COSTABSIZE-1 */
    dphase = (double)(*in++ * (float)(COSTABSIZE)) + UNITBIT32;
    tf.tf_d = dphase;
    addr = tab + (tf.tf_i[HIOFFSET] & (COSTABSIZE-1));
    /* restoring the high word makes tf.tf_d - UNITBIT32 the interpolation
       fraction used on the next step */
    tf.tf_i[HIOFFSET] = normhipart;
    while (--n)
    {
        /* start the next sample's phase ... */
        dphase = (double)(*in++ * (float)(COSTABSIZE)) + UNITBIT32;
        /* ... while finishing the current one: fraction and table neighbours */
        frac = tf.tf_d - UNITBIT32;
        tf.tf_d = dphase;
        f1 = addr[0];
        f2 = addr[1];
        addr = tab + (tf.tf_i[HIOFFSET] & (COSTABSIZE-1));
#ifdef FP_FAST_FMA
        /* dphase is reused here as scratch for the interpolated value */
        dphase = fma(frac, f2 - f1, f1);
        *out++ = fma(dphase, (*mul++), add);
#else
        dphase = f1 + frac * (f2 - f1);
        *out++ = dphase*(*mul++) + add;
#endif
        tf.tf_i[HIOFFSET] = normhipart;
    }
    /* epilogue: interpolate and emit the last sample */
    frac = tf.tf_d - UNITBIT32;
    f1 = addr[0];
    f2 = addr[1];
#ifdef FP_FAST_FMA
    dphase = fma(frac, f2 - f1, f1);
    *out++ = fma(dphase, (*mul++), add);
#else
    dphase = f1 + frac * (f2 - f1);
    *out++ = dphase*(*mul++) + add;
#endif
    return (w+5);
}
inline void kernel_log(const float& a0, float& fe, float& x, float& x2, float& y) { typedef float A0; typedef meta::as_integer<A0, signed>::type int_type; int_type e; boost::fusion::tie(x, e) = fast_frexp(a0); int_type x_lt_sqrthf = -(Const<float, 0x3f3504f3>() > x); e += x_lt_sqrthf; // if (x_lt_sqrthf) x+= x; // x += Const<float, 0xbf800000>(); x += b_and(x, genmask<float>(x_lt_sqrthf))+Const<float,0xbf800000>(); x2 = sqr(x); A0 y1 = fma(Const<float, 0x3d9021bb>() ,x2,Const<float, 0x3def251a>() ); A0 y2 = fma(Const<float, 0xbdebd1b8>() ,x2,Const<float, 0xbdfe5d4f>() ); y1 = fma(y1,x2,Const<float, 0x3e11e9bf>() ); y2 = fma(y2,x2,Const<float, 0xbe2aae50>() ); y1 = fma(y1,x2,Const<float, 0x3e4cceac>() ); y2 = fma(y2,x2,Const<float, 0xbe7ffffc>() ); y1 = fma(y1,x2,Const<float, 0x3eaaaaaa>() ); y = fma(x,y2,y1)*x*x2; fe = tofloat(e); }
void Adaptive::updateProbsBasic(const double wgt) { const double pStar = probability[currentIndex]; const double alpha = fdim(1.0,pStar), beta = pStar; vector<double>::iterator it; const vector<double>::const_iterator itIndex = (probability.begin() + currentIndex); for (it = probability.begin(); it != probability.end(); it++) { if ( it == itIndex ) *it *= fma(wgt,alpha,1.0); else *it *= fma(-wgt,beta,1.0); } }
// Forward-mode fused multiply-add: x1 * x2 + x3.
// The value part is delegated to fma on the underlying values; the tangent
// part is the product rule applied to x1 * x2 plus the tangent of x3.
inline
fvar<typename stan::return_type<T1, T2, T3>::type>
fma(const fvar<T1>& x1, const fvar<T2>& x2, const fvar<T3>& x3)
{
  typedef typename stan::return_type<T1, T2, T3>::type promoted_t;
  return fvar<promoted_t>(
      fma(x1.val_, x2.val_, x3.val_),
      x1.d_ * x2.val_ + x2.d_ * x1.val_ + x3.d_);
}
// Linear interpolation of the samples (x, y) at the query points xi.
//
// inputs children: 0 = x (sample abscissae), 1 = y (sample values),
// 2 = xi (query points); further options are decoded by choices().
// yi is resized to inputs.extent(), filled and returned.
//
// When extrapolation is not enabled, query points that are not >= the first
// x or not <= the last x (NaN queries included) receive extrapval.
//
// Fix: the interpolation weight is now normalised by the width of the
// bracketing interval. The previous weight, xi - x(index), is only a
// fraction in [0, 1] when consecutive x values are exactly one apart, so
// any other grid produced wrong results.
// NOTE(review): repeated x values give a zero-width interval and therefore
// a division by zero in the weight.
result_type operator()(A0& yi, A1& inputs) const
{
  yi.resize(inputs.extent());
  const child0 & x = boost::proto::child_c<0>(inputs);
  if (numel(x) <= 1)
    BOOST_ASSERT_MSG(numel(x) > 1, "Interpolation requires at least two sample points in each dimension.");
  else
  {
    BOOST_ASSERT_MSG(issorted(x, 'a'), "for 'linear' interpolation x values must be sorted in ascending order");
    const child1 & y = boost::proto::child_c<1>(inputs);
    BOOST_ASSERT_MSG(numel(x) == numel(y), "The grid vectors do not define a grid of points that match the given values.");
    const child2 & xi = boost::proto::child_c<2>(inputs);
    bool extrap = false;
    value_type extrapval = Nan<value_type>();
    choices(inputs, extrap, extrapval, N1());

    // index(k) selects the interval [x(index), x(index+1)] used for xi(k)
    table<index_type> index = bsearch (x, xi);
    // position of xi inside that interval, as a fraction of its width
    table<value_type> dx = (xi-x(index))/(x(oneplus(index))-x(index));
    yi = fma(oneminus(dx), y(index), dx*y(oneplus(index)));

    value_type b = value_type(x(begin_));
    value_type e = value_type(x(end_));
    if (!extrap)
      yi = nt2::if_else(nt2::logical_or(boost::simd::is_nge(xi, b),
                                        boost::simd::is_nle(xi, e)),
                        extrapval, yi);
  }
  return yi;
}
// Arctangent kernel working on |a0|.
//
// Three ranges are distinguished by the two thresholds below:
//   |a0| <  tanpio8               : argument used as is, offset 0
//   tanpio8 <= |a0| < tan3pio8    : argument (x-1)/(x+1), offset Pio_4
//   |a0| >= tan3pio8              : argument -1/x, offset Pio_2
// A rational function of the squared argument is then evaluated and a small
// correction (morebits, or half of it in the middle range) is added.
static inline A0_n kernel_atan(const A0_n a0_n)
{
  typedef typename meta::scalar_of<A0>::type sA0;

  const A0 tan3pio8 = double_constant<A0, 0x4003504f333f9de6ll>();
  const A0 tanpio8  = double_constant<A0, 0x3fda827999fcef31ll>();
  const A0 morebits = double_constant<A0, 0x3c91a62633145c07ll>();

  const A0 a0 = {a0_n};
  const A0 x  = nt2::abs(a0);

  const bA0 below_upper = lt(x, tan3pio8);
  const bA0 in_middle   = logical_and(ge(x, tanpio8), below_upper);

  // constant added back at the end
  A0 offset = if_zero_else(below_upper, Pio_2<A0>());
  offset = select(in_middle, Pio_4<A0>(), offset);

  // reduced argument
  A0 arg = select(below_upper, x, -rec(x));
  arg = select(in_middle, minusone(x)/oneplus(x), arg);

  const A0 arg_sq = sqr(arg);
  const A0 num = arg_sq*horner< NT2_HORNER_COEFF_T(sA0, 5,
                                 (0xbfec007fa1f72594ll,
                                  0xc03028545b6b807all,
                                  0xc052c08c36880273ll,
                                  0xc05eb8bf2d05ba25ll,
                                  0xc0503669fd28ec8ell)
                                )>(arg_sq);
  const A0 den = horner< NT2_HORNER_COEFF_T(sA0, 6,
                          (0x3ff0000000000000ll,
                           0x4038dbc45b14603cll,
                           0x4064a0dd43b8fa25ll,
                           0x407b0e18d2e2be3bll,
                           0x407e563f13b049eall,
                           0x4068519efbbd62ecll)
                         )>(arg_sq);

  A0 z = fma(arg, num/den, arg);
  z = seladd(in_middle, z, mul(Half<A0>(), morebits));
  z = z + if_zero_else(below_upper, morebits);
  return offset + z;
}
// Scratch program for floating-point experiments:
//   1. prints the bit patterns of 0.1 * 10 - 1 computed with separate
//      operations and with fma;
//   2. runs a 5-interval trapezoid sum of sin over [shift-1e-20, shift+1e-20]
//      (the sum itself is only printed by the disabled lines);
//   3. prints sin((pi + 1e9) - 1e9) next to sin(pi).
int main(void)
{
  const double r1 = 0.1 * 10 - 1;
  const double r2 = fma(0.1, 10, -1);
  print_bin_double(r1);
  print_bin_double(r2);

  long int i;
  // Disabled experiment: cancellation in (i*pi + pi/2) - (i*pi - pi/2) - pi
  // printf("off;reg;fma;expect\n");
  // for(i = 1; i < 1L << 62; i *= 10) {
  //   printf("%ld;", i);
  //   printf("%.15G;", (i * M_PI + M_PI_2) - (i * M_PI - M_PI_2) - M_PI);
  //   printf("%.15G;%.15G\n", (1* fma(i, M_PI, +M_PI_2) - fma(i, M_PI, -M_PI_2)) - M_PI, M_PI);
  // }

  // 000 is an octal zero literal, so shift is 0 here.
  const double shift = M_PI*000;
  const double l = shift - 1e-20;
  const double r = shift + 1e-20;
  const long int n = 5;
  const double dx = (r - l) / n;

  printf("dx=%.15G\nl, r, v\n", dx);

  long double res = 0;
  for (i = 0; i < n; i++)
  {
    const double tl = l + dx * i;
    const double tr = l + dx * (i + 1);
    // printf("%.15G %.15G %.15G\n", tl, tr, (sin(tl) + sin(tr)) / 2);
    res += (sin(tl) + sin(tr));
  }
  res *= dx / 2;
  // printf("%.15G\n", (double)res);
  // printf("%.15G\n%.15G\n", (shift + M_PI) - shift, M_PI);
  // printf("%.15G\n", -cos(r)+cos(l));
  // printf("%.15G", sin(M_PI_2*100001));

  const long int offset = 1e9;
  printf("%.15G\n%.15G", sin((M_PI + offset) - offset), sin(M_PI));
  return 0;
}
//
// Albedo of an attenuated-sum reflector:
//   attenuation * albedo(attenuated_reflector) + albedo(added_reflector)
//
// context is the PCATTENUATED_SUM_REFLECTOR; the result is written through
// reflectance. The first non-success status from either child query is
// returned unchanged. The second query writes directly into *reflectance,
// so that location may already be overwritten when the query fails.
//
static
ISTATUS
AttenuatedSumReflectorGetAlbedo(
    _In_ const void *context,
    _Out_ float_t *reflectance
    )
{
    PCATTENUATED_SUM_REFLECTOR sum = (PCATTENUATED_SUM_REFLECTOR)context;
    float_t offset;

    ISTATUS status = ReflectorGetAlbedoInline(sum->added_reflector, &offset);

    if (status == ISTATUS_SUCCESS)
    {
        status = ReflectorGetAlbedoInline(sum->attenuated_reflector,
                                          reflectance);

        if (status == ISTATUS_SUCCESS)
        {
            *reflectance = fma(sum->attenuation, *reflectance, offset);
        }
    }

    return status;
}
inline float log2(const float& a0) { typedef float A0; if (a0 == Inf<A0>()) return a0; if (iseqz(a0)) return Minf<A0>(); if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<A0>(); A0 x, fe, x2, y; kernel_log(a0, fe, x, x2, y); y = fma(Mhalf<A0>(),x2, y); // multiply log of fraction by log2(e) A0 z = fma( x , Const<float, 0x3ee2a8ed>() , mul(y,Const<float, 0x3ee2a8ed>())// 0.44269504088896340735992 ); return ((z+y)+x)+fe; }
// Arcsine kernel for double-based A0.
//
// Works on x = |a0| and restores the sign at the end by xor-ing in
// bitofsign(a0). Three candidate results are formed and one is selected:
//   x <  Sqrteps<A0>()  -> x itself
//   x >  ct1 (0.625)    -> zz1, built from a rational function of 1 - x
//   otherwise           -> zz2 = x*R(x^2) + x
// Inputs with x > 1 yield NaN.
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = { a0_n };
  typedef typename meta::scalar_of<A0>::type sA0;
  A0 x = nt2::abs(a0);
  const A0 pio4 = Pio_4<A0>();
  const bA0 small= lt(x, Sqrteps<A0>());
  // small correction constant folded into the large-argument branch
  const A0 morebits = double_constant<A0, 0xbc91a62633145c07ll>();
  // threshold between the two polynomial branches (bit pattern of 0.625)
  const A0 ct1 = double_constant<A0, 0x3fe4000000000000ll>();

  // --- branch for x > ct1: rational function of 1 - x ---
  A0 zz1 = oneminus(x);
  const A0 vp = zz1*horner< NT2_HORNER_COEFF_T(sA0, 5,
                             (0x3f684fc3988e9f08ll,
                              0xbfe2079259f9290fll,
                              0x401bdff5baf33e6all,
                              0xc03991aaac01ab68ll,
                              0x403c896240f3081dll)
                            )>(zz1)/
                horner< NT2_HORNER_COEFF_T(sA0, 5,
                         (0x3ff0000000000000ll,
                          0xc035f2a2b6bf5d8cll,
                          0x40626219af6a7f42ll,
                          0xc077fe08959063eell,
                          0x40756709b0b644bell)
                        )>(zz1);
  // from here zz1 holds sqrt(2*(1-x)); the result is assembled as
  // (pio4 - zz1) - (zz1*vp + morebits) + pio4
  zz1 = sqrt(zz1+zz1);
  A0 z = pio4-zz1;
  zz1 = fma(zz1, vp, morebits);
  z = z-zz1;
  zz1 = z+pio4;

  // --- branch for x <= ct1: rational function of a0^2 ---
  A0 zz2 = sqr(a0);
  z = zz2*horner< NT2_HORNER_COEFF_T(sA0, 6,
                   (0x3f716b9b0bd48ad3ll,
                    0xbfe34341333e5c16ll,
                    0x4015c74b178a2dd9ll,
                    0xc0304331de27907bll,
                    0x40339007da779259ll,
                    0xc020656c06ceafd5ll)
                  )>(zz2)/
      horner< NT2_HORNER_COEFF_T(sA0, 6,
               (0x3ff0000000000000ll,
                0xc02d7b590b5e0eabll,
                0x40519fc025fe9054ll,
                0xc06265bb6d3576d7ll,
                0x4061705684ffbf9dll,
                0xc04898220a3607acll)
              )>(zz2);
  zz2 = x*z+x;

  // pick the branch, reapply the sign of a0, and force NaN when x > 1
  return if_nan_else( gt(x, One<A0>())
                    , b_xor ( select( small
                                    , x
                                    , select( gt(x, ct1)
                                            , zz1
                                            , zz2
                                            )
                                    )
                            , bitofsign(a0)
                            )
                    );
}
// Computes a value and its lower/upper confidence bounds.
//
// Inputs (children of a0): 0 = x, 1 = mu, 2 = sigma, 3 = parameter
// covariance matrix (materialised as pcov). alpha is the significance level.
// Outputs (children of a1): 0 = estimate, 1 = lower bound, 2 = upper bound,
// each computed as 0.5 * erfc(-z / sqrt(2)) for the appropriate z.
BOOST_FORCEINLINE static void conf_bounds(const A0& a0, A1& a1,
                                          const value_type& alpha )
{
  typedef nt2::memory::container<tag::table_, value_type, nt2::_2D> semantic;
  NT2_AS_TERMINAL_IN(semantic, pcov, boost::proto::child_c<3>(a0));
  const In0& x = boost::proto::child_c<0>(a0);
  const In1& mu = boost::proto::child_c<1>(a0);
  const In2& sigma = boost::proto::child_c<2>(a0);
  // standardised log of x; entries with x <= 0 are replaced by 0 before the log
  auto z = (log(if_zero_else(is_lez(x), x))-mu)/sigma;
  // this is [1, z]*pcov*[1; z], i.e. pcov(1,1) + 2*pcov(1,2)*z + pcov(2,2)*z^2,
  // evaluated in Horner form
  auto zvar = fma(fma(pcov(2,2), z, Two<value_type>()*pcov(1,2)), z, pcov(1,1));
  BOOST_ASSERT_MSG(nt2::globalall(nt2::is_gez(zvar)), "Covariance matrix must be positive");
  // two-sided quantile for significance level alpha
  value_type normz = -nt2::norminv(alpha*nt2::Half<value_type>());
  auto halfwidth = normz*nt2::sqrt(zvar)/sigma;
  // NOTE(review): z, zvar and halfwidth are declared with auto and may be
  // lazy expressions that are re-evaluated in each assignment below - confirm.
  boost::proto::child_c<0>(a1) = Half<value_type>()*nt2::erfc(-Sqrt_2o_2<value_type>()*z);
  boost::proto::child_c<1>(a1) = Half<value_type>()*nt2::erfc(-Sqrt_2o_2<value_type>()*(z-halfwidth));
  boost::proto::child_c<2>(a1) = Half<value_type>()*nt2::erfc(-Sqrt_2o_2<value_type>()*(z+halfwidth));
}
// Computes an inverse value and its lower/upper confidence bounds.
//
// Inputs (children of a0): 0 = p, 1 = mu, 2 = sigma, 3 = parameter
// covariance matrix (materialised as pcov). alpha is the significance level.
// Outputs (children of a1): 0 = exp(sigma*logx0 + mu), 1 = that value
// multiplied by exp(-halfwidth), 2 = that value divided by exp(-halfwidth).
BOOST_FORCEINLINE static void conf_bounds(const A0& a0, A1& a1,
                                          const value_type& alpha )
{
  typedef nt2::memory::container<tag::table_, value_type, nt2::_2D> semantic;
  NT2_AS_TERMINAL_IN(semantic, pcov, boost::proto::child_c<3>(a0));
  const In0& p = boost::proto::child_c<0>(a0);
  const In1& mu = boost::proto::child_c<1>(a0);
  const In2& sigma = boost::proto::child_c<2>(a0);
  // NOTE(review): Sqrt_2 and Two are instantiated with A0 here but with
  // value_type everywhere else in this function - confirm this is intended.
  auto logx0 = -Sqrt_2<A0>()*erfcinv( nt2::Two<A0>()*p);
  // pcov(1,1) + 2*pcov(1,2)*logx0 + pcov(2,2)*logx0^2, in Horner form
  auto xvar = fma(fma(pcov(2,2), logx0, Two<value_type>()*pcov(1,2)), logx0, pcov(1,1));
  BOOST_ASSERT_MSG(nt2::globalall(nt2::is_nltz(xvar)), "Covariance matrix must be positive");
  // two-sided quantile for significance level alpha
  value_type normz = -nt2::norminv(alpha*nt2::Half<value_type>());
  auto halfwidth = normz*nt2::sqrt(xvar);
  boost::proto::child_c<0>(a1) = exp(fma(sigma, logx0, mu));
  // the bounds are formed multiplicatively around output 0, which is read
  // back from a1 after having been assigned just above
  auto coef = exp(-halfwidth);
  boost::proto::child_c<1>(a1) = boost::proto::child_c<0>(a1)*coef;
  boost::proto::child_c<2>(a1) = boost::proto::child_c<0>(a1)/coef;
}
//begin private methods float ADXL335::geta2d(float gx, float gy) { float a; a = gx * gx; a = fma(gy,gy,a); return sqrt(a); }
// Returns sqrt(gx^2 + gy^2), the length of the two-component vector (gx, gy).
float MMA845XQ::geta2d(float gx, float gy)
{
  // Each intermediate is stored in a float, as before, so rounding is unchanged.
  const float gx_squared = gx * gx;
  const float sum_of_squares = fma(gy, gy, gx_squared);
  return sqrt(sum_of_squares);
}
void Adaptive::updateProbsAdvanced(const double wgt, \ const vector<Flashcard> & cards) { // Updates probabilities double probUnasked = 0.0; const double pStar = probability[currentIndex]; int numOfNumAskedIs0 = 0; double alpha = fdim(1.0,pStar), beta; double gamma = 0.01, gamWeight = 1.0; // Experiment with different gammas for (usInt ii = 0; ii < probability.size(); ii++) { if (cards[ii].data.getNumAsked() == 0 && ii != currentIndex) { probUnasked += probability[ii]; numOfNumAskedIs0++; } } // Divide-by-zero guard if (numOfNumAskedIs0 < (probability.size() - 2)) { gamma = 0.01; beta = (gamma * probUnasked / wgt + pStar * alpha) / (alpha - probUnasked); gamWeight = 1.0; } else { beta = pStar; gamWeight = -wgt; gamma = beta; } for (usInt ii = 0; ii < probability.size(); ii++) { if ( ii == currentIndex ) probability[ii] *= fma(wgt,alpha,1.0); else if ( cards[ii].data.getNumAsked() != 0 ) probability[ii] *= fma(-wgt,beta,1.0); else probability[ii] *= fma(gamWeight,gamma,1.0); } }
// Evaluates x + x * z * P(z), where P is the degree-5 polynomial given by
// the six coefficients below.
// NOTE(review): z is presumably x squared, supplied by the caller - confirm.
static inline A0_n sin_eval(const A0_n z_n, const A0& x)
{
  const A0 z = { z_n };
  return fma( horner< NT2_HORNER_COEFF_T(stype, 6,
                       (0x3de5d8fd1fcf0ec1ll,
                        0xbe5ae5e5a9291691ll,
                        0x3ec71de3567d4896ll,
                        0xbf2a01a019bfdf03ll,
                        0x3f8111111110f7d0ll,
                        0xbfc5555555555548ll)
                      ) > (z)*z
            , x
            , x
            );
}
/*
 * DSP perform routine producing an envelope that moves towards 1 (attack)
 * or towards 0 (release).
 *
 *   w[1]  control block (state, target and the two stage descriptions)
 *   w[2]  number of samples
 *   w[3]  output signal
 * Returns w+4.
 *
 * Each sample outputs the current state and then advances it with
 * state = state * stage.op + stage.base. Once the state reaches its limit
 * it is clamped and the remainder of the block is filled with the limit.
 */
t_int *near_perform(t_int *w)
{
    t_nearctl *ctl = (t_nearctl *)(w[1]);
    int n = (int)(w[2]);
    t_float *out = (t_float *)(w[3]);

    t_float state = ctl->c_state;
    char target = ctl->c_target;
    t_stage stage;

    if (target) {
        /* attack */
        stage = ctl->c_attack;
        if (state == 1.0) {
            /* already at the top: constant output */
            for (; n; n--)
                *out++ = 1.0;
        } else {
            while (n--) {
                *out++ = state;
                state = fma(state, stage.op, stage.base);
                if (state >= 1.0) {
                    state = 1.0;
                    while (n) {
                        *out++ = state;
                        n--;
                    }
                    break;
                }
            }
        }
    } else {
        /* release */
        if (state == 0.0) {
            /* already at the bottom: constant output */
            for (; n; n--)
                *out++ = 0.0;
        } else {
            /* release uses c_release with its base replaced by c_linr */
            stage = ctl->c_release;
            stage.base = ctl->c_linr;
            while (n--) {
                *out++ = state;
                state = fma(state, stage.op, stage.base);
                if (state <= 0.0) {
                    state = 0.0;
                    while (n) {
                        *out++ = state;
                        n--;
                    }
                    break;
                }
            }
        }
    }

    /* save state, flushing a denormal value to zero */
    ctl->c_state = IS_DENORMAL(state) ? 0 : state;
    ctl->c_target = target;
    return (w+4);
}
/*++
Function:
    fma

See MSDN.

Forwards x, y and z unchanged to the C runtime fma and returns its result.
The surrounding macros record entry, exit and the returned value.
--*/
PALIMPORT double __cdecl PAL_fma(double x, double y, double z)
{
    double ret;
    PERF_ENTRY(fma);
    ENTRY("fma (x=%f, y=%f, z=%f)\n", x, y, z);

    ret = fma(x, y, z);

    LOGEXIT("fma returns double %f\n", ret);
    PERF_EXIT(fma);
    return ret;
}
// fma(double, double, var): only the addend is an autodiff variable, so the
// value must be 3 * 5 + 7 and the single partial derivative must be 1.
TEST(AgradRev,fma_ddv_defaultpolicy) {
  double multiplicand = 3.0;
  double multiplier = 5.0;
  AVAR addend = 7.0;

  AVAR result = fma(multiplicand, multiplier, addend);
  EXPECT_FLOAT_EQ(3.0 * 5.0 + 7.0, result.val());

  AVEC independents = createAVEC(addend);
  VEC gradient;
  result.grad(independents, gradient);

  // d(a*b + c)/dc
  EXPECT_FLOAT_EQ(1.0, gradient[0]);
}
/*
 * Runs the supplied implementation once on (a, b, c) and returns the
 * difference between the rdtsc() readings taken right after and right
 * before the call. The implementation's boolean result is ignored.
 *
 * NOTE(review): both readings are stored in an int; if rdtsc() returns a
 * wider counter the values are truncated - confirm against its declaration.
 */
int perfTest(struct doubleVector *a,
             const struct doubleVector *b,
             const struct doubleVector *c,
             bool(*fma)(struct doubleVector *,
                        const struct doubleVector *,
                        const struct doubleVector *))
{
    const int cyclesBefore = rdtsc();
    (void)fma(a, b, c);
    const int cyclesAfter = rdtsc();

    return cyclesAfter - cyclesBefore;
}
// fma(var, var, double): both factors are autodiff variables, so the value
// must be 3 * 5 + 7 and each partial derivative equals the other factor.
TEST(AgradRev,fma_vvd_defaultpolicy) {
  AVAR multiplicand = 3.0;
  AVAR multiplier = 5.0;
  double addend = 7.0;

  AVAR result = fma(multiplicand, multiplier, addend);
  EXPECT_FLOAT_EQ(3.0 * 5.0 + 7.0, result.val());

  AVEC independents = createAVEC(multiplicand, multiplier);
  VEC gradient;
  result.grad(independents, gradient);

  // d(a*b + c)/da = b
  EXPECT_FLOAT_EQ(5.0, gradient[0]);
  // d(a*b + c)/db = a
  EXPECT_FLOAT_EQ(3.0, gradient[1]);
}
// CHECK-YES-LABEL: define void @test_fma // CHECK-NO-LABEL: define void @test_fma void test_fma(float a0, double a1, long double a2) { // CHECK-YES: call float @llvm.fma.f32 // CHECK-NO: call float @llvm.fma.f32 float l0 = fmaf(a0, a0, a0); // CHECK-YES: call double @llvm.fma.f64 // CHECK-NO: call double @llvm.fma.f64 double l1 = fma(a1, a1, a1); // CHECK-YES: call x86_fp80 @llvm.fma.f80 // CHECK-NO: call x86_fp80 @llvm.fma.f80 long double l2 = fmal(a2, a2, a2); }
//Advance the solution one step. // Stops and returns true if the error is less than the given value void cgSolver::iterate(void) { //Decide how far to advance comm.matrixVectorProduct(s_k,tmp); double s_kDot; s_kDot=comm.dotProduct(s_k,tmp); double alpha=residualMagSq/s_kDot; //Advance solution along the search direction fma(n,x_k, x_k,alpha,s_k); //Update residual double oldMagSq=residualMagSq; fma(n,r_k, r_k,-alpha,tmp); residualMagSq=comm.dotProduct(r_k,r_k); //printf("residualMagSq=%g\n",residualMagSq); //if (sqrt(fabs(residualMagSq))<=maxErr) return true; //We're done //Update search direction double beta=residualMagSq/oldMagSq; fma(n,s_k, r_k,beta,s_k); }
/*
 * DSP perform routine: accumulates a phase from the input signal and turns
 * the low 32 bits of that phase into a folded ramp, scaled by mul and
 * offset by add.
 *
 * Argument vector layout (as read below):
 *   w[1]  t_triangulator object (x_phase, x_conv, invals[0].val, invals[1].val)
 *   w[2]  input signal
 *   w[3]  output signal
 *   w[4]  number of samples
 * Returns w+5, i.e. the slot following this routine's arguments.
 *
 * The phase is kept in a double offset by UNITBIT32 and inspected through
 * union tabfudge. Each output sample is derived from the phase as it was
 * BEFORE the current input sample was added, because tf is refreshed only
 * at the end of the loop body.
 */
static t_int *op_perf2(t_int *w)
{
    t_triangulator *x = (t_triangulator *)(w[1]);
    t_sample *in = (t_sample *)(w[2]);
    t_sample *out = (t_sample *)(w[3]);
    int n = (int)(w[4]);
    t_float mul = x->invals[0].val;
    t_float add = x->invals[1].val;
    t_sample inter;
    double dphase = x->x_phase + (double)UNITBIT32;
    union tabfudge tf;
    uint32_t casto;
    float conv = x->x_conv;
    tf.tf_d = dphase;
    while (n--)
    {
        /* advance the running phase by input * conv */
#ifdef FP_FAST_FMA
        dphase = fma(*in++, conv, dphase);
#else
        dphase += *in++ * conv;
#endif
        /* low word of the previous phase; when bit 31 is set the word is
           inverted, which folds the upper half of the range back down */
        casto = (uint32_t)tf.tf_i[LOWOFFSET];
        if(casto & 2147483648) /* bit 31 */
            casto = ~casto;
        /* after folding casto is below 2^31; map it onto about [-1, 1] */
        inter = (t_sample)casto/1073741823.5 - 1;
#ifdef FP_FAST_FMA
        *out++ = fma(inter, mul, add);
#else
        *out++ = inter*mul + add;
#endif
        tf.tf_d = dphase;
    }
    /* reset the high word to NORMHIPART before removing the UNITBIT32
       offset, then store the phase for the next block */
    tf.tf_i[HIOFFSET] = NORMHIPART;
    x->x_phase = tf.tf_d - UNITBIT32;
    return (w+5);
}
// Arccosine expressed through asin.
//
// Accuracy/speed figures recorded with the original implementation:
//   2130706432 values computed.
//   1968272987 values (92.38%)  within 0.0 ULPs
//    162433445 values (7.62%)   within 0.5 ULPs
//   8.5 cycles/element SSE4.2 g++-4.8
//
// For |a0| > 1/2 the argument is replaced by sqrt((1 - |a0|)/2) and the
// asin result is doubled; inputs below -1/2 are additionally reflected
// through Pi. For |a0| <= 1/2 the result is Pio_2 - asin(a0).
static inline A0_n acos(const A0_n a0_n)
{
  const A0 a0 = a0_n;
  const A0 magnitude = nt2::abs(a0);
  const bA0 x_larger_05 = gt(magnitude, nt2::Half<A0>());

  A0 reduced = if_else( x_larger_05
                      , nt2::sqrt(fma(nt2::Mhalf<A0>(), magnitude, nt2::Half<A0>()))
                      , a0
                      );
  A0 angle = asin(reduced);

  // doubled only on the large-argument lanes
  angle = seladd(x_larger_05, angle, angle);

  const A0 reflected = nt2::if_else( lt(a0, nt2::Mhalf<A0>())
                                   , nt2::Pi<A0>()-angle
                                   , angle
                                   );
  return nt2::if_else(x_larger_05, reflected, nt2::Pio_2<A0>()-reflected);
}
/*
 * Reads the table selected by `type` (a SHABLESIZE-entry slice of rexptab)
 * at the fractional position index * SHABLESIZE, interpolating linearly
 * between the two neighbouring entries.
 *
 * Positions below 0 are clamped to 0; positions at or beyond
 * SHABLESIZE - 1 return the last entry without interpolation.
 */
t_float readtab(t_tabtype type, t_float index)
{
    t_float *tab = rexptab + (type * SHABLESIZE);
    t_float pos, frac, lower, step;
    int cell;

    pos = index * SHABLESIZE;
    pos = fmax(pos, 0);

    if (pos >= SHABLESIZE - 1)
        return tab[SHABLESIZE - 1];

    cell = pos;             /* truncation gives the lower neighbour */
    frac = pos - cell;
    lower = tab[cell];
    step = tab[cell + 1] - lower;
    return fma(frac, step, lower);
}
// Rational approximation shared by tan and cot:
//   x + x * zz * num(zz) / den(zz),  with zz = x * x.
static BOOST_FORCEINLINE A0 base_tancot_eval(const A0& x)
{
  const A0 zz = sqr(x);

  const A0 num = horn<A0,
                      0xc1711fead3299176ll,
                      0x413199eca5fc9dddll,
                      0xc0c992d8d24f3f38ll
                     >(zz);

  // The coefficient 0x3ff0000000000000ll (1.0) is not passed to horn1.
  // NOTE(review): presumably horn1 supplies it implicitly - confirm.
  const A0 den = horn1<A0,
                       0xc189afe03cbe5a31ll,
                       0x4177d98fc2ead8efll,
                       0xc13427bc582abc96ll,
                       0x40cab8a5eeb36572ll
                      >(zz);

  const A0 ratio = num/den;
  const A0 correction = zz*ratio;
  return fma(x, correction, x);
}
//
// digamma on [1, 2], double-precision coefficient set (selected by the
// unnamed double tag parameter).
//
// The approximation has the form
//
//   digamma(x) = (x - root) * (Y + R(x-1))
//
// where root is the location of the positive root of digamma, Y is a
// constant, and R is a rational function optimised for low absolute error
// compared to Y. The root is subtracted in three pieces (root1, root2,
// root3), largest first.
//
// Maximum Deviation Found:               3.388e-010
// At float precision, max error found:   2.008725e-008
//
static inline A digamma_imp_1_2(A x, double)
{
  typedef typename meta::scalar_of<A>::type sA;

  static const A Y = splat<A>(0.99558162689208984);

  static const A root1 = splat<A>(1569415565.0 / 1073741824uL);
  static const A root2 = splat<A>((381566830.0 / 1073741824uL) / 1073741824uL);
  static const A root3 = splat<A>(double(0.9016312093258695918615325266959189453125e-19L));

  // numerator coefficients of R
  static const boost::array<sA, 6> P = {{
      sA(0.25479851061131551L),
      sA(-0.32555031186804491L),
      sA(-0.65031853770896507L),
      sA(-0.28919126444774784L),
      sA(-0.045251321448739056L),
      sA(-0.0020713321167745952L)
    }};
  // denominator coefficients of R
  static const boost::array<sA, 7> Q = {{
      sA(1L),
      sA(2.0767117023730469L),
      sA(1.4606242909763515L),
      sA(0.43593529692665969L),
      sA(0.054151797245674225L),
      sA(0.0021284987017821144L),
      sA(-0.55789841321675513e-6L)
    }};

  // x - root, removing the largest piece first
  A from_root = x - root1;
  from_root -= root2;
  from_root -= root3;

  // R is evaluated at x - 1
  x -= One<A>();
  A ratio = eval_poly<6>(x, P)/eval_poly<7>(x, Q);

  // (x - root) * Y + (x - root) * R
  return fma(from_root, Y, from_root * ratio);
}
static inline void kernel_log(const A0& a0,A0& fe, A0& x,A0& x2, A0& y, const A0&) { int_type e; bf::tie(x, e) = fast_frexp(a0); int_type x_lt_sqrthf = -(Const<float,0x3f3504f3>() > x); e += x_lt_sqrthf; x += b_and(x, genmask<float>(x_lt_sqrthf))+Const<float,0xbf800000>(); x2 = sqr(x); A0 y1 = fma(Const<float, 0x3d9021bb>() ,x2,Const<float, 0x3def251a>() ); A0 y2 = fma(Const<float, 0xbdebd1b8>() ,x2,Const<float, 0xbdfe5d4f>() ); y1 = fma(y1,x2,Const<float, 0x3e11e9bf>() ); y2 = fma(y2,x2,Const<float, 0xbe2aae50>() ); y1 = fma(y1,x2,Const<float, 0x3e4cceac>() ); y2 = fma(y2,x2,Const<float, 0xbe7ffffc>() ); y1 = fma(y1,x2,Const<float, 0x3eaaaaaa>() ); y = fma(x,y2,y1)*x*x2; fe = tofloat(e); }