/* Compute the reciprocal of *X and store it in *Y using P mantissa digits.
   A double-precision estimate seeds the iteration  y <- y * (2 - x * y).
   Relative error bound:
   - For P = 2: 1.001 * R ^ (1 - P)
   - For P = 3: 1.063 * R ^ (1 - P)
   - For P > 3: 2.001 * R ^ (1 - P)
   *X = 0 is not permissible.  */
static void
SECTION
__inv (const mp_no *x, mp_no *y, int p)
{
  /* Number of refinement passes to run, indexed by the precision P.  */
  static const int passes_for_p[] = {
    0, 0, 0, 0, 1, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  };
  mp_no work, prev;
  double seed;
  long pass;

  /* Invert the mantissa alone (exponent forced to zero) in double
     precision, then account for X's exponent afterwards.  */
  __cpy (x, &work, p);
  work.e = 0;
  __mp_dbl (&work, &seed, p);
  seed = 1 / seed;
  __dbl_mp (seed, y, p);
  EY -= EX;

  for (pass = passes_for_p[p]; pass > 0; pass--)
    {
      __cpy (y, &prev, p);                 /* prev = y             */
      __mul (x, &prev, y, p);              /* y    = x * prev      */
      __sub (&__mptwo, y, &work, p);       /* work = 2 - x * prev  */
      __mul (&prev, &work, y, p);          /* y    = prev * work   */
    }
}
/* Evaluate an alternating even-power series in *X by a Horner-style
   recurrence and store the result in *Y, with P digits.  The running
   coefficient starts from oofac27 * 27 and is multiplied by a * (a - 1)
   for a = 26, 24, ..., 4.
   NOTE(review): oofac27 is presumably 1/27!, which would make the result
   1 - cos(x) -- confirm against its definition.  */
static void
SECTION
cc32 (mp_no *x, mp_no *y, int p)
{
  mp_no tmp, xsq, coef, acc, factor = {1, {1.0}};
  double a;
  int digit;

  /* Clear the mantissa digits of the small-integer multiplier.  */
  for (digit = 1; digit <= p; digit++)
    factor.d[digit] = 0;

  __mul (x, x, &xsq, p);

  factor.d[1] = 27.0;
  __mul (&oofac27, &factor, &coef, p);
  __cpy (&coef, &acc, p);

  a = 26.0;
  while (a > 2.0)
    {
      /* coef <- coef * a * (a - 1)  */
      factor.d[1] = a * (a - 1.0);
      __mul (&coef, &factor, &tmp, p);
      __cpy (&tmp, &coef, p);

      /* acc <- coef - x^2 * acc  */
      __mul (&xsq, &acc, &tmp, p);
      __sub (&coef, &tmp, &acc, p);

      a -= 2.0;
    }

  __mul (&xsq, &acc, y, p);
}
/* Multi-precision square root: store sqrt(*X) in *Y using P digits.
   X is first scaled by an even power of the radix, a double-precision
   inverse square root (fastiroot) seeds the iteration
   u <- u * (3/2 - (x/2) * u^2), and finally Y = x * u with the exponent
   shift restored.  */
void
SECTION
__mpsqrt (mp_no *x, mp_no *y, int p)
{
  static const mp_no mphalf = {0, {1.0, HALFRAD}};
  static const mp_no mp3halfs = {1, {1.0, 1.0, HALFRAD}};
  mp_no scaled, half_x, root_inv, tmp_a, tmp_b;
  double scaled_dbl, seed;
  int half_exp, left;

  /* Strip an even exponent so that the square root just halves it.  */
  half_exp = EX / 2;
  __cpy (x, &scaled, p);
  scaled.e -= (half_exp + half_exp);

  /* Double-precision starting value for 1/sqrt(scaled).  */
  __mp_dbl (&scaled, &scaled_dbl, p);
  seed = fastiroot (scaled_dbl);
  __dbl_mp (seed, &root_inv, p);

  __mul (&scaled, &mphalf, &half_x, p);

  /* Iteration count for this precision comes from the __mpsqrt_mp table.  */
  for (left = __mpsqrt_mp[p]; left > 0; left--)
    {
      __sqr (&root_inv, &tmp_a, p);
      __mul (&tmp_a, &half_x, &tmp_b, p);
      __sub (&mp3halfs, &tmp_b, &tmp_a, p);
      __mul (&root_inv, &tmp_a, &tmp_b, p);
      __cpy (&tmp_b, &root_inv, p);
    }

  __mul (&scaled, &root_inv, y, p);
  EY += half_exp;
}
/* Compute the reciprocal of *X and store it in *Y using P mantissa digits.
   A double-precision estimate of 1/X (taken from the mantissa with the
   exponent forced to zero) seeds the iteration  y <- y * (2 - x * y); the
   number of passes for each precision comes from the np1 table.  */
void
__inv (const mp_no *x, mp_no *y, int p)
{
  static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
                            4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
  /* The multi-precision constant 2.  It is never written, so it is kept
     in static storage instead of being rebuilt on the stack on every
     call; members not listed are zero-initialized.  */
  static const mp_no mptwo = {1, {1.0, 2.0}};
  mp_no z, w;
  double t;
  int i;

  __cpy (x, &z, p);
  z.e = 0;
  __mp_dbl (&z, &t, p);
  t = ONE / t;
  __dbl_mp (t, y, p);
  EY -= EX;                     /* Fold X's exponent back in.  */

  for (i = 0; i < np1[p]; i++)
    {
      __cpy (y, &w, p);         /* w = y            */
      __mul (x, &w, y, p);      /* y = x * w        */
      __sub (&mptwo, y, &z, p); /* z = 2 - x * w    */
      __mul (&w, &z, y, p);     /* y = w * z        */
    }
}
/* Slow path for pow: compute x^y as exp (y * log (x)) in multi-precision.
   __halfulp is tried first; a non-negative value from it is returned
   directly.  Otherwise the result is computed with 10 digits and accepted
   when adding and subtracting a small eps yield the same double; failing
   that, it is recomputed with 32 digits and returned unconditionally.
   Z is converted into the operand that __mplog receives as its result
   argument (NOTE(review): presumably an initial approximation of
   log (x) -- confirm against __mplog).  */
double
__slowpow (double x, double y, double z)
{
  static const mp_no eps = {-3, {1.0, 4.0}};
  mp_no mp_base, mp_expo, mp_log, mp_prod, mp_pow, mp_hi, mp_lo;
  double result, result_lo;
  int prec;

  result = __halfulp (x, y);    /* halfulp() returns -10 or x^y */
  if (result >= 0)
    return result;              /* halfulp really computed the result */

  /* First attempt: 10 digits, with an eps bracket around the value.  */
  prec = 10;
  __dbl_mp (x, &mp_base, prec);
  __dbl_mp (y, &mp_expo, prec);
  __dbl_mp (z, &mp_log, prec);
  __mplog (&mp_base, &mp_log, prec);            /* log(x)          */
  __mul (&mp_expo, &mp_log, &mp_prod, prec);    /* y * log(x)      */
  __mpexp (&mp_prod, &mp_pow, prec);            /* e^(y * log(x))  */
  __add (&mp_pow, &eps, &mp_hi, prec);          /* value + eps     */
  __mp_dbl (&mp_hi, &result, prec);
  __sub (&mp_pow, &eps, &mp_lo, prec);          /* value - eps     */
  __mp_dbl (&mp_lo, &result_lo, prec);
  if (result == result_lo)
    return result;

  /* Both ends of the bracket did not round alike: redo with 32 digits.  */
  prec = 32;
  __dbl_mp (x, &mp_base, prec);
  __dbl_mp (y, &mp_expo, prec);
  __dbl_mp (z, &mp_log, prec);
  __mplog (&mp_base, &mp_log, prec);
  __mul (&mp_expo, &mp_log, &mp_prod, prec);
  __mpexp (&mp_prod, &mp_pow, prec);
  __mp_dbl (&mp_pow, &result, prec);
  return result;
}
/* Perform range reduction of a double number x into multi precision number
   y, such that y = x - n * pi / 2, abs (y) < pi / 4, n = 0, +-1, +-2, ...
   The return value (0..3) indicates in which quarter of the circle x
   lies.  Two strategies are used depending on the magnitude of x.  */
int
SECTION
__mpranred (double x, mp_no *y, int p)
{
  number bits;
  double t, xn;
  int idx, skip, shift, quarter;
  mp_no ma, mb, mc;

  if (ABS (x) < 2.8e14)
    {
      /* Adding and then removing toint.d yields the multiple XN as a
	 double while its low bits can be read from the word of T.  */
      t = (x * hpinv.d + toint.d);
      xn = t - toint.d;
      bits.d = t;
      quarter = bits.i[LOW_HALF] & 3;

      /* y = x - xn * hp  */
      __dbl_mp (xn, &ma, p);
      __mul (&ma, &hp, &mb, p);
      __dbl_mp (x, &mc, p);
      __sub (&mc, &mb, y, p);
      return quarter;
    }

  /* If x is very big more precision required.  Work on |x| and multiply
     it by P digits of the toverp table, skipping leading digits according
     to the exponent of x.  */
  __dbl_mp (x, &ma, p);
  ma.d[0] = 1.0;
  skip = (ma.e > 5) ? ma.e - 5 : 0;
  mb.e = -skip;
  mb.d[0] = 1.0;
  for (idx = 1; idx <= p; idx++)
    mb.d[idx] = toverp[idx - 1 + skip];
  __mul (&ma, &mb, &mc, p);

  /* Remember the last integer digit, then drop the integer digits by
     shifting the mantissa left and zero-filling the tail.  */
  shift = mc.e;
  t = mc.d[shift];
  for (idx = 1; idx <= p - shift; idx++)
    mc.d[idx] = mc.d[idx + shift];
  for (idx = p + 1 - shift; idx <= p; idx++)
    mc.d[idx] = 0;
  mc.e = 0;

  if (mc.d[1] >= HALFRAD)
    {
      /* Fraction is at least one half: round the multiple up and use
	 fraction - 1 instead.  */
      t += 1.0;
      __sub (&mc, &__mpone, &mb, p);
      __mul (&mb, &hp, y, p);
    }
  else
    __mul (&mc, &hp, y, p);

  quarter = (int) t;
  if (x < 0)
    {
      y->d[0] = -y->d[0];
      quarter = -quarter;
    }
  return (quarter & 3);
}
/* Stage 3: multi-precision evaluation of atan2 (y, x).
   For each of the MM precisions listed in PR the value is computed with
   __mpatan2 and bracketed by a relative error term ud[i].d; as soon as
   both ends of the bracket convert to the same double that double is
   returned.  If no precision succeeds the upper end from the last attempt
   is returned.  */
static double
SECTION
atan2Mp (double x, double y, const int pr[])
{
  mp_no mp_x, mp_y, mp_val, mp_hi, mp_lo, mp_margin, mp_rel;
  double hi, lo;
  int stage, p;

  for (stage = 0; stage < MM; stage++)
    {
      p = pr[stage];
      __dbl_mp (x, &mp_x, p);
      __dbl_mp (y, &mp_y, p);
      __mpatan2 (&mp_y, &mp_x, &mp_val, p);

      /* margin = value * relative error for this stage.  */
      __dbl_mp (ud[stage].d, &mp_rel, p);
      __mul (&mp_val, &mp_rel, &mp_margin, p);
      __add (&mp_val, &mp_margin, &mp_hi, p);
      __sub (&mp_val, &mp_margin, &mp_lo, p);
      __mp_dbl (&mp_hi, &hi, p);
      __mp_dbl (&mp_lo, &lo, p);

      if (hi != lo)
	continue;

      LIBC_PROBE (slowatan2, 4, &p, &x, &y, &hi);
      return hi;
    }

  LIBC_PROBE (slowatan2_inexact, 4, &p, &x, &y, &hi);
  return hi;			/* if impossible to do exact computing */
}
/* Slow path for exp: convert X to multi-precision and compute e^x.
   Unless USE_LONG_DOUBLE_FOR_MP is defined, the value is first computed
   with 6 digits (144 bits) and accepted when value * (1 + eps) and
   value * (1 - eps) convert to the same double; otherwise it is recomputed
   with 32 digits (768 bits).  With USE_LONG_DOUBLE_FOR_MP the work is
   delegated to __ieee754_expl.  */
double
SECTION
__slowexp (double x)
{
#ifndef USE_LONG_DOUBLE_FOR_MP
  const double rel_eps = 3.0e-26;
  mp_no mp_arg, mp_val, mp_lo, mp_hi, mp_eps, mp_margin;
  double hi, lo, exact;
  int prec;

  prec = 6;
  __dbl_mp (x, &mp_arg, prec);
  __mpexp (&mp_arg, &mp_val, prec);

  /* Bracket the value by +/- value * eps.  */
  __dbl_mp (rel_eps, &mp_eps, prec);
  __mul (&mp_eps, &mp_val, &mp_margin, prec);
  __add (&mp_val, &mp_margin, &mp_hi, prec);
  __sub (&mp_val, &mp_margin, &mp_lo, prec);
  __mp_dbl (&mp_hi, &hi, prec);
  __mp_dbl (&mp_lo, &lo, prec);
  if (hi == lo)
    return hi;

  /* Not decisive at 6 digits: fall back to 32 digits.  */
  prec = 32;
  __dbl_mp (x, &mp_arg, prec);
  __mpexp (&mp_arg, &mp_val, prec);
  __mp_dbl (&mp_val, &exact, prec);
  return exact;
#else
  return (double) __ieee754_expl ((long double) x);
#endif
}
/* Slow path for exp: convert X to multi-precision and compute e^x.
   The value is first computed with 6 digits and accepted when
   value * (1 + eps) and value * (1 - eps) convert to the same double;
   otherwise it is recomputed with 32 digits and returned as is.  */
double
SECTION
__slowexp (double x)
{
  const double margin_factor = 3.0e-26;
  mp_no arg, val, below, above, factor, margin;
  double upper, lower, precise;
  int digits = 6;

  /* Multi-precision exponential of x at the low precision.  */
  __dbl_mp (x, &arg, digits);
  __mpexp (&arg, &val, digits);

  /* margin = eps * value; test whether value +/- margin round alike.  */
  __dbl_mp (margin_factor, &factor, digits);
  __mul (&factor, &val, &margin, digits);
  __add (&val, &margin, &above, digits);
  __sub (&val, &margin, &below, digits);
  __mp_dbl (&above, &upper, digits);
  __mp_dbl (&below, &lower, digits);
  if (upper == lower)
    return upper;

  /* The low-precision result was not decisive.  */
  digits = 32;
  __dbl_mp (x, &arg, digits);
  __mpexp (&arg, &val, digits);
  __mp_dbl (&val, &precise, digits);
  return precise;
}
void __mplog(mp_no *x, mp_no *y, int p) { #include "mplog.h" int i,m; #if 0 int j,k,m1,m2,n; double a,b; #endif static const int mp[33] = {0,0,0,0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,4,4,4,4,4,4}; mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; mp_no mpt1,mpt2; /* Choose m and initiate mpone */ m = mp[p]; mpone.e = 1; mpone.d[0]=mpone.d[1]=ONE; /* Perform m newton iterations to solve for y: exp(y)-x=0. */ /* The iterations formula is: y(n+1)=y(n)+(x*exp(-y(n))-1). */ __cpy(y,&mpt1,p); for (i=0; i<m; i++) { mpt1.d[0]=-mpt1.d[0]; __mpexp(&mpt1,&mpt2,p); __mul(x,&mpt2,&mpt1,p); __sub(&mpt1,&mpone,&mpt2,p); __add(y,&mpt2,&mpt1,p); __cpy(&mpt1,y,p); } return; }
/* Slow path for exp: convert X to multi-precision and compute e^x.
   When NO_LONG_DOUBLE is defined the value is first computed with 6
   digits and accepted if value * (1 + eps) and value * (1 - eps) convert
   to the same double, otherwise it is recomputed with 32 digits.  Without
   NO_LONG_DOUBLE the work is delegated to __ieee754_expl.  */
double
__slowexp (double x)
{
#ifdef NO_LONG_DOUBLE
  mp_no mp_in, mp_out, mp_minus, mp_plus, mp_tol, mp_delta;
  double plus, minus, final;
  double tol = 3.0e-26;
  int nd;

  nd = 6;
  __dbl_mp (x, &mp_in, nd);     /* x as a multi-precision number  */
  __mpexp (&mp_in, &mp_out, nd);        /* multi-precision exponential    */

  __dbl_mp (tol, &mp_tol, nd);
  __mul (&mp_tol, &mp_out, &mp_delta, nd);
  __add (&mp_out, &mp_delta, &mp_plus, nd);
  __sub (&mp_out, &mp_delta, &mp_minus, nd);
  __mp_dbl (&mp_plus, &plus, nd);
  __mp_dbl (&mp_minus, &minus, nd);

  if (plus != minus)
    {
      /* The 6-digit result is not decisive: redo it with 32 digits.  */
      nd = 32;
      __dbl_mp (x, &mp_in, nd);
      __mpexp (&mp_in, &mp_out, nd);
      __mp_dbl (&mp_out, &final, nd);
      return final;
    }
  return plus;
#else
  return (double) __ieee754_expl ((long double) x);
#endif
}
/* Multi-precision atan2: store the angle for (*Y, *X) in *Z, P digits.
   When the sign digit of X is positive this is simply atan (y / x).
   Otherwise, with t = x / y, the result is
   2 * atan (sign (y) * (|t| + sqrt (t^2 + 1))).
   y = 0 is not permitted if x <= 0.  No error messages are given.  */
void
__mpatan2 (mp_no *y, mp_no *x, mp_no *z, int p)
{
  mp_no ratio, tmp_a, tmp_b;

  if (!(X[0] <= 0.0))
    {
      __dvd (y, x, &ratio, p);
      __mpatan (&ratio, z, p);
      return;
    }

  {
    mp_no unity = {1, {1.0, 1.0}};      /* the constant 1 */

    __dvd (x, y, &ratio, p);                    /* ratio = x / y          */
    __mul (&ratio, &ratio, &tmp_a, p);          /* tmp_a = ratio^2        */
    if (ratio.d[0] != 0.0)
      ratio.d[0] = 1.0;                         /* ratio = |ratio|        */
    __add (&tmp_a, &unity, &tmp_b, p);          /* tmp_b = ratio^2 + 1    */
    __mpsqrt (&tmp_b, &tmp_a, p);               /* tmp_a = sqrt (tmp_b)   */
    __add (&ratio, &tmp_a, &tmp_b, p);          /* tmp_b = |ratio| + sqrt */
    tmp_b.d[0] = Y[0];                          /* take the sign of y     */
    __mpatan (&tmp_b, &ratio, p);
    __add (&ratio, &ratio, z, p);               /* z = 2 * atan (...)     */
  }
}
/* Final stages. Compute atan(x) by multiple precision arithmetic */ static double atanMp (double x, const int pr[]) { mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1; double y1, y2; int i, p; for (i = 0; i < M; i++) { p = pr[i]; __dbl_mp (x, &mpx, p); __mpatan (&mpx, &mpy, p); __dbl_mp (u9[i].d, &mpt1, p); __mul (&mpy, &mpt1, &mperr, p); __add (&mpy, &mperr, &mpy1, p); __sub (&mpy, &mperr, &mpy2, p); __mp_dbl (&mpy1, &y1, p); __mp_dbl (&mpy2, &y2, p); if (y1 == y2) { LIBC_PROBE (slowatan, 3, &p, &x, &y1); return y1; } } LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1); return y1; /*if impossible to do exact computing */ }
/* Divide *X by *Y and store the quotient in *Z, using P digits.
   The quotient is formed as X * (1 / Y).  When the sign digit of X is
   ZERO only the sign digit of Z is set to ZERO and nothing else is
   computed.  */
void
__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  mp_no recip;

  if (X[0] == ZERO)
    {
      Z[0] = ZERO;
      return;
    }

  __inv (y, &recip, p);
  __mul (x, &recip, z, p);
}
/* Compute two results from *X with P digits: *Y and *Z.
   X is first scaled down by one radix digit (exponent decremented); cc32
   and ss32 are evaluated on the scaled value and then 24 doubling steps
     s <- 2 * (s - c * s),   c <- 2 * c * (2 - c)
   are applied.  Finally Y = 1 - c and Z = s.
   NOTE(review): this presumably yields Y = cos (x) and Z = sin (x), with
   c tracking 1 - cos and the radix being 2^24 -- confirm.  */
void
__c32 (mp_no *x, mp_no *y, mp_no *z, int p)
{
  static const mp_no mpt = {1, {1.0, 2.0}}, one = {1, {1.0, 1.0}};
  mp_no scaled, prod, aux_a, aux_b, cpart, spart;
  int left;

  __cpy (x, &scaled, p);
  scaled.e = scaled.e - 1;
  cc32 (&scaled, &cpart, p);
  ss32 (&scaled, &spart, p);

  for (left = 24; left > 0; left--)
    {
      /* spart <- 2 * (spart - cpart * spart)  */
      __mul (&cpart, &spart, &prod, p);
      __sub (&spart, &prod, &aux_a, p);
      __add (&aux_a, &aux_a, &spart, p);

      /* cpart <- 2 * cpart * (2 - cpart)  */
      __sub (&mpt, &cpart, &aux_a, p);
      __mul (&aux_a, &cpart, &aux_b, p);
      __add (&aux_b, &aux_b, &cpart, p);
    }

  __sub (&one, &cpart, y, p);
  __cpy (&spart, z, p);
}
/* Evaluate an alternating odd-power series in *X by a Horner-style
   recurrence and store the result in *Y, with P digits.  The running
   coefficient starts from oofac27 and is multiplied by a * (a - 1) for
   a = 27, 25, ..., 3; the accumulated sum is finally multiplied by X.
   NOTE(review): oofac27 is presumably 1/27!, which would make the result
   sin (x) -- confirm against its definition.  */
static void
SECTION
ss32 (mp_no *x, mp_no *y, int p)
{
  mp_no tmp, xsq, coef, acc, factor = {1, {1.0}};
  double a;
  int digit;

  /* Clear the mantissa digits of the small-integer multiplier.  */
  for (digit = 1; digit <= p; digit++)
    factor.d[digit] = 0;

  __sqr (x, &xsq, p);
  __cpy (&oofac27, &coef, p);
  __cpy (&coef, &acc, p);

  a = 27.0;
  while (a > 1.0)
    {
      /* coef <- coef * a * (a - 1)  */
      factor.d[1] = a * (a - 1.0);
      __mul (&coef, &factor, &tmp, p);
      __cpy (&tmp, &coef, p);

      /* acc <- coef - x^2 * acc  */
      __mul (&xsq, &acc, &tmp, p);
      __sub (&coef, &tmp, &acc, p);

      a -= 2.0;
    }

  __mul (x, &acc, y, p);
}
/* Compute two results from *X with P digits: *Y and *Z.
   X is first scaled down by one radix digit (exponent decremented); cc32
   and ss32 are evaluated on the scaled value and then 24 doubling steps
     s <- 2 * (s - c * s),   c <- 2 * c * (2 - c)
   are applied, using the shared constants mptwo and mpone.  Finally
   Y = mpone - c and Z = s.
   NOTE(review): this presumably yields Y = cos (x) and Z = sin (x), with
   c tracking 1 - cos and the radix being 2^24 -- confirm.  */
void
SECTION
__c32 (mp_no *x, mp_no *y, mp_no *z, int p)
{
  mp_no scaled, prod, aux_a, aux_b, cpart, spart;
  int step;

  __cpy (x, &scaled, p);
  scaled.e = scaled.e - 1;
  cc32 (&scaled, &cpart, p);
  ss32 (&scaled, &spart, p);

  step = 0;
  while (step < 24)
    {
      /* spart <- 2 * (spart - cpart * spart)  */
      __mul (&cpart, &spart, &prod, p);
      __sub (&spart, &prod, &aux_a, p);
      __add (&aux_a, &aux_a, &spart, p);

      /* cpart <- 2 * cpart * (2 - cpart)  */
      __sub (&mptwo, &cpart, &aux_a, p);
      __mul (&aux_a, &cpart, &aux_b, p);
      __add (&aux_b, &aux_b, &cpart, p);

      step++;
    }

  __sub (&mpone, &cpart, y, p);
  __cpy (&spart, z, p);
}
/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X
   and Z or Y and Z.  The quotient is formed as X * (1 / Y) with the
   reciprocal taken by __inv.  Relative error bound:
   - For P = 2: 2.001 * R ^ (1 - P)
   - For P = 3: 2.063 * R ^ (1 - P)
   - For P > 3: 3.001 * R ^ (1 - P)
   A zero *X (sign digit 0) is handled by setting only the sign digit of
   *Z to 0.  NOTE(review): *Y is handed to __inv unchecked, so presumably
   it is *Y = 0 that is not permissible -- confirm.  */
void
SECTION
__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  mp_no recip;

  if (X[0] == 0)
    {
      Z[0] = 0;
      return;
    }

  __inv (y, &recip, p);
  __mul (x, &recip, z, p);
}
/* Treat the Denormalized case.
   Computes the quotient AY / AX with 6 multi-precision digits, lowers it
   by the relative amount ue.d, converts the outcome to a double (reusing
   the parameter Z as storage) and returns signArctan2 (Y, Z).  */
static double
SECTION
normalized (double ax, double ay, double y, double z)
{
  const int prec = 6;
  mp_no mp_ax, mp_ay, mp_quot, mp_margin, mp_low, mp_rel;

  __dbl_mp (ax, &mp_ax, prec);
  __dbl_mp (ay, &mp_ay, prec);
  __dvd (&mp_ay, &mp_ax, &mp_quot, prec);

  /* low = quot - quot * ue  */
  __dbl_mp (ue.d, &mp_rel, prec);
  __mul (&mp_quot, &mp_rel, &mp_margin, prec);
  __sub (&mp_quot, &mp_margin, &mp_low, prec);

  __mp_dbl (&mp_low, &z, prec);
  return signArctan2 (y, z);
}
/* Slow path for exp: convert X to multi-precision and compute e^x.
   Unless USE_LONG_DOUBLE_FOR_MP is defined, the value is first computed
   with 6 digits (144 bits) and accepted when value * (1 + eps) and
   value * (1 - eps) convert to the same double; otherwise it is recomputed
   with 32 digits (768 bits).  Each outcome fires its own probe carrying
   the input and the result.  With USE_LONG_DOUBLE_FOR_MP the work is
   delegated to __ieee754_expl.  */
double
SECTION
__slowexp (double x)
{
#ifndef USE_LONG_DOUBLE_FOR_MP
  const double rel_eps = 3.0e-26;
  mp_no mp_arg, mp_val, mp_lo, mp_hi, mp_eps, mp_margin;
  double hi, lo, exact;
  int prec;

  prec = 6;
  __dbl_mp (x, &mp_arg, prec);
  __mpexp (&mp_arg, &mp_val, prec);

  /* Bracket the value by +/- value * eps.  */
  __dbl_mp (rel_eps, &mp_eps, prec);
  __mul (&mp_eps, &mp_val, &mp_margin, prec);
  __add (&mp_val, &mp_margin, &mp_hi, prec);
  __sub (&mp_val, &mp_margin, &mp_lo, prec);
  __mp_dbl (&mp_hi, &hi, prec);
  __mp_dbl (&mp_lo, &lo, prec);

  if (hi == lo)
    {
      /* Track how often we get to the slow exp code plus
	 its input/output values.  */
      LIBC_PROBE (slowexp_p6, 2, &x, &hi);
      return hi;
    }

  prec = 32;
  __dbl_mp (x, &mp_arg, prec);
  __mpexp (&mp_arg, &mp_val, prec);
  __mp_dbl (&mp_val, &exact, prec);

  /* Track how often we get to the uber-slow exp code plus
     its input/output values.  */
  LIBC_PROBE (slowexp_p32, 2, &x, &exact);
  return exact;
#else
  return (double) __ieee754_expl ((long double) x);
#endif
}
/*Converting from double precision to Multi-precision and calculating e^x */ double __slowexp(double x) { double w,z,res,eps=3.0e-26; #if 0 double y; #endif int p; #if 0 int orig,i; #endif mp_no mpx, mpy, mpz,mpw,mpeps,mpcor; p=6; __dbl_mp(x,&mpx,p); /* Convert a double precision number x */ /* into a multiple precision number mpx with prec. p. */ __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */ __dbl_mp(eps,&mpeps,p); __mul(&mpeps,&mpy,&mpcor,p); __add(&mpy,&mpcor,&mpw,p); __sub(&mpy,&mpcor,&mpz,p); __mp_dbl(&mpw, &w, p); __mp_dbl(&mpz, &z, p); if (w == z) { /* Track how often we get to the slow exp code plus its input/output values. */ LIBC_PROBE (slowexp_p6, 2, &x, &w); return w; } else { /* if calculating is not exactly */ p = 32; __dbl_mp(x,&mpx,p); __mpexp(&mpx, &mpy, p); __mp_dbl(&mpy, &res, p); /* Track how often we get to the uber-slow exp code plus its input/output values. */ LIBC_PROBE (slowexp_p32, 2, &x, &res); return res; } }
/* Multi-precision exponential: store e^(*x) in *y using p digits.
   Outline, as visible in the code below:
     1. pick a polynomial length n and a scaling exponent m = m1 + m2,
	while accumulating the matching scale factor in a;
     2. form s = x * a;
     3. evaluate a nested polynomial 1 + s * (1 + s / nn[2] * (...)) with
	n levels;
     4. square the polynomial value m times.
   NOTE(review): this variant uses accessor syntax (.d(), .d(i)) and the
   type Double, which are defined elsewhere; their exact semantics cannot
   be seen from here.  */
void __mpexp(mp_no *x, mp_no *y, int p) {

  int i,j,k,m,m1,m2,n;
  Double a,b;
  /* Polynomial length n, indexed by the precision p.  */
  static const int np[33] = {0,0,0,0,3,3,4,4,5,4,4,5,5,5,6,6,6,6,6,6,
                             6,6,6,6,7,7,7,7,8,8,8,8,8};
  /* Base scaling exponent m1, indexed by the precision p.  */
  static const int m1p[33]= {0,0,0,0,17,23,23,28,27,38,42,39,43,47,43,47,50,54,
                             57,60,64,67,71,74,68,71,74,77,70,73,76,78,81};
  /* Thresholds used to shorten the polynomial when no scaling is needed;
     indexed [polynomial length - 1][p].
     NOTE(review): the second dimension is 18 while p is used as the
     column index without a range check -- confirm that callers never
     reach the m <= 0 path with p >= 18.  */
  static const int m1np[7][18] = {
                 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
                 { 0, 0, 0, 0,36,48,60,72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
                 { 0, 0, 0, 0,24,32,40,48,56,64,72, 0, 0, 0, 0, 0, 0, 0},
                 { 0, 0, 0, 0,17,23,29,35,41,47,53,59,65, 0, 0, 0, 0, 0},
                 { 0, 0, 0, 0, 0, 0,23,28,33,38,42,47,52,57,62,66, 0, 0},
                 { 0, 0, 0, 0, 0, 0, 0, 0,27, 0, 0,39,43,47,51,55,59,63},
                 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,43,47,50,54}};
  /* Zero-filled here; turned into the constant 1 further down.  */
  mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
                    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
                    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
  /* Zero-filled here; later holds the divisor nn[k] of each level.  */
  mp_no mpk   = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
                    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
                    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
  mp_no mps,mpak,mpt1,mpt2;

  /* Choose m,n and compute a=2**(-m) */
  n = np[p];    m1 = m1p[p];    a = twomm1[p].d();
  /* Scale a by RADIXI once per positive exponent step of x, or by RADIX
     once per negative step.  */
  for (i=0; i<EX; i++)  a *= RADIXI;
  for (   ; i>EX; i--)  a *= RADIX;
  /* Double b (and a along with it) until b reaches HALF, lowering m2 by
     one each time.  */
  b = X[1]*RADIXI;   m2 = 24*EX;
  for (; b<HALF; m2--)  { a *= TWO;   b *= TWO; }
  /* If b is exactly HALF and every further digit X[2..p] is zero, take
     one more step.  */
  if (b == HALF) {
    for (i=2; i<=p; i++) { if (X[i]!=ZERO)  break; }
    if (i==p+1)  { m2--;  a *= TWO; }
  }
  /* No scaling required: use a = 1 and shorten the polynomial (decrease
     n) while the m1np thresholds allow it.  */
  if ((m=m1+m2) <= 0) {
    m=0;  a=ONE;
    for (i=n-1; i>0; i--,n--) { if (m1np[i][p]+m2>0)  break; }
  }

  /* Compute s=x*2**(-m). Put result in mps */
  __dbl_mp(a,&mpt1,p);
  __mul(x,&mpt1,&mps,p);

  /* Evaluate the polynomial. Put result in mpt2 */
  mpone.e=1;   mpone.d(0)=ONE;   mpone.d(1)=ONE;
  mpk.e = 1;   mpk.d(0) = ONE;   mpk.d(1)=nn[n].d();
  /* Innermost level: mpak = 1 + s / nn[n].  */
  __dvd(&mps,&mpk,&mpt1,p);
  __add(&mpone,&mpt1,&mpak,p);
  /* Remaining levels: mpak = 1 + s * mpak / nn[k].  */
  for (k=n-1; k>1; k--) {
    __mul(&mps,&mpak,&mpt1,p);
    mpk.d(1)=nn[k].d();
    __dvd(&mpt1,&mpk,&mpt2,p);
    __add(&mpone,&mpt2,&mpak,p);
  }
  /* Outermost level has no divisor: mpt2 = 1 + s * mpak.  */
  __mul(&mps,&mpak,&mpt1,p);
  __add(&mpone,&mpt1,&mpt2,p);

  /* Raise polynomial value to the power of 2**m. Put result in y */
  /* The squarings alternate between mpt2 and mpt1; j records that the
     last one landed in mpt1.  */
  for (k=0,j=0; k<m; ) {
    __mul(&mpt2,&mpt2,&mpt1,p);  k++;
    if (k==m)  { j=1;  break; }
    __mul(&mpt1,&mpt1,&mpt2,p);  k++;
  }
  if (j)  __cpy(&mpt1,y,p);
  else    __cpy(&mpt2,y,p);
  return;
}
/* Multi-precision arctangent: store atan (*X) in *Y using P digits.
   Steps:
     1. choose a reduction count m (7 for a positive exponent, 0 for a
	negative one, otherwise by comparing |x| with the __atan_xm table);
     2. reduce the squared argument m times with
	  sq <- sq / (2 + sq + 2 * sqrt (1 + sq))
	and take s = sign (x) * sqrt (sq);
     3. evaluate a truncated power series for atan (s);
     4. multiply by __atan_twom[m].  */
void
SECTION
__mpatan (mp_no *x, mp_no *y, int p)
{
  mp_no unit = {0, {0.0}};
  mp_no pair = {0, {0.0}};
  mp_no coef = {0, {0.0}};
  mp_no red, red_sq, series, tmp_a, tmp_b, tmp_c;
  double mag;
  int reductions, terms, idx;

  /* Choose the number of reductions.  */
  if (EX > 0)
    reductions = 7;
  else if (EX < 0)
    reductions = 0;
  else
    {
      __mp_dbl (x, &mag, p);
      mag = ABS (mag);
      reductions = 6;
      while (reductions > 0 && !(mag > __atan_xm[reductions].d))
	reductions--;
    }

  /* Build the constants 1 and 2, and a one-digit number whose digit is
     filled in later.  */
  unit.e = 1;
  unit.d[0] = ONE;
  unit.d[1] = ONE;
  pair.e = 1;
  pair.d[0] = ONE;
  pair.d[1] = TWO;
  coef.e = 1;
  coef.d[0] = ONE;

  /* Reduce x the chosen number of times, working on its square.  */
  __mul (x, x, &red_sq, p);
  if (reductions == 0)
    __cpy (x, &red, p);
  else
    {
      for (idx = 0; idx < reductions; idx++)
	{
	  __add (&unit, &red_sq, &tmp_a, p);     /* 1 + sq              */
	  __mpsqrt (&tmp_a, &tmp_b, p);          /* sqrt (1 + sq)       */
	  __add (&tmp_b, &tmp_b, &tmp_a, p);     /* 2 * sqrt (1 + sq)   */
	  __add (&pair, &red_sq, &tmp_b, p);     /* 2 + sq              */
	  __add (&tmp_a, &tmp_b, &tmp_c, p);     /* denominator         */
	  __dvd (&red_sq, &tmp_c, &tmp_a, p);
	  __cpy (&tmp_a, &red_sq, p);
	}
      __mpsqrt (&red_sq, &red, p);
      red.d[0] = X[0];                           /* restore sign of x   */
    }

  /* Evaluate a truncated power series for Atan(s).  */
  terms = __atan_np[p];
  coef.d[1] = __atan_twonm1[p].d;
  __dvd (&red_sq, &coef, &series, p);
  for (idx = terms - 1; idx > 1; idx--)
    {
      coef.d[1] -= TWO;
      __dvd (&red_sq, &coef, &tmp_a, p);
      __mul (&red_sq, &series, &tmp_b, p);
      __sub (&tmp_a, &tmp_b, &series, p);
    }
  __mul (&red, &series, &tmp_a, p);
  __sub (&red, &tmp_a, &series, p);

  /* Compute Atan(x) by undoing the reductions.  */
  coef.d[1] = __atan_twom[reductions].d;
  __mul (&coef, &series, y, p);
}
/* Extended GCD of the coefficient vectors (A, lenA) and (B, lenB) modulo
   mod, driven by _nmod_poly_hgcd with a switch to
   _nmod_poly_xgcd_euclidean once the operands become short.

   Writes a polynomial to G and returns its length lenG; writes cofactor
   vectors to S and T, which are zero-padded on exit to lengths lenB - 1
   and lenA - 1 respectively.
   NOTE(review): presumably G = gcd (A, B) with S*A + T*B = G and
   lenA >= lenB >= 1 is required -- confirm against the callers.

   The helpers __divrem, __set, __mul, __add, __sub and __div are defined
   outside this block; from their use here they appear to take
   (pointer, length) pairs and to assign the result length to their second
   argument -- confirm against their definitions.  */
slong _nmod_poly_xgcd_hgcd(mp_ptr G, mp_ptr S, mp_ptr T,
                           mp_srcptr A, slong lenA,
                           mp_srcptr B, slong lenB, nmod_t mod)
{
    /* Operand length below which the Euclidean routine takes over; a
       separate cutoff is used for moduli of at most 8 bits.  */
    const slong cutoff = FLINT_BIT_COUNT(mod.n) <= 8 ?
                         NMOD_POLY_SMALL_GCD_CUTOFF : NMOD_POLY_GCD_CUTOFF;

    slong lenG, lenS, lenT;

    if (lenB == 1)
    {
        /* B is a constant: G = B, S is empty, T = 1.  */
        G[0] = B[0];
        T[0] = 1;
        lenG = 1;
        lenS = 0;
        lenT = 1;
    }
    else
    {
        /* One allocation holds both the quotient q and the remainder r.  */
        mp_ptr q = _nmod_vec_init(lenA + lenB);
        mp_ptr r = q + lenA;

        slong lenq, lenr;

        __divrem(q, lenq, r, lenr, A, lenA, B, lenB);

        if (lenr == 0)
        {
            /* Zero remainder: G = B, S is empty, T = 1.  */
            __set(G, lenG, B, lenB);
            T[0] = 1;
            lenS = 0;
            lenT = 1;
        }
        else
        {
            mp_ptr h, j, v, w, R[4], X;
            slong lenh, lenj, lenv, lenw, lenR[4];
            int sgnR;

            lenh = lenj = lenB;
            lenv = lenw = lenA + lenB - 2;
            lenR[0] = lenR[1] = lenR[2] = lenR[3] = (lenB + 1) / 2;

            /* X is a single scratch block carved into h, j, v, w and the
               four entries of R.  */
            X = _nmod_vec_init(2 * lenh + 2 * lenv + 4 * lenR[0]);
            h = X;
            j = h + lenh;
            v = j + lenj;
            w = v + lenv;
            R[0] = w + lenw;
            R[1] = R[0] + lenR[0];
            R[2] = R[1] + lenR[1];
            R[3] = R[2] + lenR[2];

            /* First half-GCD step on (B, r); results land in h, j and R.  */
            sgnR = _nmod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, B, lenB, r, lenr, mod);

            /* Initialise S and T from R[1] and R[0]; which of the two is
               negated depends on the sign returned above.  */
            if (sgnR > 0)
            {
                _nmod_vec_neg(S, R[1], lenR[1], mod);
                _nmod_vec_set(T, R[0], lenR[0]);
            }
            else
            {
                _nmod_vec_set(S, R[1], lenR[1]);
                _nmod_vec_neg(T, R[0], lenR[0], mod);
            }
            lenS = lenR[1];
            lenT = lenR[0];

            while (lenj != 0)
            {
                /* One division step on (h, j), then update the pair
                   (S, T): swap them and subtract q * (old T) from the new
                   T.  */
                __divrem(q, lenq, r, lenr, h, lenh, j, lenj);
                __mul(v, lenv, q, lenq, T, lenT);
                {
                    slong l;
                    _nmod_vec_swap(S, T, FLINT_MAX(lenS, lenT));
                    l = lenS; lenS = lenT; lenT = l;
                }
                __sub(T, lenT, T, lenT, v, lenv);

                if (lenr == 0)
                {
                    /* j divides h exactly: G = j.  */
                    __set(G, lenG, j, lenj);

                    goto cofactor;
                }
                if (lenj < cutoff)
                {
                    /* Operands are short: finish with the Euclidean
                       routine, using R[0] and R[1] as storage for its
                       cofactors, then combine them into S.  */
                    mp_ptr u0 = R[0], u1 = R[1];
                    slong lenu0 = lenr - 1, lenu1 = lenj - 1;

                    lenG = _nmod_poly_xgcd_euclidean(G, u0, u1, j, lenj, r, lenr, mod);
                    MPN_NORM(u0, lenu0);
                    MPN_NORM(u1, lenu1);

                    __mul(v, lenv, S, lenS, u0, lenu0);
                    __mul(w, lenw, T, lenT, u1, lenu1);
                    __add(S, lenS, v, lenv, w, lenw);

                    goto cofactor;
                }

                /* Another half-GCD step on (j, r); then apply the entries
                   of R to S and T, with the operand order of each
                   subtraction chosen by the returned sign.  */
                sgnR = _nmod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, j,lenj, r, lenr, mod);

                __mul(v, lenv, R[1], lenR[1], T, lenT);
                __mul(w, lenw, R[2], lenR[2], S, lenS);

                __mul(q, lenq, S, lenS, R[3], lenR[3]);
                if (sgnR > 0)
                    __sub(S, lenS, q, lenq, v, lenv);
                else
                    __sub(S, lenS, v, lenv, q, lenq);

                __mul(q, lenq, T, lenT, R[0], lenR[0]);
                if (sgnR > WORD(0))
                    __sub(T, lenT, q, lenq, w, lenw);
                else
                    __sub(T, lenT, w, lenw, q, lenq);
            }
            /* Loop ended with j empty: G = h.  */
            __set(G, lenG, h, lenh);

            cofactor:

            /* Recover T from G and S:  T = (G - S * A) / B.  */
            __mul(v, lenv, S, lenS, A, lenA);
            __sub(w, lenw, G, lenG, v, lenv);
            __div(T, lenT, w, lenw, B, lenB);

            _nmod_vec_clear(X);
        }

        _nmod_vec_clear(q);
    }
    /* Zero-pad the cofactors to their full lengths.  */
    flint_mpn_zero(S + lenS, lenB - 1 - lenS);
    flint_mpn_zero(T + lenT, lenA - 1 - lenT);

    return lenG;
}
/* Multi-Precision exponential function subroutine: store e^(*X) in *Y
   using P digits (for p >= 4, 2**(-55) <= abs(x) <= 1024).
   The input is scaled to s = x * 2^(-m), a Taylor polynomial of length n
   is evaluated at s, and the result is squared m times.  */
void
SECTION
__mpexp (mp_no *x, mp_no *y, int p)
{
  /* Polynomial length n, indexed by the precision P.  */
  static const int np[33] =
    {
      0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
      6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8
    };
  /* Base scaling exponent m1, indexed by the precision P.  */
  static const int m1p[33] =
    {
      0, 0, 0, 0,
      17, 23, 23, 28,
      27, 38, 42, 39,
      43, 47, 43, 47,
      50, 54, 57, 60,
      64, 67, 71, 74,
      68, 71, 74, 77,
      70, 73, 76, 78,
      81
    };
  /* Thresholds for shortening the polynomial when no scaling is needed,
     indexed [polynomial length - 1][P].  */
  static const int m1np[7][18] =
    {
      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
      {0, 0, 0, 0, 36, 48, 60, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
      {0, 0, 0, 0, 24, 32, 40, 48, 56, 64, 72, 0, 0, 0, 0, 0, 0, 0},
      {0, 0, 0, 0, 17, 23, 29, 35, 41, 47, 53, 59, 65, 0, 0, 0, 0, 0},
      {0, 0, 0, 0, 0, 0, 23, 28, 33, 38, 42, 47, 52, 57, 62, 66, 0, 0},
      {0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63},
      {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54}
    };
  mp_no reduced, fact_mp, buf_a, buf_b;
  mp_no *cur, *nxt, *swap;
  mantissa_t lead;
  int idx, k, m, m1, m2, n;

  /* Choose m and n.  m2 starts from the radix exponent of x (24 bits per
     digit) and is lowered once for every doubling needed to bring the
     leading digit up to HALFRAD.  */
  n = np[p];
  m1 = m1p[p];
  lead = X[1];
  m2 = 24 * EX;
  while (lead < HALFRAD)
    {
      lead *= 2;
      m2--;
    }
  if (lead == HALFRAD)
    {
      /* Leading digit is exactly HALFRAD: if all remaining digits are
	 zero, lower m2 once more.  */
      for (idx = 2; idx <= p; idx++)
	if (X[idx] != 0)
	  break;
      if (idx == p + 1)
	m2--;
    }

  m = m1 + m2;
  if (__glibc_unlikely (m <= 0))
    {
      /* The m1np array which is used to determine if we can reduce the
	 polynomial expansion iterations, has only 18 elements.  Besides,
	 numbers smaller than those required by p >= 18 should not come
	 here at all since the fast phase of exp returns 1.0 for anything
	 less than 2^-55.  */
      assert (p < 18);
      m = 0;
      for (idx = n - 1; idx > 0; idx--, n--)
	if (m1np[idx][p] + m2 > 0)
	  break;
    }

  /* Compute s = x * 2**(-m) and keep it in REDUCED.  This is the
     range-reduced input used to compute e^s; the final result is that
     value raised to the power 2^m.  */
  __pow_mp (-m, &buf_a, p);
  __mul (x, &buf_a, &reduced, p);

  /* Taylor series for e^s with n terms, evaluated as
       1 + (s * (n!/1! + s * (n!/2! + s * (n!/3! + s ...)))) / n!
     so that a single division is needed.  KF accumulates the factorial
     ratio on the fly and equals n! once the loop is done.  */
  __cpy (&reduced, &buf_b, p);
  double kf = 1.0;
  for (k = n - 1; k > 0; k--)
    {
      /* n! / k! = n * (n - 1) ... * (n - k + 1) */
      kf *= k + 1;
      __dbl_mp (kf, &fact_mp, p);
      __add (&buf_b, &fact_mp, &buf_a, p);
      __mul (&reduced, &buf_a, &buf_b, p);
    }
  __dbl_mp (kf, &fact_mp, p);
  __dvd (&buf_b, &fact_mp, &buf_a, p);
  __add (&__mpone, &buf_a, &buf_b, p);

  /* Raise the polynomial value to the power 2**m by squaring it m times,
     alternating between the two buffers; CUR always names the buffer
     holding the latest value.  */
  cur = &buf_b;
  nxt = &buf_a;
  for (k = 0; k < m; k++)
    {
      __sqr (cur, nxt, p);
      swap = cur;
      cur = nxt;
      nxt = swap;
    }
  __cpy (cur, y, p);
}