// Reduced transform for blocks in which only in[0], in[1] and in[4] are
// non-zero. The four rows differ only in the DC value handed to STORE2; the
// two terms derived from in[1] are shared by every row.
static void TransformAC3(const int16_t* in, uint8_t* dst) {
  const int in1_m1 = MUL1(in[1]);
  const int in1_m2 = MUL2(in[1]);
  const int in4_m1 = MUL1(in[4]);
  const int in4_m2 = MUL2(in[4]);
  const int base = in[0] + 4;
  int row_dc[4];
  int y;

  // Per-row DC values, in row order 0..3.
  row_dc[0] = base + in4_m1;
  row_dc[1] = base + in4_m2;
  row_dc[2] = base - in4_m2;
  row_dc[3] = base - in4_m1;

  for (y = 0; y < 4; ++y) {
    STORE2(y, row_dc[y], in1_m1, in1_m2);
  }
}
int main(int argc, const char * argv[]) { add(1,2); add(1,2); PRINTMAX(12, 13); PRINTMAX(12, 13); printf("%d\n",MAXOFNUMBER(100, 200)); printf("*******************\n"); double sum = ADD(1.1, 2);//预处理 阶段 就会换成 1+2 printf("sum = %f\n",sum); printf("%d\n",ADD(1, 2)*ADD(2, 3));//8 //1+2*2+3 printf("%d\n",ADD2(1, 2)*ADD2(2, 3));//(1+2)*(2+3) printf("%d\n",MUL(3-1, 5-2));//(3-1*5-2) printf("%d\n",MUL2(3-1, 5-2));//((3-1)*(5-2)) printf("*******************\n"); printf(kPath); double r = 2.0; double s = PI*r*r; double c = 2*PI*r; printf("s = %f c= %f\n",s,c); return 0; }
/*
 * tan(x) -- visible part of a staged tangent evaluation.
 *
 *   - Exponent bits of x all ones (+-INF or NaN): returns x - x.
 *   - w = |x| <= g1 (1.259e-8 per the inline comment): returns x unchanged.
 *   - g1 < w <= g2 (0.0608 per the inline comment):
 *       First stage:  odd polynomial d3..d11 in plain doubles; the result y
 *                     is accepted only when x+(t2-u1*t2) and x+(t2+u1*t2)
 *                     produce the same double.
 *       Second stage: the a3..a27 series is evaluated again, with the
 *                     EMULV/ADD2/MUL2 macros carrying a (value, correction)
 *                     pair; accepted under the same test using u2.
 *       Otherwise the routine falls back to tanMp(x).
 *
 * NOTE(review): the definition is truncated in this view -- the branches for
 * larger |x| and the closing brace of the function are not present. The
 * constants (g1, g2, d*, a*, aa*, u1, u2, zero) and the macros are not defined
 * in this block; presumably they come from utan.h / utan.tbl -- confirm.
 */
double tan(double x) { #include "utan.h" #include "utan.tbl" int ux,i,n; double a,da,a2,b,db,c,dc,c1,cc1,c2,cc2,c3,cc3,fi,ffi,gi,pz,s,sy, t,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,w,x2,xn,xx2,y,ya,yya,z0,z,zz,z2,zz2; int p; number num,v; mp_no mpa,mpt1,mpt2; #if 0 mp_no mpy; #endif int __branred(double, double *, double *); int __mpranred(double, mp_no *, int); /* x=+-INF, x=NaN */ num.d = x; ux = num.i[HIGH_HALF]; if ((ux&0x7ff00000)==0x7ff00000) return x-x; w=(x<ZERO) ? -x : x; /* (I) The case abs(x) <= 1.259e-8 */ if (w<=g1.d) return x; /* (II) The case 1.259e-8 < abs(x) <= 0.0608 */ if (w<=g2.d) { /* First stage */ x2 = x*x; t2 = x*x2*(d3.d+x2*(d5.d+x2*(d7.d+x2*(d9.d+x2*d11.d)))); if ((y=x+(t2-u1.d*t2)) == x+(t2+u1.d*t2)) return y; /* Second stage */ c1 = x2*(a15.d+x2*(a17.d+x2*(a19.d+x2*(a21.d+x2*(a23.d+x2*(a25.d+ x2*a27.d)))))); EMULV(x,x,x2,xx2,t1,t2,t3,t4,t5) ADD2(a13.d,aa13.d,c1,zero.d,c2,cc2,t1,t2) MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(a11.d,aa11.d,c1,cc1,c2,cc2,t1,t2) MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(a9.d ,aa9.d ,c1,cc1,c2,cc2,t1,t2) MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(a7.d ,aa7.d ,c1,cc1,c2,cc2,t1,t2) MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(a5.d ,aa5.d ,c1,cc1,c2,cc2,t1,t2) MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(a3.d ,aa3.d ,c1,cc1,c2,cc2,t1,t2) MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8) MUL2(x ,zero.d,c1,cc1,c2,cc2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(x ,zero.d,c2,cc2,c1,cc1,t1,t2) if ((y=c1+(cc1-u2.d*c1)) == c1+(cc1+u2.d*c1)) return y; return tanMp(x); }
static void TransformOne(const int16_t* in, uint8_t* dst) { int C[4 * 4], *tmp; int i; tmp = C; for (i = 0; i < 4; ++i) { // vertical pass const int a = in[0] + in[8]; // [-4096, 4094] const int b = in[0] - in[8]; // [-4095, 4095] const int c = MUL2(in[4]) - MUL1(in[12]); // [-3783, 3783] const int d = MUL1(in[4]) + MUL2(in[12]); // [-3785, 3781] tmp[0] = a + d; // [-7881, 7875] tmp[1] = b + c; // [-7878, 7878] tmp[2] = b - c; // [-7878, 7878] tmp[3] = a - d; // [-7877, 7879] tmp += 4; in++; } // Each pass is expanding the dynamic range by ~3.85 (upper bound). // The exact value is (2. + (20091 + 35468) / 65536). // After the second pass, maximum interval is [-3794, 3794], assuming // an input in [-2048, 2047] interval. We then need to add a dst value // in the [0, 255] range. // In the worst case scenario, the input to clip_8b() can be as large as // [-60713, 60968]. tmp = C; for (i = 0; i < 4; ++i) { // horizontal pass const int dc = tmp[0] + 4; const int a = dc + tmp[8]; const int b = dc - tmp[8]; const int c = MUL2(tmp[4]) - MUL1(tmp[12]); const int d = MUL1(tmp[4]) + MUL2(tmp[12]); STORE(0, 0, a + d); STORE(1, 0, b + c); STORE(2, 0, b - c); STORE(3, 0, a - d); tmp++; dst += BPS; } }
/*
 * __doasin(x, dx, v) -- arcsin Taylor series on a double-length argument.
 *
 * Input is the pair x + dx; the result is stored as the pair v[0] + v[1],
 * i.e. arcsin(x+dx) = v[0] + v[1] (see the closing comment).
 *
 * Steps:
 *   - xx = x*x + 2*x*dx is used as a single-double square of the argument
 *     for the d5..d11 terms, which are evaluated by Horner's rule into p
 *     (pp starts at 0).
 *   - MUL2(x,dx,x,dx,u,uu,...) forms the square again as the pair u + uu.
 *   - The c4..c1 terms are then folded in by alternating ADD2 / MUL2 on the
 *     pair (p, pp); the last MUL2 / ADD2 multiply by x + dx and add x + dx.
 *
 * c1..c4 / cc1..cc4 and the MUL2 / ADD2 macros are not defined in this block
 * (presumably doasin.h and the double-length arithmetic header -- confirm).
 * hx, tx, hy, ty, tp, tq are scratch arguments for MUL2 and are declared only
 * when DLA_FMS is not defined; r, s, tc, tcc are scratch as well.
 */
void SECTION __doasin(double x, double dx, double v[]) { #include "doasin.h" static const double d5 = 0.22372159090911789889975459505194491E-01, d6 = 0.17352764422456822913014975683014622E-01, d7 = 0.13964843843786693521653681033981614E-01, d8 = 0.11551791438485242609036067259086589E-01, d9 = 0.97622386568166960207425666787248914E-02, d10 = 0.83638737193775788576092749009744976E-02, d11 = 0.79470250400727425881446981833568758E-02; double xx,p,pp,u,uu,r,s; double tc,tcc; #ifndef DLA_FMS double hx,tx,hy,ty,tp,tq; #endif /* Taylor series for arcsin for Double-Length numbers */ xx = x*x+2.0*x*dx; p = ((((((d11*xx+d10)*xx+d9)*xx+d8)*xx+d7)*xx+d6)*xx+d5)*xx; pp = 0; MUL2(x,dx,x,dx,u,uu,tp,hx,tx,hy,ty,tq,tc,tcc); ADD2(p,pp,c4.x,cc4.x,p,pp,r,s); MUL2(p,pp,u,uu,p,pp,tp,hx,tx,hy,ty,tq,tc,tcc); ADD2(p,pp,c3.x,cc3.x,p,pp,r,s); MUL2(p,pp,u,uu,p,pp,tp,hx,tx,hy,ty,tq,tc,tcc); ADD2(p,pp,c2.x,cc2.x,p,pp,r,s); MUL2(p,pp,u,uu,p,pp,tp,hx,tx,hy,ty,tq,tc,tcc); ADD2(p,pp,c1.x,cc1.x,p,pp,r,s); MUL2(p,pp,u,uu,p,pp,tp,hx,tx,hy,ty,tq,tc,tcc); MUL2(p,pp,x,dx,p,pp,tp,hx,tx,hy,ty,tq,tc,tcc); ADD2(p,pp,x,dx,p,pp,r,s); v[0]=p; v[1]=pp; /* arcsin(x+dx)=v[0]+v[1] */ }
/*
 * __ieee754_atan2(y, x) -- staged evaluation of atan2.
 *
 * Order of work (matching the inline section comments):
 *   1. NaN in x or y: returns x + x or y + y.
 *   2. Exact special operands -- y = +-0, x = +-0, x = +-INF, y = +-INF --
 *      return a signed zero or one of the table constants opi, mopi, hpi,
 *      mhpi, qpi, mqpi, tqpi, mtqpi.
 *   3. de is the difference of the masked exponent fields of y and x. When
 *      de >= ep the result is hpi/mhpi by the sign of y; when de <= em the
 *      result is either built from z = ay/ax (x > 0) or is opi/mopi.
 *   4. ax, ay are rescaled by two500 / twom500 when either is below twom500
 *      or above two500.
 *   5. u is the quotient smaller/larger of (ax, ay) with correction du from
 *      EMULV. Four cases follow, (i)..(iv), selected by the sign of x and by
 *      which of ax, ay is larger. Each case:
 *        - for u < inv16 uses the d3..d13 polynomial, otherwise a cij[] table
 *          row chosen by i = (TWO52 + TWO8*u) - TWO52, minus 16;
 *        - accepts the estimate only when adding and subtracting the error
 *          bound (u1..u8, u91..u94, ua1/ua2) gives the same double;
 *        - otherwise repeats the evaluation with the ADD2/MUL2/SUB2 macros on
 *          (value, correction) pairs using the f* or hij[] coefficients and
 *          tests again (u5..u8, ub, uc);
 *        - otherwise returns atan2Mp(x, y, pr).
 *
 * t4, t5, t6 are declared only when DLA_FMS is not defined. All constants,
 * tables, macros and the helpers normalized / signArctan2 / atan2Mp are
 * defined outside this block.
 */
double SECTION __ieee754_atan2 (double y, double x) { int i, de, ux, dx, uy, dy; static const int pr[MM] = { 6, 8, 10, 20, 32 }; double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, t7, t8, z, zz, cor, s1, ss1, s2, ss2; #ifndef DLA_FMS double t4, t5, t6; #endif number num; static const int ep = 59768832, /* 57*16**5 */ em = -59768832; /* -57*16**5 */ /* x=NaN or y=NaN */ num.d = x; ux = num.i[HIGH_HALF]; dx = num.i[LOW_HALF]; if ((ux & 0x7ff00000) == 0x7ff00000) { if (((ux & 0x000fffff) | dx) != 0x00000000) return x + x; } num.d = y; uy = num.i[HIGH_HALF]; dy = num.i[LOW_HALF]; if ((uy & 0x7ff00000) == 0x7ff00000) { if (((uy & 0x000fffff) | dy) != 0x00000000) return y + y; } /* y=+-0 */ if (uy == 0x00000000) { if (dy == 0x00000000) { if ((ux & 0x80000000) == 0x00000000) return 0; else return opi.d; } } else if (uy == 0x80000000) { if (dy == 0x00000000) { if ((ux & 0x80000000) == 0x00000000) return -0.0; else return mopi.d; } } /* x=+-0 */ if (x == 0) { if ((uy & 0x80000000) == 0x00000000) return hpi.d; else return mhpi.d; } /* x=+-INF */ if (ux == 0x7ff00000) { if (dx == 0x00000000) { if (uy == 0x7ff00000) { if (dy == 0x00000000) return qpi.d; } else if (uy == 0xfff00000) { if (dy == 0x00000000) return mqpi.d; } else { if ((uy & 0x80000000) == 0x00000000) return 0; else return -0.0; } } } else if (ux == 0xfff00000) { if (dx == 0x00000000) { if (uy == 0x7ff00000) { if (dy == 0x00000000) return tqpi.d; } else if (uy == 0xfff00000) { if (dy == 0x00000000) return mtqpi.d; } else { if ((uy & 0x80000000) == 0x00000000) return opi.d; else return mopi.d; } } } /* y=+-INF */ if (uy == 0x7ff00000) { if (dy == 0x00000000) return hpi.d; } else if (uy == 0xfff00000) { if (dy == 0x00000000) return mhpi.d; } /* either x/y or y/x is very close to zero */ ax = (x < 0) ? -x : x; ay = (y < 0) ? -y : y; de = (uy & 0x7ff00000) - (ux & 0x7ff00000); if (de >= ep) { return ((y > 0) ? 
hpi.d : mhpi.d); } else if (de <= em) { if (x > 0) { if ((z = ay / ax) < TWOM1022) return normalized (ax, ay, y, z); else return signArctan2 (y, z); } else { return ((y > 0) ? opi.d : mopi.d); } } /* if either x or y is extremely close to zero, scale abs(x), abs(y). */ if (ax < twom500.d || ay < twom500.d) { ax *= two500.d; ay *= two500.d; } /* Likewise for large x and y. */ if (ax > two500.d || ay > two500.d) { ax *= twom500.d; ay *= twom500.d; } /* x,y which are neither special nor extreme */ if (ay < ax) { u = ay / ax; EMULV (ax, u, v, vv, t1, t2, t3, t4, t5); du = ((ay - v) - vv) / ax; } else { u = ax / ay; EMULV (ay, u, v, vv, t1, t2, t3, t4, t5); du = ((ax - v) - vv) / ay; } if (x > 0) { /* (i) x>0, abs(y)< abs(x): atan(ay/ax) */ if (ay < ax) { if (u < inv16.d) { v = u * u; zz = du + u * v * (d3.d + v * (d5.d + v * (d7.d + v * (d9.d + v * (d11.d + v * d13.d))))); if ((z = u + (zz - u1.d * u)) == u + (zz + u1.d * u)) return signArctan2 (y, z); MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8); s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d)))); ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); if ((z = s1 + (ss1 - u5.d * s1)) == s1 + (ss1 + u5.d * s1)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } i = (TWO52 + TWO8 * u) - TWO52; i -= 16; t3 = u - cij[i][0].d; EADD (t3, du, v, dv); t1 = cij[i][1].d; t2 = cij[i][2].d; zz = v * t2 + (dv * t2 + v * v * (cij[i][3].d + v * (cij[i][4].d + v * (cij[i][5].d + v * cij[i][6].d)))); if (i < 112) { if 
(i < 48) u9 = u91.d; /* u < 1/4 */ else u9 = u92.d; } /* 1/4 <= u < 1/2 */ else { if (i < 176) u9 = u93.d; /* 1/2 <= u < 3/4 */ else u9 = u94.d; } /* 3/4 <= u <= 1 */ if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1)) return signArctan2 (y, z); t1 = u - hij[i][0].d; EADD (t1, du, v, vv); s1 = v * (hij[i][11].d + v * (hij[i][12].d + v * (hij[i][13].d + v * (hij[i][14].d + v * hij[i][15].d)))); ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); if ((z = s2 + (ss2 - ub.d * s2)) == s2 + (ss2 + ub.d * s2)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } /* (ii) x>0, abs(x)<=abs(y): pi/2-atan(ax/ay) */ if (u < inv16.d) { v = u * u; zz = u * v * (d3.d + v * (d5.d + v * (d7.d + v * (d9.d + v * (d11.d + v * d13.d))))); ESUB (hpi.d, u, t2, cor); t3 = ((hpi1.d + cor) - du) - zz; if ((z = t2 + (t3 - u2.d)) == t2 + (t3 + u2.d)) return signArctan2 (y, z); MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8); s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d)))); ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8); 
ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); SUB2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2); if ((z = s2 + (ss2 - u6.d)) == s2 + (ss2 + u6.d)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } i = (TWO52 + TWO8 * u) - TWO52; i -= 16; v = (u - cij[i][0].d) + du; zz = hpi1.d - v * (cij[i][2].d + v * (cij[i][3].d + v * (cij[i][4].d + v * (cij[i][5].d + v * cij[i][6].d)))); t1 = hpi.d - cij[i][1].d; if (i < 112) ua = ua1.d; /* w < 1/2 */ else ua = ua2.d; /* w >= 1/2 */ if ((z = t1 + (zz - ua)) == t1 + (zz + ua)) return signArctan2 (y, z); t1 = u - hij[i][0].d; EADD (t1, du, v, vv); s1 = v * (hij[i][11].d + v * (hij[i][12].d + v * (hij[i][13].d + v * (hij[i][14].d + v * hij[i][15].d)))); ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); SUB2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2); if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } /* (iii) x<0, abs(x)< abs(y): pi/2+atan(ax/ay) */ if (ax < ay) { if (u < inv16.d) { v = u * u; zz = u * v * (d3.d + v * (d5.d + v * (d7.d + v * (d9.d + v * (d11.d + v * d13.d))))); EADD (hpi.d, u, t2, cor); t3 = ((hpi1.d + cor) + du) + zz; if ((z = t2 + (t3 - u3.d)) == t2 + (t3 + u3.d)) return signArctan2 (y, z); MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8); s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d)))); ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); 
MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); ADD2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2); if ((z = s2 + (ss2 - u7.d)) == s2 + (ss2 + u7.d)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } i = (TWO52 + TWO8 * u) - TWO52; i -= 16; v = (u - cij[i][0].d) + du; zz = hpi1.d + v * (cij[i][2].d + v * (cij[i][3].d + v * (cij[i][4].d + v * (cij[i][5].d + v * cij[i][6].d)))); t1 = hpi.d + cij[i][1].d; if (i < 112) ua = ua1.d; /* w < 1/2 */ else ua = ua2.d; /* w >= 1/2 */ if ((z = t1 + (zz - ua)) == t1 + (zz + ua)) return signArctan2 (y, z); t1 = u - hij[i][0].d; EADD (t1, du, v, vv); s1 = v * (hij[i][11].d + v * (hij[i][12].d + v * (hij[i][13].d + v * (hij[i][14].d + v * hij[i][15].d)))); ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); ADD2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2); if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } /* (iv) x<0, abs(y)<=abs(x): pi-atan(ax/ay) */ if (u < inv16.d) { v = u * u; zz = u * v * (d3.d + v * (d5.d + v * (d7.d + v * (d9.d + v * (d11.d + v * d13.d))))); ESUB (opi.d, u, t2, cor); t3 = ((opi1.d + cor) - du) - zz; if ((z = t2 + (t3 - u4.d)) == t2 
+ (t3 + u4.d)) return signArctan2 (y, z); MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8); s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d)))); ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (u, du, s2, ss2, s1, ss1, t1, t2); SUB2 (opi.d, opi1.d, s1, ss1, s2, ss2, t1, t2); if ((z = s2 + (ss2 - u8.d)) == s2 + (ss2 + u8.d)) return signArctan2 (y, z); return atan2Mp (x, y, pr); } i = (TWO52 + TWO8 * u) - TWO52; i -= 16; v = (u - cij[i][0].d) + du; zz = opi1.d - v * (cij[i][2].d + v * (cij[i][3].d + v * (cij[i][4].d + v * (cij[i][5].d + v * cij[i][6].d)))); t1 = opi.d - cij[i][1].d; if (i < 112) ua = ua1.d; /* w < 1/2 */ else ua = ua2.d; /* w >= 1/2 */ if ((z = t1 + (zz - ua)) == t1 + (zz + ua)) return signArctan2 (y, z); t1 = u - hij[i][0].d; EADD (t1, du, v, vv); s1 = v * (hij[i][11].d + v * (hij[i][12].d + v * (hij[i][13].d + v * (hij[i][14].d + v * hij[i][15].d)))); ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2); MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8); ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2); SUB2 (opi.d, opi1.d, s2, ss2, s1, ss1, t1, t2); if ((z = s1 + (ss1 - uc.d)) == 
s1 + (ss1 + uc.d)) return signArctan2 (y, z); return atan2Mp (x, y, pr); }
/*
 * atan(x) -- visible part of the staged arctangent routine.
 *
 *   - NaN: returns x + x.
 *   - u = |x|, then for u < C:
 *       u < A:        returns x unchanged.
 *       A <= u < B:   d3..d13 polynomial in plain doubles, accepted when
 *                     x+(yy-U1*x) equals x+(yy+U1*x); otherwise the f3..f19
 *                     series is evaluated with EMULV/ADD2/MUL2 on
 *                     (value, correction) pairs and tested with U5; otherwise
 *                     atanMp(x, pr).
 *       B <= u < C:   table row i = (TWO52 + TWO8*u) - TWO52, minus 16;
 *                     cij[i] polynomial tested with U21..U24 (chosen by i),
 *                     then the hij[i] series in pair arithmetic tested with
 *                     U6; otherwise atanMp(x, pr). Results on this path go
 *                     through __signArctan(x, y).
 *
 * NOTE(review): the definition is truncated in this view -- the branch for
 * u >= C and the closing brace of the function are not present. Constants,
 * tables and macros are defined outside this block.
 */
/* routine computes the correctly rounded (to nearest) value of atan(x). */ double atan(double x) { double cor,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,u,u2,u3, v,vv,w,ww,y,yy,z,zz; #if 0 double y1,y2; #endif int i,ux,dx; #if 0 int p; #endif static const int pr[M]={6,8,10,32}; number num; #if 0 mp_no mpt1,mpx,mpy,mpy1,mpy2,mperr; #endif num.d = x; ux = num.i[HIGH_HALF]; dx = num.i[LOW_HALF]; /* x=NaN */ if (((ux&0x7ff00000)==0x7ff00000) && (((ux&0x000fffff)|dx)!=0x00000000)) return x+x; /* Regular values of x, including denormals +-0 and +-INF */ u = (x<ZERO) ? -x : x; if (u<C) { if (u<B) { if (u<A) { /* u < A */ return x; } else { /* A <= u < B */ v=x*x; yy=x*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d))))); if ((y=x+(yy-U1*x)) == x+(yy+U1*x)) return y; EMULV(x,x,v,vv,t1,t2,t3,t4,t5) /* v+vv=x^2 */ s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d)))); ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2) MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2) MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2) MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2) MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) MUL2(x,ZERO,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(x,ZERO,s2,ss2,s1,ss1,t1,t2) if ((y=s1+(ss1-U5*s1)) == s1+(ss1+U5*s1)) return y; return atanMp(x,pr); } } else { /* B <= u < C */ i=(TWO52+TWO8*u)-TWO52; i-=16; z=u-cij[i][0].d; yy=z*(cij[i][2].d+z*(cij[i][3].d+z*(cij[i][4].d+ z*(cij[i][5].d+z* cij[i][6].d)))); t1=cij[i][1].d; if (i<112) { if (i<48) u2=U21; /* u < 1/4 */ else u2=U22; } /* 1/4 <= u < 1/2 */ else { if (i<176) u2=U23; /* 1/2 <= u < 3/4 */ else u2=U24; } /* 3/4 <= u <= 1 */ if ((y=t1+(yy-u2*t1)) == t1+(yy+u2*t1)) return __signArctan(x,y); z=u-hij[i][0].d; s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+ z*(hij[i][14].d+z* hij[i][15].d)))); ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2) MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) 
ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2) MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2) MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2) MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2) if ((y=s2+(ss2-U6*s2)) == s2+(ss2+U6*s2)) return __signArctan(x,y); return atanMp(x,pr); } }
/*
 * __ieee754_log(x) -- staged logarithm evaluation.
 *
 * Special operands (high word ux, low word dx of x):
 *   - ux < 0x00100000: x = +-0 returns MHALF/ZERO (-INF per the inline
 *     comment); ux < 0 returns (x-x)/ZERO (NaN); otherwise x is multiplied
 *     by two54 and n starts at -54.
 *   - ux >= 0x7ff00000 (INF or NaN): returns x + x.
 *
 * Regular operands, with w = x - ONE:
 *   - ABS(w) <= U03: Stage I evaluates polynomial II (b0..b8); Stage II
 *     repeats the evaluation with the d2..d20 coefficients using
 *     EMULV/ADD2/MUL2 on (value, correction) pairs.
 *   - otherwise (label case_03): x is split as u*2**n, indices i and j select
 *     rows of Iu/Lu and Iv/Lv, Stage I evaluates polynomial I and sums the
 *     table terms with n*LN2A / n*LN2B; Stage II refines r0 and evaluates
 *     polynomial III (c2..c5).
 *   Every stage returns y only when adding and subtracting that stage's
 *   error bound (E1..E4) produces the same double.
 *
 * Label stage_n: for each precision pr[i], x and the last y are converted
 * with __dbl_mp, __mplog is called, and the result plus/minus e[i] is
 * converted back into y1 / y2; y1 is returned as soon as y1 == y2, or after
 * the last precision regardless.
 *
 * t3..t6 are declared only when DLA_FMS is not defined. The tables and most
 * constants are not defined in this block (presumably ulog.tbl / ulog.h --
 * confirm). Note that M is #defined inside the function body.
 */
double SECTION __ieee754_log(double x) { #define M 4 static const int pr[M]={8,10,18,32}; int i,j,n,ux,dx,p; #if 0 int k; #endif double dbl_n,u,p0,q,r0,w,nln2a,luai,lubi,lvaj,lvbj, sij,ssij,ttij,A,B,B0,y,y1,y2,polI,polII,sa,sb, t1,t2,t7,t8,t,ra,rb,ww, a0,aa0,s1,s2,ss2,s3,ss3,a1,aa1,a,aa,b,bb,c; #ifndef DLA_FMS double t3,t4,t5,t6; #endif number num; mp_no mpx,mpy,mpy1,mpy2,mperr; #include "ulog.tbl" #include "ulog.h" /* Treating special values of x ( x<=0, x=INF, x=NaN etc.). */ num.d = x; ux = num.i[HIGH_HALF]; dx = num.i[LOW_HALF]; n=0; if (__builtin_expect(ux < 0x00100000, 0)) { if (__builtin_expect(((ux & 0x7fffffff) | dx) == 0, 0)) return MHALF/ZERO; /* return -INF */ if (__builtin_expect(ux < 0, 0)) return (x-x)/ZERO; /* return NaN */ n -= 54; x *= two54.d; /* scale x */ num.d = x; } if (__builtin_expect(ux >= 0x7ff00000, 0)) return x+x; /* INF or NaN */ /* Regular values of x */ w = x-ONE; if (__builtin_expect(ABS(w) > U03, 1)) { goto case_03; } /*--- Stage I, the case abs(x-1) < 0.03 */ t8 = MHALF*w; EMULV(t8,w,a,aa,t1,t2,t3,t4,t5) EADD(w,a,b,bb) /* Evaluate polynomial II */ polII = (b0.d+w*(b1.d+w*(b2.d+w*(b3.d+w*(b4.d+ w*(b5.d+w*(b6.d+w*(b7.d+w*b8.d))))))))*w*w*w; c = (aa+bb)+polII; /* End stage I, case abs(x-1) < 0.03 */ if ((y=b+(c+b*E2)) == b+(c-b*E2)) return y; /*--- Stage II, the case abs(x-1) < 0.03 */ a = d11.d+w*(d12.d+w*(d13.d+w*(d14.d+w*(d15.d+w*(d16.d+ w*(d17.d+w*(d18.d+w*(d19.d+w*d20.d)))))))); EMULV(w,a,s2,ss2,t1,t2,t3,t4,t5) ADD2(d10.d,dd10.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d9.d,dd9.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d8.d,dd8.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d7.d,dd7.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d6.d,dd6.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d5.d,dd5.d,s2,ss2,s3,ss3,t1,t2) 
MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d4.d,dd4.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d3.d,dd3.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(d2.d,dd2.d,s2,ss2,s3,ss3,t1,t2) MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) MUL2(w,ZERO,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(w,ZERO, s3,ss3, b, bb,t1,t2) /* End stage II, case abs(x-1) < 0.03 */ if ((y=b+(bb+b*E4)) == b+(bb-b*E4)) return y; goto stage_n; /*--- Stage I, the case abs(x-1) > 0.03 */ case_03: /* Find n,u such that x = u*2**n, 1/sqrt(2) < u < sqrt(2) */ n += (num.i[HIGH_HALF] >> 20) - 1023; num.i[HIGH_HALF] = (num.i[HIGH_HALF] & 0x000fffff) | 0x3ff00000; if (num.d > SQRT_2) { num.d *= HALF; n++; } u = num.d; dbl_n = (double) n; /* Find i such that ui=1+(i-75)/2**8 is closest to u (i= 0,1,2,...,181) */ num.d += h1.d; i = (num.i[HIGH_HALF] & 0x000fffff) >> 12; /* Find j such that vj=1+(j-180)/2**16 is closest to v=u/ui (j= 0,...,361) */ num.d = u*Iu[i].d + h2.d; j = (num.i[HIGH_HALF] & 0x000fffff) >> 4; /* Compute w=(u-ui*vj)/(ui*vj) */ p0=(ONE+(i-75)*DEL_U)*(ONE+(j-180)*DEL_V); q=u-p0; r0=Iu[i].d*Iv[j].d; w=q*r0; /* Evaluate polynomial I */ polI = w+(a2.d+a3.d*w)*w*w; /* Add up everything */ nln2a = dbl_n*LN2A; luai = Lu[i][0].d; lubi = Lu[i][1].d; lvaj = Lv[j][0].d; lvbj = Lv[j][1].d; EADD(luai,lvaj,sij,ssij) EADD(nln2a,sij,A ,ttij) B0 = (((lubi+lvbj)+ssij)+ttij)+dbl_n*LN2B; B = polI+B0; /* End stage I, case abs(x-1) >= 0.03 */ if ((y=A+(B+E1)) == A+(B-E1)) return y; /*--- Stage II, the case abs(x-1) > 0.03 */ /* Improve the accuracy of r0 */ EMULV(p0,r0,sa,sb,t1,t2,t3,t4,t5) t=r0*((ONE-sa)-sb); EADD(r0,t,ra,rb) /* Compute w */ MUL2(q,ZERO,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8) EADD(A,B0,a0,aa0) /* Evaluate polynomial III */ s1 = (c3.d+(c4.d+c5.d*w)*w)*w; EADD(c2.d,s1,s2,ss2) MUL2(s2,ss2,w,ww,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8) MUL2(s3,ss3,w,ww,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8) ADD2(s2,ss2,w,ww,s3,ss3,t1,t2) 
ADD2(s3,ss3,a0,aa0,a1,aa1,t1,t2) /* End stage II, case abs(x-1) >= 0.03 */ if ((y=a1+(aa1+E3)) == a1+(aa1-E3)) return y; /* Final stages. Use multi-precision arithmetic. */ stage_n: for (i=0; i<M; i++) { p = pr[i]; __dbl_mp(x,&mpx,p); __dbl_mp(y,&mpy,p); __mplog(&mpx,&mpy,p); __dbl_mp(e[i].d,&mperr,p); __add(&mpy,&mperr,&mpy1,p); __sub(&mpy,&mperr,&mpy2,p); __mp_dbl(&mpy1,&y1,p); __mp_dbl(&mpy2,&y2,p); if (y1==y2) return y1; } return y1; }
/*
 * __dubsin(x, dx, v) -- sine of the double-length argument x + dx.
 * The result is stored as the pair v[0] + v[1] (e + ee).
 *
 * Steps:
 *   - u.x = x + big.x; k = low word of u shifted left by 2 is the index of a
 *     four-entry group in __sincostab.x: sn + ssn and cs + ccs
 *     ("sin(Xi) and cos(Xi)" per the inline comment).
 *   - x is replaced by x - (u.x - big.x); d + dd is that remainder plus dx
 *     (the "t" of the identity quoted below), and d2 + dd2 is its square.
 *   - ds + dss and dc + dcc are built from the s3..s7 and c2..c8 coefficient
 *     pairs by alternating MUL2 / ADD2.
 *   - Final combination: e = cs*ds - dc*sn + sn, all in pair arithmetic.
 *
 * p, hx, tx, hy, ty, q are scratch arguments for MUL2 and are declared only
 * when DLA_FMS is not defined; r, s, c, cc are scratch as well. big, the
 * coefficient pairs and the macros are defined outside this block.
 */
void SECTION __dubsin (double x, double dx, double v[]) { double r, s, c, cc, d, dd, d2, dd2, e, ee, sn, ssn, cs, ccs, ds, dss, dc, dcc; #ifndef DLA_FMS double p, hx, tx, hy, ty, q; #endif mynumber u; int4 k; u.x = x + big.x; k = u.i[LOW_HALF] << 2; x = x - (u.x - big.x); d = x + dx; dd = (x - d) + dx; /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */ MUL2 (d, dd, d, dd, d2, dd2, p, hx, tx, hy, ty, q, c, cc); sn = __sincostab.x[k]; /* */ ssn = __sincostab.x[k + 1]; /* sin(Xi) and cos(Xi) */ cs = __sincostab.x[k + 2]; /* */ ccs = __sincostab.x[k + 3]; /* */ /* Taylor series for sin ds=sin(t) */ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc); ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s); MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc); ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s); MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc); MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc); ADD2 (ds, dss, d, dd, ds, dss, r, s); /* Taylor series for cos dc=cos(t) */ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc); ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s); MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc); ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s); MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc); ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s); MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc); MUL2 (cs, ccs, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc); MUL2 (dc, dcc, sn, ssn, dc, dcc, p, hx, tx, hy, ty, q, c, cc); SUB2 (e, ee, dc, dcc, e, ee, r, s); ADD2 (e, ee, sn, ssn, e, ee, r, s); /* e+ee=sin(x+dx) */ v[0] = e; v[1] = ee; }
void __dubcos(double x, double dx, double v[]) { double r,s,p,hx,tx,hy,ty,q,c,cc,d,dd,d2,dd2,e,ee, sn,ssn,cs,ccs,ds,dss,dc,dcc; #if 0 double xx,y,yy,z,zz; #endif mynumber u; int4 k; u.x=x+big.x; k = u.i[LOW_HALF]<<2; x=x-(u.x-big.x); d=x+dx; dd=(x-d)+dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */ MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc); sn=sincos.x[k]; /* */ ssn=sincos.x[k+1]; /* sin(Xi) and cos(Xi) */ cs=sincos.x[k+2]; /* */ ccs=sincos.x[k+3]; /* */ MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s); MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s); MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); ADD2(ds,dss,d,dd,ds,dss,r,s); MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s); MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s); MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s); MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc); MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc); MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s); MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s); MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); ADD2(ds,dss,d,dd,ds,dss,r,s); MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s); MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s); MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s); MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); MUL2(sn,ssn,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc); MUL2(dc,dcc,cs,ccs,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ADD2(e,ee,dc,dcc,e,ee,r,s); SUB2(cs,ccs,e,ee,e,ee,r,s); v[0]=e; v[1]=ee; }
/*
 * BTR_initTree(scheduler, tree) -- builds every level of `tree` from its
 * leaves and, when the tree has height > 0, selects the root value.
 *
 * tree->size == 1: node [0][0] gets value 0, father 0, size 1 and all three
 * visit slots NOT_EQUAL; tree->father[0] = 0; returns immediately.
 *
 * Otherwise:
 *   - numberOfNodes holds, per level, the index of the last node. It is a
 *     single int set to 0 when size is 2 or 3, else height + 1 ints filled
 *     with (size halved so far) - 1 when height is non-zero.
 *   - tree->father[j] = DIV2(j) for every leaf; with an odd size the last
 *     leaf's father is decremented, so the last level-0 node owns three
 *     leaves.
 *   - Level 0: node j covers leaves MUL2(j) and MUL2(j) + 1. value and
 *     visit[0] come from the four-argument MIN (presumably the index paired
 *     with the smaller leaf value -- confirm); visit[1] is the right child
 *     when both leaves compare equal, NOT_EQUAL otherwise. With an odd size
 *     the last node's size is incremented and the third leaf is merged into
 *     value / visit[0..2].
 *   - Levels 1..height: same scheme over the child nodes of level i - 1,
 *     comparing tree->leaves[] at the children's value indices; visit[]
 *     stores child node indices rather than leaf indices.
 *   - Only when height is non-zero: numEquals and randomRange are reset, the
 *     visit[] entries are followed from the root down through
 *     scheduler->state->visit->fathers, and each level-0 visit entry is
 *     recorded in tree->equals and repeated tree->weights[add] times in
 *     tree->weightedEquals. If more than one entry was recorded,
 *     weightedEquals is shuffled and its first element becomes the root
 *     value (its equals slot is set to NOT_ASSIGNED, num = 1, numEquals
 *     decremented); otherwise numEquals is set to 0.
 *   - numberOfNodes is freed before returning.
 *
 * checkedMalloc, shuffle, MIN, DIV2, MUL2, ODD, NOT_EQUAL and NOT_ASSIGNED
 * are defined outside this block.
 */
void BTR_initTree (SC_scheduler scheduler, BTR_tree tree) { if (tree->size == 1) { tree->nodes[0][0].value = 0; tree->nodes[0][0].father = 0; tree->nodes[0][0].size = 1; tree->nodes[0][0].visit[0] = NOT_EQUAL; tree->nodes[0][0].visit[1] = NOT_EQUAL; tree->nodes[0][0].visit[2] = NOT_EQUAL; tree->father[0] = 0; return; } unsigned int j, i, size, rightChild, leftChild; int k; int *numberOfNodes; if (tree->size == 2 || tree->size == 3) { numberOfNodes = checkedMalloc (sizeof(int)); numberOfNodes[0] = 0; } else { numberOfNodes = checkedMalloc ((tree->height + 1) * sizeof(int)); } size = DIV2(tree->size); for (j = 0; j < tree->size; j++) { tree->father[j] = DIV2(j); } if (tree->size & ODD) { tree->father[tree->size - 1]--; } if (tree->height) { for (j = 0; j <= tree->height; j++) { numberOfNodes[j] = size - 1; size = DIV2(size); } } for (j = 0; j <= numberOfNodes[0]; j++) { leftChild = MUL2(j); rightChild = MUL2(j) + 1; tree->nodes[0][j].size = 2; tree->nodes[0][j].father = DIV2(j); tree->nodes[0][j].value = MIN(tree->leaves[leftChild], leftChild, tree->leaves[rightChild], rightChild); tree->nodes[0][j].visit[0] = tree->nodes[0][j].value; if (tree->leaves[leftChild] == tree->leaves[rightChild]) { tree->nodes[0][j].visit[1] = rightChild; } else { tree->nodes[0][j].visit[1] = NOT_EQUAL; } } if ((numberOfNodes[0] + 1) & ODD) { tree->nodes[0][numberOfNodes[0]].father--; } if (tree->size & ODD) { j = numberOfNodes[0]; tree->nodes[0][numberOfNodes[0]].size++; rightChild = MUL2(numberOfNodes[0]) + 2; k = tree->nodes[0][numberOfNodes[0]].value; if (tree->leaves[k] == tree->leaves[rightChild]) { if (tree->nodes[0][j].visit[1] != NOT_EQUAL) { tree->nodes[0][j].visit[2] = rightChild; } else { tree->nodes[0][j].visit[1] = rightChild; tree->nodes[0][j].visit[2] = NOT_EQUAL; } } else { tree->nodes[0][j].visit[2] = NOT_EQUAL; tree->nodes[0][numberOfNodes[0]].value = MIN(tree->leaves[k], k, tree->leaves[rightChild], rightChild); if (tree->nodes[0][numberOfNodes[0]].value == rightChild) 
{ tree->nodes[0][j].visit[0] = rightChild; tree->nodes[0][j].visit[1] = NOT_EQUAL; } } } if (tree->height) { for (i = 1; i <= tree->height; i++) { for (j = 0; j <= numberOfNodes[i]; j++) { leftChild = tree->nodes[i - 1][MUL2(j)].value; rightChild = tree->nodes[i - 1][MUL2(j) + 1].value; tree->nodes[i][j].size = 2; tree->nodes[i][j].father = DIV2(j); tree->nodes[i][j].value = MIN(tree->leaves[leftChild], leftChild, tree->leaves[rightChild], rightChild); tree->nodes[i][j].visit[0] = MIN(tree->leaves[leftChild], MUL2(j), tree->leaves[rightChild], MUL2(j)+1); if (tree->leaves[leftChild] == tree->leaves[rightChild]) { tree->nodes[i][j].visit[1] = MUL2(j) + 1; } else { tree->nodes[i][j].visit[1] = NOT_EQUAL; } } if ((numberOfNodes[i] + 1) & ODD) { tree->nodes[i][numberOfNodes[i]].father--; } if ((numberOfNodes[i - 1] + 1) & ODD) { j = numberOfNodes[i]; tree->nodes[i][numberOfNodes[i]].size++; rightChild = tree->nodes[i - 1][MUL2(numberOfNodes[i]) + 2].value; k = tree->nodes[i][numberOfNodes[i]].value; if (tree->leaves[k] == tree->leaves[rightChild]) { if (tree->nodes[i][j].visit[1] != NOT_EQUAL) { tree->nodes[i][j].visit[2] = MUL2(numberOfNodes[i]) + 2; } else { tree->nodes[i][j].visit[1] = MUL2(numberOfNodes[i]) + 2; tree->nodes[i][j].visit[2] = NOT_EQUAL; } } else { tree->nodes[i][j].visit[2] = NOT_EQUAL; tree->nodes[i][numberOfNodes[i]].value = MIN( tree->leaves[k], k, tree->leaves[rightChild], rightChild); if (tree->nodes[i][numberOfNodes[i]].value == rightChild) { tree->nodes[i][j].visit[0] = MUL2(numberOfNodes[i]) + 2; tree->nodes[i][j].visit[1] = NOT_EQUAL; } } } } tree->numEquals = 0; tree->randomRange = 0; scheduler->state->visit->fathers[tree->height][0] = 0; int visitNodes = 1; for (i = tree->height; i >= 1; i--) { int nNodes = visitNodes; visitNodes = 0; for (j = 0; j < nNodes; j++) { int upd = scheduler->state->visit->fathers[i][j]; int cChilds = tree->nodes[i][upd].size; for (k = 0; k < cChilds; k++) { int add = tree->nodes[i][upd].visit[k]; if (add != 
NOT_EQUAL) { scheduler->state->visit->fathers[i - 1][visitNodes++] = add; } else { break; } } } } for (i = 0; i < visitNodes; i++) { int upd = scheduler->state->visit->fathers[0][i]; int cChilds = tree->nodes[0][upd].size; for (j = 0; j < cChilds; j++) { int add = tree->nodes[0][upd].visit[j]; if (add != NOT_EQUAL) { tree->equals[add] = add; tree->numEquals++; int g, w = tree->randomRange + tree->weights[add]; for (g = tree->randomRange; g < w; g++) { tree->weightedEquals[g] = add; } tree->randomRange += tree->weights[add]; } } } if (tree->numEquals > 1) { shuffle (tree->weightedEquals, tree->randomRange); int selected = tree->weightedEquals[0]; tree->nodes[tree->height][0].value = selected; tree->equals[selected] = NOT_ASSIGNED; tree->num = 1; tree->numEquals--; } else { tree->numEquals = 0; } } free (numberOfNodes); }
/*
 * Recomputes the cached winner of one level-0 node from its leaves.
 *
 * The node covers leaves MUL2(node) and MUL2(node) + 1, plus MUL2(node) + 2
 * when its size has the ODD bit set.  On return:
 *   - value    holds the leaf index selected by MIN,
 *   - visit[0] holds that same leaf index,
 *   - visit[1] / visit[2] hold further leaves whose key compares equal to
 *     the winner's key, or NOT_EQUAL when there is none.
 *
 * NOTE(review): MIN(a, ia, b, ib) is a project macro; it is presumably
 * "ia if a is the smaller key, else ib" -- confirm the tie-breaking rule.
 */
static void
BTR_refreshLeafNode (BTR_tree tree, unsigned int node)
{
  unsigned int best = MUL2(node);

  /* Right leaf is recorded in visit[1] only when it ties with the left one. */
  if (tree->leaves[best] != tree->leaves[best + 1])
    {
      tree->nodes[0][node].visit[1] = NOT_EQUAL;
    }
  else
    {
      tree->nodes[0][node].visit[1] = best + 1;
    }

  best = MIN(tree->leaves[best], best, tree->leaves[best + 1], best + 1);
  tree->nodes[0][node].visit[0] = best;

  if (tree->nodes[0][node].size & ODD)
    {
      /* Third leaf attached to this node. */
      int last = MUL2(node) + 2;

      if (tree->leaves[best] != tree->leaves[last])
        {
          tree->nodes[0][node].visit[2] = NOT_EQUAL;
          best = MIN(tree->leaves[best], best, tree->leaves[last], last);
          if (best == last)
            {
              /* The third leaf wins outright: it becomes the only entry. */
              tree->nodes[0][node].visit[0] = last;
              tree->nodes[0][node].visit[1] = NOT_EQUAL;
            }
        }
      else if (tree->nodes[0][node].visit[1] == NOT_EQUAL)
        {
          /* Tie with the winner only: append in the first free slot. */
          tree->nodes[0][node].visit[1] = last;
          tree->nodes[0][node].visit[2] = NOT_EQUAL;
        }
      else
        {
          /* All three leaves tie. */
          tree->nodes[0][node].visit[2] = last;
        }
    }

  tree->nodes[0][node].value = best;
}

/*
 * Recomputes the cached winner of node `node` at level `level` (>= 1) from
 * its children at level - 1.
 *
 * Same bookkeeping as the leaf-level refresh, except that keys are read
 * through the children's cached `value` (a leaf index), while visit[]
 * stores child *node* indices at level - 1 rather than leaf indices.
 */
static void
BTR_refreshInnerNode (BTR_tree tree, unsigned int level, unsigned int node)
{
  unsigned int first = MUL2(node);
  unsigned int best;

  if (tree->leaves[tree->nodes[level - 1][first].value]
      != tree->leaves[tree->nodes[level - 1][first + 1].value])
    {
      tree->nodes[level][node].visit[1] = NOT_EQUAL;
    }
  else
    {
      tree->nodes[level][node].visit[1] = first + 1;
    }

  /* visit[0]: which child holds the winner. */
  tree->nodes[level][node].visit[0] = MIN(
      tree->leaves[tree->nodes[level - 1][first].value], first,
      tree->leaves[tree->nodes[level - 1][first + 1].value], first + 1);

  /* best: the winning leaf index itself. */
  best = MIN(tree->leaves[tree->nodes[level - 1][first].value],
             tree->nodes[level - 1][first].value,
             tree->leaves[tree->nodes[level - 1][first + 1].value],
             tree->nodes[level - 1][first + 1].value);

  if (tree->nodes[level][node].size & ODD)
    {
      /* Third child attached to this node. */
      int last = MUL2(node) + 2;

      if (tree->leaves[best] != tree->leaves[tree->nodes[level - 1][last].value])
        {
          tree->nodes[level][node].visit[2] = NOT_EQUAL;
          best = MIN(tree->leaves[best], best,
                     tree->leaves[tree->nodes[level - 1][last].value],
                     tree->nodes[level - 1][last].value);
          if (best == tree->nodes[level - 1][last].value)
            {
              tree->nodes[level][node].visit[0] = last;
              tree->nodes[level][node].visit[1] = NOT_EQUAL;
            }
        }
      else if (tree->nodes[level][node].visit[1] == NOT_EQUAL)
        {
          tree->nodes[level][node].visit[1] = last;
          tree->nodes[level][node].visit[2] = NOT_EQUAL;
        }
      else
        {
          tree->nodes[level][node].visit[2] = last;
        }
    }

  tree->nodes[level][node].value = best;
}

/*
 * Rebuilds the set of leaves that tie for the root's winner and picks one.
 *
 * Stores the winning key in tree->minimum, then walks the visit[] links
 * from the root down to level 0, using scheduler->state->visit->fathers as
 * per-level scratch.  Each leaf reached is recorded in tree->equals and
 * written tree->weights[leaf] times into tree->weightedEquals.  When more
 * than one leaf was collected, the weighted list is shuffled and its first
 * entry becomes the root's value; otherwise numEquals is reset to 0.
 */
static void
BTR_collectEquals (SC_scheduler scheduler, BTR_tree tree)
{
  int level, slot, entry;
  int frontier = 1;
  unsigned int n;

  tree->minimum = tree->leaves[tree->nodes[tree->height][0].value];
  tree->numEquals = 0;
  tree->randomRange = 0;
  scheduler->state->visit->fathers[tree->height][0] = 0;

  /* Descend level by level, following only non-NOT_EQUAL visit entries. */
  for (level = tree->height; level >= 1; level--)
    {
      int width = frontier;
      frontier = 0;
      for (n = 0; n < width; n++)
        {
          int parent = scheduler->state->visit->fathers[level][n];
          int fanout = tree->nodes[level][parent].size;
          for (slot = 0; slot < fanout; slot++)
            {
              int child = tree->nodes[level][parent].visit[slot];
              if (child == NOT_EQUAL)
                {
                  break;
                }
              scheduler->state->visit->fathers[level - 1][frontier++] = child;
            }
        }
    }

  /* Level 0: visit entries are leaf indices. */
  for (entry = 0; entry < frontier; entry++)
    {
      int parent = scheduler->state->visit->fathers[0][entry];
      int fanout = tree->nodes[0][parent].size;
      for (n = 0; n < fanout; n++)
        {
          int leaf = tree->nodes[0][parent].visit[n];
          if (leaf == NOT_EQUAL)
            {
              break;
            }
          tree->equals[leaf] = leaf;
          tree->numEquals++;
          int g, w = tree->randomRange + tree->weights[leaf];
          for (g = tree->randomRange; g < w; g++)
            {
              tree->weightedEquals[g] = leaf;
            }
          tree->randomRange += tree->weights[leaf];
        }
    }

  if (tree->numEquals <= 1)
    {
      tree->numEquals = 0;
      return;
    }

  shuffle (tree->weightedEquals, tree->randomRange);
  int selected = tree->weightedEquals[0];
  tree->nodes[tree->height][0].value = selected;
  tree->equals[selected] = NOT_ASSIGNED;
  tree->num = 1;
  tree->numEquals--;
}

/*
 * Propagates changed leaf keys up the tree and refreshes the root's winner.
 *
 *   scheduler  supplies scheduler->state->visit->fathers, used as per-level
 *              scratch for the list of nodes to refresh.
 *   tree       the tree to update; nothing is done when tree->size == 1.
 *   inf, cant  `cant` indices into tree->father whose level-0 nodes are
 *              refreshed.
 *   idx        when >= 0, one more index refreshed before the `inf` entries;
 *              when < 0, inf[0] seeds the refresh list, so `inf` must hold
 *              at least one element in that case.
 *   times      not referenced by this function.
 *
 * After the bottom-up refresh: if ties from a previous call are still
 * pending (tree->numEquals != 0), the next unassigned entry of
 * tree->weightedEquals becomes the root's value; otherwise the tie set is
 * rebuilt from scratch.
 */
void
BTR_updateTree (SC_scheduler scheduler, BTR_tree tree, int *inf, int cant, int idx, double *times)
{
  if (tree->size == 1)
    return;

  int touched = 1; /* number of distinct parents queued for the next level */
  int n;
  unsigned int node, level;

  /* Seed the parent list and, when given, refresh the node owning idx. */
  if (idx < 0)
    {
      scheduler->state->visit->fathers[0][0] = tree->nodes[0][tree->father[inf[0]]].father;
    }
  else
    {
      scheduler->state->visit->fathers[0][0] = tree->nodes[0][tree->father[idx]].father;
      node = tree->father[idx];
      BTR_refreshLeafNode (tree, node);
    }

  /* Level 0: refresh every node named through inf[], queueing its parent
     unless it equals the most recently queued one. */
  for (n = 0; n < cant; n++)
    {
      node = tree->father[inf[n]];
      BTR_refreshLeafNode (tree, node);
      if (tree->nodes[0][node].father != scheduler->state->visit->fathers[0][touched - 1])
        {
          scheduler->state->visit->fathers[0][touched] = tree->nodes[0][node].father;
          touched++;
        }
    }

  /* Upper levels: refresh the nodes queued by the level below. */
  for (level = 1; level <= tree->height; level++)
    {
      int pending = touched;
      touched = 1;
      scheduler->state->visit->fathers[level][0] =
          tree->nodes[level][scheduler->state->visit->fathers[level - 1][0]].father;
      for (n = 0; n < pending; n++)
        {
          node = scheduler->state->visit->fathers[level - 1][n];
          BTR_refreshInnerNode (tree, level, node);
          if (tree->nodes[level][node].father != scheduler->state->visit->fathers[level][touched - 1])
            {
              scheduler->state->visit->fathers[level][touched] = tree->nodes[level][node].father;
              touched++;
            }
        }
    }

  if (tree->numEquals)
    {
      /* Ties still pending: take the next entry not yet handed out. */
      int next = tree->weightedEquals[tree->num++];
      while (tree->equals[next] == NOT_ASSIGNED)
        {
          next = tree->weightedEquals[tree->num++];
        }
      tree->equals[next] = NOT_ASSIGNED;
      tree->nodes[tree->height][0].value = next;
      tree->numEquals--;
      return;
    }

  BTR_collectEquals (scheduler, tree);
}
/*
 * Computes sin(x+dx) as a two-part result and stores it in v[0] (leading
 * part, e) and v[1] (trailing part, ee).
 *
 * Method, as stated by the inline notes: write x+dx = Xi + t, read sin(Xi)
 * and cos(Xi) from the `sincos` table, evaluate polynomial series in t, and
 * combine them with the angle-addition formula.
 *
 * NOTE(review): MUL2/ADD2/SUB2 are macros defined elsewhere; from their use
 * here they appear to multiply/add/subtract (head, tail) pairs, writing the
 * result pair into their 5th/6th arguments and using the remaining
 * arguments as scratch -- confirm against their definitions.
 */
void __dubsin(Double x, Double dx, Double v[]) {
  Double r,s,p,hx,tx,hy,ty,q,c,cc,d,dd,d2,dd2,e,ee,
    sn,ssn,cs,ccs,ds,dss,dc,dcc;
#if 0
  Double xx,y,yy,z,zz;
#endif
  mynumber u;
  int4 k;

  /* Adding `big` and reading the low word yields the table position; it is
     scaled by 4 because four consecutive table slots (k..k+3) are read
     below.  Presumably `big` is chosen so the addition rounds x to the
     table grid -- confirm. */
  u.x()=x+big.x();
  k = u.i[LOW_HALF]<<2;
  /* Remove the part of x captured by the rounding above; what is left is
     the small offset called t in the note below. */
  x=x-(u.x()-big.x());
  /* d + dd represents x + dx: d is the rounded sum and dd the part of dx
     that did not fit (assumes |x| >= |dx| -- TODO confirm). */
  d=x+dx;
  dd=(x-d)+dx;
         /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
  /* d2 + dd2 = (d + dd)^2 */
  MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
  /* Table values for the grid point Xi: (sn, ssn) and (cs, ccs) are the
     leading/trailing parts of sin(Xi) and cos(Xi) respectively. */
  sn=sincos.x(k);     /*                                  */
  ssn=sincos.x(k+1);  /*      sin(Xi) and cos(Xi)         */
  cs=sincos.x(k+2);   /*                                  */
  ccs=sincos.x(k+3);  /*                                  */
  /* Horner evaluation in d2 with coefficients s7, s5, s3, then multiplied
     by d and added to d. */
  MUL2(d2,dd2,s7.x(),ss7.x(),ds,dss,p,hx,tx,hy,ty,q,c,cc);  /* Taylor    */
  ADD2(ds,dss,s5.x(),ss5.x(),ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);      /* series    */
  ADD2(ds,dss,s3.x(),ss3.x(),ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);      /* for sin   */
  MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,d,dd,ds,dss,r,s);                         /* ds=sin(t) */

  /* Horner evaluation in d2 with coefficients c8, c6, c4, c2.  The stray
     empty statement after the first MUL2 is in the original code. */
  MUL2(d2,dd2,c8.x(),cc8.x(),dc,dcc,p,hx,tx,hy,ty,q,c,cc); ;/* Taylor    */
  ADD2(dc,dcc,c6.x(),cc6.x(),dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);      /* series    */
  ADD2(dc,dcc,c4.x(),cc4.x(),dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);      /* for cos   */
  ADD2(dc,dcc,c2.x(),cc2.x(),dc,dcc,r,s);
  /* NOTE(review): the note here says dc=cos(t), but the combination below
     (e = cs*ds - dc*sn + sn) only matches the formula above if dc stands
     for 1 - cos(t) -- confirm against the coefficient table. */
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);      /* dc=cos(t) */

  /* e = cs*ds - sn*dc + sn */
  MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
  MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  SUB2(e,ee,dc,dcc,e,ee,r,s);
  ADD2(e,ee,sn,ssn,e,ee,r,s);                    /* e+ee=sin(x+dx) */

  v[0]=e;
  v[1]=ee;
}
/* routine computes the correctly rounded (to nearest) value of atan(x). */
/*
 * Structure: after rejecting NaN, u = |x| is classified against the
 * thresholds A < B < C < D < E (defined elsewhere) and each range follows
 * the same three-step pattern:
 *   1. a fast polynomial evaluation in plain double arithmetic;
 *   2. a rounding test of the form (y = a + (b - err)) == a + (b + err):
 *      y is returned only when shifting the correction term by the error
 *      bound in either direction gives the same double;
 *   3. otherwise a more precise evaluation using the ADD2/MUL2/SUB2/DIV2
 *      macros, a second rounding test, and finally atanMp(x, pr).
 *
 * NOTE(review): EMULV, ADD2, MUL2, SUB2, DIV2, EADD and ESUB are macros
 * defined elsewhere; from their use here they appear to operate on
 * (head, tail) pairs of doubles with trailing scratch arguments -- confirm
 * against their definitions.  __signArctan(x, y) presumably applies the sign
 * of x to y -- confirm.
 */
double
atan (double x)
{
  double cor, s1, ss1, s2, ss2, t1, t2, t3, t7, t8, t9, t10, u, u2, u3,
	 v, vv, w, ww, y, yy, z, zz;
  /* t4..t6 are only declared when DLA_FMS is not defined. */
#ifndef DLA_FMS
  double t4, t5, t6;
#endif
  int i, ux, dx;
  /* Passed unchanged to atanMp; its meaning is defined by that routine. */
  static const int pr[M] = { 6, 8, 10, 32 };
  number num;

  /* Split x into its high and low 32-bit words. */
  num.d = x;
  ux = num.i[HIGH_HALF];
  dx = num.i[LOW_HALF];

  /* x=NaN */
  /* Exponent bits all set and a non-zero mantissa (high 20 bits or low
     word). */
  if (((ux & 0x7ff00000) == 0x7ff00000)
      && (((ux & 0x000fffff) | dx) != 0x00000000))
    return x + x;

  /* Regular values of x, including denormals +-0 and +-INF */
  /* NOTE(review): presumably switches to round-to-nearest for the rest of
     the function and restores the caller's mode on exit -- confirm. */
  SET_RESTORE_ROUND (FE_TONEAREST);
  u = (x < 0) ? -x : x;
  if (u < C)
    {
      if (u < B)
	{
	  if (u < A)
	    {
	      /* Smallest range: the argument itself is returned. */
	      math_check_force_underflow_nonneg (u);
	      return x;
	    }
	  else
	    {			/* A <= u < B */
	      /* Fast path: x + x*v*(d3 + v*(d5 + ... + v*d13)), v = x*x. */
	      v = x * x;
	      yy = d11.d + v * d13.d;
	      yy = d9.d + v * yy;
	      yy = d7.d + v * yy;
	      yy = d5.d + v * yy;
	      yy = d3.d + v * yy;
	      yy *= x * v;

	      /* Rounding test with relative bound U1. */
	      if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
		return y;

	      /* Slow path: higher-degree polynomial (f3..f19); the top terms
	         are evaluated in plain double, the rest in pair arithmetic. */
	      EMULV (x, x, v, vv, t1, t2, t3, t4, t5);	/* v+vv=x^2 */

	      s1 = f17.d + v * f19.d;
	      s1 = f15.d + v * s1;
	      s1 = f13.d + v * s1;
	      s1 = f11.d + v * s1;
	      s1 *= v;

	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7,
		    t8);
	      ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
	      /* Rounding test with relative bound U5; multi-precision
	         fallback if it fails. */
	      if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
		return y;

	      return atanMp (x, pr);
	    }
	}
      else
	{			/* B <= u < C */
	  /* Table index derived from u.  NOTE(review): the TWO52 add/subtract
	     looks like the usual round-to-integer trick applied to TWO8 * u
	     -- confirm against the constants' definitions. */
	  i = (TWO52 + TWO8 * u) - TWO52;
	  i -= 16;
	  /* Fast path: polynomial in z = u - cij[i][0] with coefficients
	     cij[i][2..6], added to the table value cij[i][1]. */
	  z = u - cij[i][0].d;
	  yy = cij[i][5].d + z * cij[i][6].d;
	  yy = cij[i][4].d + z * yy;
	  yy = cij[i][3].d + z * yy;
	  yy = cij[i][2].d + z * yy;
	  yy *= z;

	  t1 = cij[i][1].d;
	  /* Pick the error bound for the sub-range that i falls in. */
	  if (i < 112)
	    {
	      if (i < 48)
		u2 = U21;	/* u < 1/4        */
	      else
		u2 = U22;
	    }			/* 1/4 <= u < 1/2 */
	  else
	    {
	      if (i < 176)
		u2 = U23;	/* 1/2 <= u < 3/4 */
	      else
		u2 = U24;
	    }			/* 3/4 <= u <= 1  */
	  if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
	    return __signArctan (x, y);

	  /* Slow path: same scheme with the hij table; hij[i][11..15] are
	     used in plain double, hij[i][1..10] as (head, tail) pairs. */
	  z = u - hij[i][0].d;

	  s1 = hij[i][14].d + z * hij[i][15].d;
	  s1 = hij[i][13].d + z * s1;
	  s1 = hij[i][12].d + z * s1;
	  s1 = hij[i][11].d + z * s1;
	  s1 *= z;

	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
	  if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
	    return __signArctan (x, y);

	  return atanMp (x, pr);
	}
    }
  else
    {
      if (u < D)
	{			/* C <= u < D */
	  /* Work with the reciprocal: w + ww approximates 1/u, where ww is a
	     correction computed from the product w*u (t1 + t2). */
	  w = 1 / u;
	  EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
	  ww = w * ((1 - t1) - t2);
	  /* Same table lookup as above, indexed by w instead of u. */
	  i = (TWO52 + TWO8 * w) - TWO52;
	  i -= 16;
	  z = (w - cij[i][0].d) + ww;

	  yy = cij[i][5].d + z * cij[i][6].d;
	  yy = cij[i][4].d + z * yy;
	  yy = cij[i][3].d + z * yy;
	  yy = cij[i][2].d + z * yy;
	  /* The table result is subtracted from the constant pair
	     (HPI, HPI1).  NOTE(review): presumably pi/2 split into head and
	     tail -- confirm. */
	  yy = HPI1 - z * yy;

	  t1 = HPI - cij[i][1].d;
	  if (i < 112)
	    u3 = U31;		/* w <  1/2 */
	  else
	    u3 = U32;		/* w >= 1/2 */
	  /* Rounding test; here the bound u3 is applied as an absolute
	     amount, not scaled by t1. */
	  if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
	    return __signArctan (x, y);

	  /* Slow path: recompute 1/u as a pair, then evaluate with hij. */
	  DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
		t10);
	  t1 = w - hij[i][0].d;
	  EADD (t1, ww, z, zz);

	  s1 = hij[i][14].d + z * hij[i][15].d;
	  s1 = hij[i][13].d + z * s1;
	  s1 = hij[i][12].d + z * s1;
	  s1 = hij[i][11].d + z * s1;
	  s1 *= z;

	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
	  SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
	  if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
	    return __signArctan (x, y);

	  return atanMp (x, pr);
	}
      else
	{
	  if (u < E)
	    {			/* D <= u < E */
	      /* Fast path: the d3..d13 polynomial evaluated at w ~ 1/u, with
	         the result subtracted from (HPI, HPI1). */
	      w = 1 / u;
	      v = w * w;
	      EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);

	      yy = d11.d + v * d13.d;
	      yy = d9.d + v * yy;
	      yy = d7.d + v * yy;
	      yy = d5.d + v * yy;
	      yy = d3.d + v * yy;
	      yy *= w * v;

	      /* ww: correction to w, as in the C <= u < D range. */
	      ww = w * ((1 - t1) - t2);
	      /* t3 + cor = HPI - w. */
	      ESUB (HPI, w, t3, cor);
	      yy = ((HPI1 + cor) - ww) - yy;
	      if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
		return __signArctan (x, y);

	      /* Slow path: 1/u and its square as pairs, then the f3..f19
	         polynomial, subtracted from (HPI, HPI1). */
	      DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
		    t10);
	      MUL2 (w, ww, w, ww, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);

	      s1 = f17.d + v * f19.d;
	      s1 = f15.d + v * s1;
	      s1 = f13.d + v * s1;
	      s1 = f11.d + v * s1;
	      s1 *= v;

	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
	      SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);

	      if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
		return __signArctan (x, y);

	      return atanMp (x, pr);
	    }
	  else
	    {
	      /* u >= E */
	      /* Largest range: the result is the constant HPI or MHPI,
	         chosen by the sign of x. */
	      if (x > 0)
		return HPI;
	      else
		return MHPI;
	    }
	}
    }
}