/// Scalar single-precision arcsine.
///
/// - |a0| below the constant 0x38d1b717 (about 1.0e-4f): a0 is returned as is.
/// - |a0| > 1: NaN is returned.
/// - |a0| > 0.5: the argument is reduced with t = (1 - |a0|) / 2 and the
///   result is rebuilt as pi/2 - 2 * p(sqrt(t)).
/// - otherwise the polynomial is evaluated directly with t = a0^2.
///
/// The sign of a0 is re-applied to the result at the end.
static inline A0 asin(const A0& a0)
{
  const A0 magnitude = nt2::abs(a0);
  const A0 sign_bit  = bitofsign(a0);

  if (magnitude < single_constant<A0,0x38d1b717>()) return a0;
  if (magnitude > One<A0>()) return Nan<A0>();

  const bool upper_half = (magnitude > Half<A0>());

  A0 t;                  // polynomial variable
  A0 arg = magnitude;    // value the odd polynomial is applied to
  if (upper_half)
  {
    t   = Half<A0>()*oneminus(magnitude);
    arg = sqrt(t);
  }
  else
  {
    t = sqr(magnitude);
  }

  // Horner evaluation in t, finished with arg + arg*t*poly(t).
  A0 poly = madd(t, single_constant<A0,0x3d2cb352>(), single_constant<A0,0x3cc617e3>());
  poly = madd(poly, t, single_constant<A0,0x3d3a3ec7>());
  poly = madd(poly, t, single_constant<A0,0x3d9980f6>());
  poly = madd(poly, t, single_constant<A0,0x3e2aaae4>());
  poly = madd(poly, t*arg, arg);

  if (upper_half)
  {
    poly = poly+poly;
    poly = Pio_2<A0>()-poly;
  }
  return b_xor(poly, sign_bit);
}
// compute the center and radius of the circumscribed circle defined by the three vertices // i.e. the osculating circle of the three vertices nv_scalar nv_find_circ_circle( vec3& center, const vec3& v1, const vec3& v2, const vec3& v3) { vec3 e0; vec3 e1; nv_scalar d1, d2, d3; nv_scalar c1, c2, c3, oo_c; sub(e0, v3, v1); sub(e1, v2, v1); dot(d1, e0, e1); sub(e0, v3, v2); sub(e1, v1, v2); dot(d2, e0, e1); sub(e0, v1, v3); sub(e1, v2, v3); dot(d3, e0, e1); c1 = d2 * d3; c2 = d3 * d1; c3 = d1 * d2; oo_c = nv_one / (c1 + c2 + c3); mult(center,v1,c2 + c3); madd(center,v2,c3 + c1); madd(center,v3,c1 + c2); center *= oo_c * nv_zero_5; return nv_zero_5 * _sqrt((d1 + d2) * (d2 + d3) * (d3 + d1) * oo_c); }
// compute the center and radius of the inscribed circle defined by the three vertices nv_scalar nv_find_in_circle(vec3& center, const vec3& v1, const vec3& v2, const vec3& v3) { nv_scalar area = nv_area(v1, v2, v3); // if the area is null if (area < nv_eps) { center = v1; return nv_zero; } nv_scalar oo_perim = nv_one / nv_perimeter(v1, v2, v3); vec3 diff; sub(diff, v2, v3); mult(center, v1, nv_norm(diff)); sub(diff, v3, v1); madd(center, v2, nv_norm(diff)); sub(diff, v1, v2); madd(center, v3, nv_norm(diff)); center *= oo_perim; return nv_two * area * oo_perim; }
/// Branch-free single-precision arcsine using bitwise lane masks.
///
/// Every lane falls in one of three classes, selected by comparison masks:
///   - tiny  : |a0| < 0x38d1b717 (about 1.0e-4f); the lane returns |a0| itself,
///   - large : |a0| > 0.5; reduced via t = (1 - |a0|) / 2, arg = sqrt(t),
///             result pi/2 - 2 * p,
///   - other : t = a0^2, arg = |a0|.
/// The sign of a0 is xor-ed back into the result.
static inline A0 asin(const A0& a0)
{
  const A0 magnitude = abs(a0);
  const A0 sign_bit  = bitofsign(a0);

  const A0 tiny_mask    = lt(magnitude, single_constant<A0, 0x38d1b717>());
  const A0 large_mask   = gt(magnitude, Half<A0>());
  const A0 special_mask = b_or(tiny_mask, large_mask);

  // Per-class polynomial variable; exactly one of the three terms is non-zero per lane.
  const A0 tiny_t  = b_and(magnitude, tiny_mask);
  const A0 large_t = b_and(Half<A0>()*oneminus(magnitude), large_mask);
  const A0 mid_t   = b_notand(special_mask, sqr(magnitude));
  const A0 t       = b_or(b_or(mid_t, tiny_t), large_t);

  // Per-class odd argument; tiny lanes get zero here.
  const A0 mid_arg   = b_notand(special_mask, magnitude);
  const A0 large_arg = b_and(sqrt(t), large_mask);
  const A0 arg       = b_or(large_arg, mid_arg);

  A0 poly = madd(t, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, t, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, t, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, t, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, t*arg, arg);

  // Tiny lanes keep t (which holds |a0|); the others take the polynomial.
  const A0 direct = select(tiny_mask, t, poly);

  // Large lanes: pi/2 - 2 * direct.
  A0 folded = direct+direct;
  folded = Pio_2<A0>()-folded;

  const A0 result = select(large_mask, folded, direct);
  return b_xor(result, sign_bit);
}
/*
 * mstrtoul -- parse an unsigned multi-precision integer written in base b.
 *
 *   a : receives the parsed value (reset to 0 first).
 *   s : text to parse; digits are 0-9 and, for b > 10, letters in either case.
 *   p : if not NULL, *p is set to the first character that was not consumed.
 *   b : numeric base.  Only hexadecimal-digit characters are ever examined,
 *       so letter digits beyond 'f' are never accepted.
 *
 * Parsing stops at the first character that is not a valid digit for base b.
 * The return type is left implicit, as in the original declaration, and no
 * value is returned.
 */
mstrtoul(MINT *a, char *s, char **p, short int b)
{
  MINT y, base;
  int c, dectop, alphatop = 0;
  short qy;
  int i;

  mset(0,a);
  MSET(b,&base);

  /* y is a one-limb number whose single limb is qy, the current digit. */
  y.len = 1;
  y.val = &qy;

  /* Highest decimal digit character valid in this base. */
  dectop = (b <= 10) ? '0' + b - 1 : '9';
  /* Highest letter digit: 'a' is 10, so base b tops out at 'a' + (b - 11). */
  if (b > 10) alphatop = 'a' + b - 11;

  i = 0;
  /* The <ctype.h> classifiers require an unsigned char value (or EOF). */
  while (isxdigit(c = (unsigned char)s[i++])) {
    if (isupper(c)) c = c - 'A' + 'a';

    if (c >= '0' && c <= dectop) {
      qy = c - '0';
    } else if (b > 10 && c >= 'a' && c <= alphatop) {
      qy = c - 'a' + 10;
    } else {
      /* A hex digit that is not a digit of base b ends the number. */
      break;
    }

    mmult(a,&base,a);
    if (qy != 0) madd(a,&y,a);
  }

  /* i was advanced past the character that stopped the scan. */
  if (p!=NULL) (*p)=(char *)s+i-1;
}
/// Scalar single-precision arctangent kernel; operates on |a0|.
///
/// - a0 == 0 returns zero, infinite a0 returns pi/2.
/// - |a0| > 0x401a827a (2.414213562373095, tan 3pi/8): offset pi/2, argument -1/x.
/// - |a0| > 0x3ed413cd (0.4142135623730950, tan pi/8): offset pi/4,
///   argument (x - 1) / (x + 1).
/// - otherwise: offset 0, argument x.
///
/// The reduced argument r is then fed to
///   offset + r + r * z * (((c3*z + c2)*z + c1)*z + c0),  z = r^2,
/// evaluated as two interleaved degree-1 pieces in z combined through z^2.
static inline A0 kernel_atan(const A0& a0)
{
  if (is_eqz(a0)) return Zero<A0>();
  if (is_inf(a0)) return Pio_2<A0>();

  A0 reduced = nt2::abs(a0);
  A0 offset  = Zero<A0>();

  if (reduced > single_constant<A0,0x401a827a>())
  {
    offset  = Pio_2<A0>();
    reduced = -rec(reduced);
  }
  else if (reduced > single_constant<A0,0x3ed413cd>())
  {
    offset  = Pio_4<A0>();
    reduced = minusone(reduced)/oneplus(reduced);
  }

  const A0 z = sqr(reduced);

  const A0 upper = madd(z, single_constant<A0,0x3da4f0d1>(), single_constant<A0,0xbe0e1b85>());
  const A0 lower = madd(z, single_constant<A0,0x3e4c925f>(), single_constant<A0,0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(z), lower);

  return add(offset, madd(reduced, mul(poly, z), reduced));
}
/// Branch-free single-precision arcsine on a native register, using logical
/// masks (bA0) and zeroing selects.
///
/// Lane classes:
///   - tiny  : |a0| < 0x38d1b717 (about 1.0e-4f); the lane returns |a0| itself,
///   - large : |a0| > 0.5; t = (1 - |a0|) / 2, arg = sqrt(t), result pi/2 - 2*p,
///   - other : t = a0^2, arg = |a0|.
/// The sign of the input is xor-ed back into the result.
static inline A0_n asin(const A0_n a0_n)
{
  const A0 a0 = { a0_n };

  const A0 magnitude = nt2::abs(a0);
  const A0 sign_bit  = bitofsign(a0);

  const bA0 is_tiny    = lt(magnitude, single_constant<A0, 0x38d1b717>());
  const bA0 is_large   = gt(magnitude, Half<A0>());
  const bA0 is_special = logical_or(is_tiny, is_large);

  // Per-class polynomial variable; only one term is non-zero in any lane.
  const A0 tiny_t  = if_else_zero(is_tiny, magnitude);
  const A0 large_t = if_else_zero(is_large, Half<A0>()*oneminus(magnitude));
  const A0 mid_t   = if_zero_else(is_special, sqr(magnitude));
  const A0 t       = b_or(b_or(mid_t, tiny_t), large_t);

  // Per-class odd argument; tiny lanes get zero here.
  const A0 mid_arg   = if_zero_else(is_special, magnitude);
  const A0 large_arg = if_else_zero(is_large, sqrt(t));
  const A0 arg       = b_or(large_arg, mid_arg);

  A0 poly = madd(t, single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
  poly = madd(poly, t, single_constant<A0, 0x3d3a3ec7>());
  poly = madd(poly, t, single_constant<A0, 0x3d9980f6>());
  poly = madd(poly, t, single_constant<A0, 0x3e2aaae4>());
  poly = madd(poly, t*arg, arg);

  // Tiny lanes keep t (which holds |a0|); the others take the polynomial.
  const A0 direct = select(is_tiny, t, poly);

  // Large lanes: pi/2 - 2 * direct.
  A0 folded = direct+direct;
  folded = Pio_2<A0>()-folded;

  const A0 result = select(is_large, folded, direct);
  return b_xor(result, sign_bit);
}
void QuadMesh::interpolate(const RTCInterpolateArguments* const args) { unsigned int primID = args->primID; float u = args->u; float v = args->v; RTCBufferType bufferType = args->bufferType; unsigned int bufferSlot = args->bufferSlot; float* P = args->P; float* dPdu = args->dPdu; float* dPdv = args->dPdv; float* ddPdudu = args->ddPdudu; float* ddPdvdv = args->ddPdvdv; float* ddPdudv = args->ddPdudv; unsigned int valueCount = args->valueCount; /* calculate base pointer and stride */ assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); const char* src = nullptr; size_t stride = 0; if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { src = vertexAttribs[bufferSlot].getPtr(); stride = vertexAttribs[bufferSlot].getStride(); } else { src = vertices[bufferSlot].getPtr(); stride = vertices[bufferSlot].getStride(); } for (unsigned int i=0; i<valueCount; i+=4) { const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount)); const size_t ofs = i*sizeof(float); const Quad& tri = quad(primID); const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); const vfloat4 p3 = vfloat4::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]); const vbool4 left = u+v <= 1.0f; const vfloat4 Q0 = select(left,p0,p2); const vfloat4 Q1 = select(left,p1,p3); const vfloat4 Q2 = select(left,p3,p1); const vfloat4 U = select(left,u,vfloat4(1.0f)-u); const vfloat4 V = select(left,v,vfloat4(1.0f)-v); const vfloat4 W = 1.0f-U-V; if (P) { vfloat4::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); } if (dPdu) { assert(dPdu); vfloat4::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)); assert(dPdv); vfloat4::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)); } if (ddPdudu) { assert(ddPdudu); 
vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero)); assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero)); assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero)); } } }
/// Evaluates the degree-6 polynomial
///   1 + x * (Q(x^2) + x * P(x^2))
/// where P and Q are quadratic in x^2: Q supplies the odd-power terms and P
/// the even-power terms. Each is evaluated by Horner's rule with madd.
static inline A0 approx(const A0& x)
{
  const A0 xx = sqr(x);

  // Even-power coefficients (x^2, x^4, x^6 after the outer multiplications).
  A0 even_terms = madd(Const<A0,0x3e5345fd>(), xx, Const<A0,0x3f95eceb>());
  even_terms = madd(even_terms, xx, Const<A0,0x4029a924>());

  // Odd-power coefficients (x, x^3, x^5 after the outer multiplication).
  A0 odd_terms = madd(Const<A0,0x3f0ac229>(), xx, Const<A0,0x400237b4>());
  odd_terms = madd(odd_terms, xx, Const<A0,0x40135d8e>());

  return oneplus(x*madd(x, even_terms, odd_terms));
}
/// Branch-free single-precision base-2 logarithm.
///
/// kernel_log supplies the exponent (as a float), the reduced argument, its
/// square and a polynomial term. The log of the fraction is multiplied by
/// log2(e), split as 1 + 0.44269504088896340735992, and the exponent is added.
///
/// Special inputs are patched without branches:
///   - lanes where a0 < 0 or a0 is NaN are or-ed with the comparison mask,
///   - limit = a0 - 1/|a0| is infinite only for a0 = +inf (+inf) and
///     a0 = 0 (-inf); seladd is presumably a masked add that folds this
///     limit into those lanes only.
static inline A0 log2(const A0& a0)
{
  A0 frac, expo, frac_sq, poly;
  kernel_log(a0, expo, frac, frac_sq, poly);

  poly = madd(Mhalf<A0>(), frac_sq, poly);

  // (frac + poly) * 0.44269504088896340735992
  const A0 scaled = madd(frac, single_constant<A0, 0x3ee2a8ed>(),
                         mul(poly, single_constant<A0, 0x3ee2a8ed>()));
  const A0 regular = ((scaled+poly)+frac)+expo;

  const A0 limit   = a0-rec(abs(a0)); // trick to reduce selection testing
  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));

  return seladd(is_inf(limit), b_or(regular, invalid), limit);
}
/// Branch-free single-precision natural logarithm.
///
/// kernel_log supplies the exponent (as a float), the reduced argument, its
/// square and a polynomial term. The exponent contributes exponent * ln(2),
/// with ln(2) split into a high part 0x3f318000 (0.693359375) and a low part
/// 0xb95e8083 (about -2.1219444e-4).
///
/// Special inputs are patched without branches:
///   - lanes where a0 < 0 or a0 is NaN are replaced through if_nan_else,
///   - limit = a0 - 1/|a0| is infinite only for a0 = +inf (+inf) and
///     a0 = 0 (-inf); seladd is presumably a masked add that folds this
///     limit into those lanes only.
static inline A0 log(const A0& a0)
{
  A0 frac, expo, frac_sq, poly;
  kernel_log(a0, expo, frac, frac_sq, poly);

  poly = madd(expo, single_constant<A0, 0xb95e8083>(), poly);
  poly = madd(Mhalf<A0>(), frac_sq, poly);
  const A0 frac_log = frac + poly;

  const A0 limit = a0-rec(abs(a0)); // trick to reduce selection testing

  A0 regular = madd(single_constant<A0, 0x3f318000>(), expo, frac_log);
  regular = if_nan_else(logical_or(is_ltz(a0), is_nan(a0)), regular);

  return seladd(is_inf(limit), regular, limit);
}
/// Scalar single-precision natural logarithm.
///
/// Special cases are handled up front:
///   +inf -> +inf, 0 -> -inf, NaN or negative -> NaN.
/// Otherwise kernel_log supplies the exponent (as a float), the reduced
/// argument, its square and a polynomial term, and the exponent contributes
/// exponent * ln(2) with ln(2) split into a high part 0x3f318000
/// (0.693359375) and a low part 0xb95e8083 (about -2.1219444e-4).
static inline A0 log(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  if (a0 == Inf<stA0>()) return a0;
  if (is_eqz(a0)) return Minf<stA0>();
  if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<stA0>();

  A0 frac, expo, frac_sq, poly;
  kernel_log(a0, expo, frac, frac_sq, poly);

  poly = madd(expo, single_constant<stA0, 0xb95e8083>(), poly);
  poly = madd(Mhalf<stA0>(), frac_sq, poly);

  const A0 frac_log = frac + poly;
  return madd(single_constant<stA0, 0x3f318000>(), expo, frac_log);
}
void QuadMesh::interpolate(unsigned primID, float u, float v, RTCBufferType buffer, float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, size_t numFloats) { /* test if interpolation is enabled */ #if defined(DEBUG) if ((scene->aflags & RTC_INTERPOLATE) == 0) throw_RTCError(RTC_INVALID_OPERATION,"rtcInterpolate can only get called when RTC_INTERPOLATE is enabled for the scene"); #endif /* calculate base pointer and stride */ assert((buffer >= RTC_VERTEX_BUFFER0 && buffer < RTCBufferType(RTC_VERTEX_BUFFER0 + numTimeSteps)) || (buffer >= RTC_USER_VERTEX_BUFFER0 && buffer <= RTC_USER_VERTEX_BUFFER1)); const char* src = nullptr; size_t stride = 0; if (buffer >= RTC_USER_VERTEX_BUFFER0) { src = userbuffers[buffer&0xFFFF].getPtr(); stride = userbuffers[buffer&0xFFFF].getStride(); } else { src = vertices[buffer&0xFFFF].getPtr(); stride = vertices[buffer&0xFFFF].getStride(); } for (size_t i=0; i<numFloats; i+=VSIZEX) { const vboolx valid = vintx((int)i)+vintx(step) < vintx(int(numFloats)); const size_t ofs = i*sizeof(float); const Quad& tri = quad(primID); const vfloatx p0 = vfloatx::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); const vfloatx p1 = vfloatx::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); const vfloatx p2 = vfloatx::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); const vfloatx p3 = vfloatx::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]); const vboolx left = u+v <= 1.0f; const vfloatx Q0 = select(left,p0,p2); const vfloatx Q1 = select(left,p1,p3); const vfloatx Q2 = select(left,p3,p1); const vfloatx U = select(left,u,vfloatx(1.0f)-u); const vfloatx V = select(left,v,vfloatx(1.0f)-v); const vfloatx W = 1.0f-U-V; if (P) { vfloatx::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); } if (dPdu) { assert(dPdu); vfloatx::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)); assert(dPdv); vfloatx::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)); } if (ddPdudu) { assert(ddPdudu); vfloatx::storeu(valid,ddPdudu+i,vfloatx(zero)); assert(ddPdvdv); 
vfloatx::storeu(valid,ddPdvdv+i,vfloatx(zero)); assert(ddPdudv); vfloatx::storeu(valid,ddPdudv+i,vfloatx(zero)); } } }
static inline void kernel_log(const A0& a0, A0& fe, A0& x, A0& x2, A0& y) { typedef typename meta::as_integer<A0, signed>::type int_type; int_type e; boost::fusion::tie(x, e) = fast_frexp(a0); // std::cout << "x " << x << " e " << e << std::endl; // bf::tie(x, e) = frexp(a0); // std::cout << "x " << x << " e " << e << std::endl; int_type x_lt_sqrthf = nt2::simd::native_cast<int_type>(gt(single_constant<A0, 0x3f3504f3>(),x)); e = e+x_lt_sqrthf; x = x+b_and(x, x_lt_sqrthf)+single_constant<A0, 0xbf800000>(); x2 = sqr(x); A0 y1 = madd(single_constant<A0, 0x3d9021bb>() ,x2,single_constant<A0, 0x3def251a>() ); A0 y2 = madd(single_constant<A0, 0xbdebd1b8>() ,x2,single_constant<A0, 0xbdfe5d4f>() ); y1 = madd(y1,x2,single_constant<A0, 0x3e11e9bf>() ); y2 = madd(y2,x2,single_constant<A0, 0xbe2aae50>() ); y1 = madd(y1,x2,single_constant<A0, 0x3e4cceac>() ); y2 = madd(y2,x2,single_constant<A0, 0xbe7ffffc>() ); y1 = madd(y1,x2,single_constant<A0, 0x3eaaaaaa>() ); y = madd(x,y2,y1)*x*x2; fe = tofloat(e); }
static inline void kernel_log(const A0& a0, A0& fe, A0& x, A0& x2, A0& y) { typedef typename meta::as_integer<A0, signed>::type int_type; typedef typename meta::strip<A0>::type stA0; int_type e; boost::fusion::vector_tie(x, e) = fast_frexp(a0); int_type x_lt_sqrthf = -(single_constant<stA0, 0x3f3504f3>() > x); e += x_lt_sqrthf; // if (x_lt_sqrthf) x+= x; // x += single_constant<A0, 0xbf800000>(); x += b_and(x, x_lt_sqrthf)+single_constant<stA0,0xbf800000>(); x2 = sqr(x); A0 y1 = madd(single_constant<stA0, 0x3d9021bb>() ,x2,single_constant<stA0, 0x3def251a>() ); A0 y2 = madd(single_constant<stA0, 0xbdebd1b8>() ,x2,single_constant<stA0, 0xbdfe5d4f>() ); y1 = madd(y1,x2,single_constant<stA0, 0x3e11e9bf>() ); y2 = madd(y2,x2,single_constant<stA0, 0xbe2aae50>() ); y1 = madd(y1,x2,single_constant<stA0, 0x3e4cceac>() ); y2 = madd(y2,x2,single_constant<stA0, 0xbe7ffffc>() ); y1 = madd(y1,x2,single_constant<stA0, 0x3eaaaaaa>() ); y = madd(x,y2,y1)*x*x2; fe = tofloat(e); }
int main (int argc, char *argv[]) { MINT *a, *b, *c, *d; short h; mp_set_memory_functions (NULL, NULL, NULL); a = itom (123); b = xtom ("DEADBEEF"); c = itom (0); d = itom (0); move (a, b); madd (a, b, c); msub (a, b, c); mult (a, b, c); mdiv (b, a, c, d); sdiv (b, 2, c, &h); msqrt (a, c, d); pow (b, a, a, c); rpow (a, 3, c); gcd (a, b, c); mcmp (a, b); if (argc > 1) { min (c); mout (a); } mtox (b); mfree(a); exit (0); }
/// Scalar single-precision base-2 logarithm.
///
/// Special cases are handled up front:
///   +inf -> +inf, 0 -> -inf, NaN or negative -> NaN.
/// Otherwise kernel_log supplies the exponent (as a float), the reduced
/// argument, its square and a polynomial term. The log of the fraction is
/// multiplied by log2(e), split as 1 + 0.44269504088896340735992, and the
/// exponent is added last.
static inline A0 log2(const A0& a0)
{
  typedef typename meta::strip<A0>::type stA0;

  if (a0 == Inf<stA0>()) return a0;
  if (is_eqz(a0)) return Minf<stA0>();
  if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<stA0>();

  A0 frac, expo, frac_sq, poly;
  kernel_log(a0, expo, frac, frac_sq, poly);

  poly = madd(Mhalf<stA0>(), frac_sq, poly);

  // (frac + poly) * 0.44269504088896340735992
  const A0 scaled = madd(frac,
                         single_constant<stA0, 0x3ee2a8ed>(),
                         mul(poly, single_constant<stA0, 0x3ee2a8ed>()));

  return ((scaled+poly)+frac)+expo;
}
// Reflect l about n: r = 2 * (n . l) * n - l.
//
// r is written in place and also returned by reference. The formula is the
// usual mirror reflection only when n has unit length; n is not normalised
// here.
vec3 & reflect(vec3& r, const vec3& n, const vec3& l)
{
    nv_scalar twice_n_dot_l;
    dot(twice_n_dot_l, n, l);
    twice_n_dot_l = nv_two * twice_n_dot_l;

    mult(r, l, -nv_one);
    madd(r, n, twice_n_dot_l);
    return r;
}
/// Branch-free single-precision natural logarithm (bitwise-mask variant).
///
/// kernel_log supplies the exponent (as a float), the reduced argument, its
/// square and a polynomial term. The exponent contributes exponent * ln(2),
/// with ln(2) split into a high part 0x3f318000 (0.693359375) and a low part
/// 0xb95e8083 (about -2.1219444e-4).
///
/// Special inputs are patched without branches:
///   - lanes where a0 < 0 or a0 is NaN are or-ed with the comparison mask,
///   - limit = a0 - 1/|a0| is infinite only for a0 = +inf (+inf) and
///     a0 = 0 (-inf); seladd is presumably a masked add that folds this
///     limit into those lanes only.
static inline A0 log(const A0& a0)
{
  A0 frac, expo, frac_sq, poly;
  kernel_log(a0, expo, frac, frac_sq, poly);

  poly = madd(expo, single_constant<A0, 0xb95e8083>(), poly);
  poly = madd(Mhalf<A0>(), frac_sq, poly);
  const A0 frac_log = frac + poly;

  const A0 limit   = a0-rec(abs(a0)); // trick to reduce selection testing
  const A0 regular = madd(single_constant<A0, 0x3f318000>(), expo, frac_log);
  const A0 invalid = b_or(is_ltz(a0), is_nan(a0));

  return seladd(is_inf(limit), b_or(regular, invalid), limit);
}
// Small driver: runs testcout(), then prints the result of madd(1.0, 2.0).
// The command-line arguments are not used.
int main(int argc, const char * argv[])
{
    testcout();

    const double sum = madd(1.0, 2.0);
    printf("1.0 + 2.0 = %f\n", sum);

    return 0;
}
/// Branch-free single-precision arctangent kernel on a native register;
/// operates on |a0|.
///
/// Lane classes, x = |a0|:
///   - x >= 0x401a827a (tan 3pi/8)              : offset pi/2, argument -1/x,
///   - 0x3ed413cd (tan pi/8) <= x < tan 3pi/8    : offset pi/4,
///                                                 argument (x-1)/(x+1),
///   - otherwise                                 : offset 0, argument x.
/// The result is offset + r + r * z * poly(z) with z = r^2.
static inline A0_n kernel_atan(const A0_n a0_n)
{
  const A0 a0 = {a0_n};
  const A0 magnitude = nt2::abs(a0); // non-negative from here on

  const bA0 below_tan3pio8 = lt(magnitude, single_constant<A0, 0x401a827a>());
  const bA0 in_middle      = logical_and(ge(magnitude, single_constant<A0, 0x3ed413cd>()),
                                         below_tan3pio8);

  // Additive offset per class.
  A0 offset = if_zero_else(below_tan3pio8, Pio_2<A0>());
  offset = select(in_middle, Pio_4<A0>(), offset);

  // Reduced argument per class.
  A0 reduced = select(below_tan3pio8, magnitude, -rec(magnitude));
  reduced = select(in_middle, (minusone(magnitude)/oneplus(magnitude)), reduced);

  const A0 z = sqr(reduced);

  const A0 upper = madd(z, single_constant<A0, 0x3da4f0d1>(), single_constant<A0, 0xbe0e1b85>());
  const A0 lower = madd(z, single_constant<A0, 0x3e4c925f>(), single_constant<A0, 0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(z), lower);

  return add(offset, madd(reduced, mul(poly, z), reduced));
}
/// Branch-free single-precision arctangent using bitwise lane masks.
///
/// Lane classes, x = |a0|:
///   - x >= 0x401a827a (tan 3pi/8)              : offset pi/2, argument -1/x,
///   - 0x3ed413cd (tan pi/8) <= x < tan 3pi/8    : offset pi/4,
///                                                 argument (x-1)/(x+1),
///   - otherwise                                 : offset 0, argument x.
/// The result is offset + r + r * z * poly(z) with z = r^2, and the sign of
/// a0 is xor-ed back in at the end.
static inline A0 atan(const A0& a0)
{
  const A0 magnitude = nt2::abs(a0);
  const A0 sign_bit  = bitofsign(a0);

  const A0 below_tan3pio8 = lt(magnitude, single_constant<A0, 0x401a827a>());
  const A0 in_middle      = b_and(ge(magnitude, single_constant<A0, 0x3ed413cd>()),
                                  below_tan3pio8);

  // Additive offset per class.
  A0 offset = b_notand(below_tan3pio8, Pio_2<A0>());
  offset = select(in_middle, Pio_4<A0>(), offset);

  // Reduced argument per class.
  A0 reduced = select(below_tan3pio8, magnitude, -rec(magnitude));
  reduced = select(in_middle, (minusone(magnitude)/oneplus(magnitude)), reduced);

  const A0 z = sqr(reduced);

  const A0 upper = madd(z, single_constant<A0, 0x3da4f0d1>(), single_constant<A0, 0xbe0e1b85>());
  const A0 lower = madd(z, single_constant<A0, 0x3e4c925f>(), single_constant<A0, 0xbeaaaa2a>());
  const A0 poly  = madd(upper, sqr(z), lower);

  const A0 unsigned_result = add(offset, madd(reduced, mul(poly, z), reduced));
  return b_xor(unsigned_result, sign_bit);
}
/// Evaluates x + x * z * P(z), where P is the degree-5 polynomial whose six
/// double-precision coefficients are given below as bit patterns.
/// NOTE(review): callers presumably pass z = x^2 - confirm at the call sites.
static inline A0 sin_eval(const A0& z, const A0& x)
{
  const A0 poly = horner< NT2_HORNER_COEFF_T(stype, 6,
                                             (0x3de5d8fd1fcf0ec1ll,
                                              0xbe5ae5e5a9291691ll,
                                              0x3ec71de3567d4896ll,
                                              0xbf2a01a019bfdf03ll,
                                              0x3f8111111110f7d0ll,
                                              0xbfc5555555555548ll)
                                            ) > (z);
  const A0 scaled = mul(poly, z);
  return madd(scaled, x, x);
}
/*
 * mpolsub -- polynomial subtraction, r = p - q.
 *
 * The terms of p and q are merged into a temporary polynomial s, using
 * (*cmp_exp) to compare exponent vectors:
 *   - result  1: the term of p is copied to s,
 *   - result -1: the term of q is copied to s and negated,
 *   - result  0: the coefficients are combined as p - q; the term is kept
 *                only when mtest() reports a non-zero coefficient.
 * Any terms left over in p are copied, any left over in q are copied and
 * negated.
 *
 * NOTE(review): the merge presumably relies on both term lists being sorted
 * in the order defined by cmp_exp -- confirm against the callers.
 * NOTE(review): r is only released after s has been built, which presumably
 * allows r to alias p or q -- confirm.
 */
void mpolsub(MPOL *p, MPOL *q, MPOL *r)
{
  register ip=0,iq=0,is=0;   /* cursors into p, q and the result s */
  MPOL s;

  /* The result cannot have more terms than p and q together. */
  POL_ALLOC(&s,p->nterms + q->nterms);

  /* Merge while both inputs still have terms. */
  while ((ip<p->nterms)&&(iq<q->nterms)) {
#if (! INTR)
    (*PollPtr)();
#endif
    switch ((*cmp_exp)(MEXPO(p,ip),MEXPO(q,iq))) {
      case 1 :
        /* Term only from p: copy it. */
        expocopy(MEXPO(p,ip),MEXPO(&s,is));
        MCOPY(&(p->coefs[ip]),&(s.coefs[is]));
        ip++;is++;break;
      case -1 :
        /* Term only from q: copy it negated. */
        expocopy(MEXPO(q,iq),MEXPO(&s,is));
        MCOPY(&(q->coefs[iq]),&(s.coefs[is]));
        mnegate(&(s.coefs[is]));
        iq++;is++;break;
      case 0 :
        /* Same exponent: coefficient is p - q, computed as (-q) + p. */
        MCOPY(&(q->coefs[iq]),&(s.coefs[is]));
        mnegate(&(s.coefs[is]));
        madd(&(p->coefs[ip]),&(s.coefs[is]),&(s.coefs[is]));
        /* Keep the term only if the difference did not cancel. */
        if (mtest(&(s.coefs[is]))) {
          expocopy(MEXPO(p,ip),MEXPO(&s,is));
          is++;
        };
        ip++;iq++;
    };
  };

  /* Remaining terms of p are copied unchanged. */
  while (ip<p->nterms) {
#if (! INTR)
    (*PollPtr)();
#endif
    expocopy(MEXPO(p,ip),MEXPO(&s,is));
    MCOPY(&(p->coefs[ip]),&(s.coefs[is]));
    ip++; is++;
  };

  /* Remaining terms of q are copied negated. */
  while (iq<q->nterms) {
#if (! INTR)
    (*PollPtr)();
#endif
    expocopy(MEXPO(q,iq),MEXPO(&s,is));
    MCOPY(&(q->coefs[iq]),&(s.coefs[is]));
    mnegate(&(s.coefs[is]));
    iq++; is++;
  };

  s.nterms = is;

  /* An empty result releases the storage allocated above. */
  if (is==0){
    xfree((char *)s.coefs);
    xfree((char *)s.expos);
  };

  /* Release the old contents of r, then hand s over to it. */
  mpolfree(r);
  MPOLMOVEFREE(&s,r);
};
// Reciprocal of every lane of arg: a fast estimate e from fast_reciprocal()
// refined by one Newton-Raphson step, e + e * (1 - e * arg).
always_inline VecType vec_reciprocal_newton(VecType arg)
{
	const VecType estimate = fast_reciprocal(arg);

	// Residual 1 - e * arg; it vanishes when the estimate is already exact.
	const VecType residual = VecType::gen_one() - estimate * arg;

	return madd(residual, estimate, estimate);
}
/// Scalar double-precision arcsine.
///
/// - |a0| > 1: NaN.
/// - |a0| < Sqrteps: a0 is returned unchanged.
/// - |a0| > 0.625: with w = 1 - |a0|, a rational function of w and
///   sqrt(2w) are combined as
///     (pi/4 - sqrt(2w)) - (sqrt(2w) * R(w) - c) + pi/4,
///   where c is the small constant 0xbc91a62633145c07.
/// - otherwise: |a0| + |a0| * R(a0^2) with a second rational function.
///
/// The sign of a0 is xor-ed back into the result.
static inline A0 asin(const A0& a0)
{
  const A0 magnitude = nt2::abs(a0);
  if (magnitude > One<A0>()) return Nan<A0>();
  if (magnitude < Sqrteps<A0>()) return a0;

  A0 result;
  if (magnitude > double_constant<double,0x3fe4000000000000ll>()) // 0.625
  {
    const A0 w = oneminus(magnitude);
    const A0 ratio = w*horner< NT2_HORNER_COEFF_T(stype, 5,
                                                  (0x3f684fc3988e9f08ll,
                                                   0xbfe2079259f9290fll,
                                                   0x401bdff5baf33e6all,
                                                   0xc03991aaac01ab68ll,
                                                   0x403c896240f3081dll)
                                                 )>(w)/
                     horner< NT2_HORNER_COEFF_T(stype, 5,
                                                (0x3ff0000000000000ll,
                                                 0xc035f2a2b6bf5d8cll,
                                                 0x40626219af6a7f42ll,
                                                 0xc077fe08959063eell,
                                                 0x40756709b0b644bell)
                                               )>(w);
    const A0 root = sqrt(w+w);
    const A0 head = Pio_4<A0>()-root;
    const A0 tail = madd(root, ratio, double_constant<double,0xbc91a62633145c07ll>());
    const A0 partial = head-tail;
    result = partial+Pio_4<A0>();
  }
  else
  {
    const A0 sq = sqr(magnitude);
    const A0 ratio = sq*horner< NT2_HORNER_COEFF_T(stype, 6,
                                                   (0x3f716b9b0bd48ad3ll,
                                                    0xbfe34341333e5c16ll,
                                                    0x4015c74b178a2dd9ll,
                                                    0xc0304331de27907bll,
                                                    0x40339007da779259ll,
                                                    0xc020656c06ceafd5ll)
                                                  )>(sq)/
                     horner< NT2_HORNER_COEFF_T(stype, 6,
                                                (0x3ff0000000000000ll,
                                                 0xc02d7b590b5e0eabll,
                                                 0x40519fc025fe9054ll,
                                                 0xc06265bb6d3576d7ll,
                                                 0x4061705684ffbf9dll,
                                                 0xc04898220a3607acll)
                                               )>(sq);
    result = magnitude*ratio+magnitude;
  }
  return b_xor(bitofsign(a0), result);
}
/*
 * Demo driver: builds two M-element inputs, computes their sum (madd) and
 * product (mult) into N x N results, and prints all four.
 *
 * main is declared to return int, as the language standard requires, and
 * reports success with 0. The printed labels are kept byte-for-byte.
 */
int main()
{
    int a[M]={1,2,3,4,5,6,7,8,9,10};
    int b[M]={1,1,1,1,1,1,1,1,1,1};
    int c1[N][N],c2[N][N];

    madd(a,b,c1);
    mult(a,b,c2);

    printf("a¾ØÕó:\n");disp1(a);
    printf("b¾ØÕó:\n");disp1(b);
    printf("a+b:\n");disp2(c1);
    printf("a¡Áb:\n");disp2(c2);
    printf("\n");

    return 0;
}
/*
 * Thread function.
 *
 * arg carries the iteration count, passed by value inside the pointer.
 * The thread adds 1 to the shared counter `iterations` times and then adds
 * -1 the same number of times, using the add variant selected by the global
 * `sync`:
 *   'm' -> madd, 's' -> sadd, 'c' -> cadd, anything else -> add.
 *
 * Always returns a null pointer, so that a caller collecting the thread's
 * result reads a defined value.
 */
void* threadfunc(void* arg)
{
    long long iterations = (long long)arg;
    long long i;

    if (sync == 'm') {
        for (i = 0; i < iterations; i++) {
            madd(&counter, 1);
        }
        for (i = 0; i < iterations; i++) {
            madd(&counter, -1);
        }
    } else if (sync == 's') {
        for (i = 0; i < iterations; i++) {
            sadd(&counter, 1);
        }
        for (i = 0; i < iterations; i++) {
            sadd(&counter, -1);
        }
    } else if (sync == 'c') {
        for (i = 0; i < iterations; i++) {
            cadd(&counter, 1);
        }
        for (i = 0; i < iterations; i++) {
            cadd(&counter, -1);
        }
    } else {
        for (i = 0; i < iterations; i++) {
            add(&counter, 1);
        }
        for (i = 0; i < iterations; i++) {
            add(&counter, -1);
        }
    }

    /* A function returning void* must not fall off its end. */
    return (void*)0;
}
/*
 * minvert -- NOTE(review): by its name and structure this presumably computes
 * the inverse of a modulo b into c, using a Euclidean remainder sequence;
 * confirm against the callers.
 *
 * The loop runs mdiv() on (x, y), starting from (b, a), until y tests as
 * zero, and maintains a pair of coefficients Anew/Aold alongside it. The
 * iteration count i is used at the end to decide whether the coefficient
 * has to be replaced by b - c.
 *
 * NOTE(review): the argument order of mcopy()/mmove() looks like
 * (source, destination) -- a and b are only ever passed first -- confirm.
 */
FN minvert(MINT *a, MINT *b, MINT *c)
{
  MINT x, y, z, w, Anew, Aold;
  int i = 0;                  /* number of division steps performed */
  static MINT one;            /* constant 1, set up on the first call only */
  static int oneinit = 1;

  if (oneinit) {
    oneinit = 0;
    MSET(1,&one);
  }
  MINIT(&x);
  MINIT(&y);
  MINIT(&z);
  MINIT(&w);
  MINIT(&Aold);
  MSET (1,&Anew);

  mcopy(b, &x);
  mcopy(a, &y);
  /*
   * Loop invariant:
   *
   * y = -1^i * Anew * a  mod b
   */
  while(mtest(&y) != 0)
  {
    /* w and z receive the results of dividing x by y
       (presumably quotient and remainder). */
    mdiv(&x, &y, &w, &z);
    /* x is reused as scratch to hold the current Anew. */
    mcopy(&Anew, &x);
    /* Anew = w * Anew + Aold */
    mmult(&w, &Anew, &Anew);
    madd(&Anew, &Aold, &Anew);
    /* Shift: Aold <- previous Anew, x <- y, y <- z. */
    mmove(&x, &Aold);
    mmove(&y, &x);
    mmove(&z, &y);
    i++;
  }
  /* NOTE(review): mcmp() is presumably non-zero when one and x differ; in
     that case c is simply set to 1 -- confirm this is the intended result. */
  if (mcmp(&one,&x)) {
    mcopy(&one,c);
  } else {
    mmove(&Aold, c);
    /* After an even number of steps the result is replaced by b - c. */
    if( (i&01) == 0) msub(b, c, c);
  }

  MFREE(&x);
  MFREE(&y);
  MFREE(&z);
  MFREE(&w);
  MFREE(&Aold);
  MFREE(&Anew);
}
/// Scalar double-precision arctangent.
///
/// - a0 == 0 returns a0 (so the sign of zero is kept); infinite a0 returns
///   pi/2 with the sign of a0.
/// - |a0| > 0x4003504f333f9de6 (about 2.41421356): offset pi/2,
///   argument -1/x, correction weight 1.
/// - |a0| <= 0x3fe51eb851eb851f (0.66): offset 0, argument x, weight 0.
/// - otherwise: offset pi/4, argument (x-1)/(x+1), weight 0.5.
///
/// The reduced argument r gives r + r * R(r^2) with a rational function R;
/// weight * 0x3c91a62633145c07 (a tiny correction constant) is added before
/// the offset. The result is negated for negative a0.
static inline A0 atan(const A0& a0)
{
  if (is_eqz(a0)) return a0;
  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);

  A0 reduced = nt2::abs(a0);
  A0 offset;

  // Starts as 1 or 0 from the comparison; doubles as the branch condition.
  A0 weight = (reduced > double_constant<double,0x4003504f333f9de6ll>());
  if (weight)
  {
    offset  = Pio_2<A0>();
    reduced = -rec(reduced);
  }
  else if ((reduced <= double_constant<double,0x3fe51eb851eb851fll>()))
  {
    offset = Zero<A0>();
  }
  else
  {
    offset  = Pio_4<A0>();
    weight  = Half<A0>();
    reduced = minusone(reduced)/oneplus(reduced);
  }

  A0 acc = sqr(reduced);
  acc = acc*horner< NT2_HORNER_COEFF_T(stype, 5,
                                       (0xbfec007fa1f72594ll,
                                        0xc03028545b6b807all,
                                        0xc052c08c36880273ll,
                                        0xc05eb8bf2d05ba25ll,
                                        0xc0503669fd28ec8ell)
                                      )>(acc)/
            horner< NT2_HORNER_COEFF_T(stype, 6,
                                       (0x3ff0000000000000ll,
                                        0x4038dbc45b14603cll,
                                        0x4064a0dd43b8fa25ll,
                                        0x407b0e18d2e2be3bll,
                                        0x407e563f13b049eall,
                                        0x4068519efbbd62ecll)
                                      )>(acc);
  acc = madd(reduced, acc, reduced);

  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
  acc += weight * morebits;

  A0 result = offset + acc;
  if (is_ltz(a0)) result = -result;
  return(result);
}