예제 #1
0
파일: f_invtrig.hpp 프로젝트: KWMalik/nt2
 static inline A0 asin(const  A0& a0)
 {
   // Scalar single-precision arcsine. The magnitude of a0 is processed on
   // its own and the sign bit of a0 is xor-ed back onto the result.
   const A0 sign_bit = bitofsign(a0);
   A0 mag = nt2::abs(a0);

   // Magnitudes below the constant 0x38d1b717 (about 1.0e-4f) are returned as is.
   if (mag < single_constant<A0,0x38d1b717>()) return a0;
   // Magnitudes above one yield NaN.
   if (mag > One<A0>()) return Nan<A0>();

   // Above one half the polynomial argument becomes Half*oneminus(mag) and
   // mag is replaced by its square root; otherwise the argument is sqr(mag).
   const bool upper_half = (mag > Half<A0>());
   A0 t;
   if (upper_half)
     {
       t   = Half<A0>()*oneminus(mag);
       mag = sqrt(t);
     }
   else
     {
       t = sqr(mag);
     }

   // Horner evaluation in t, finished with one multiply-add against t*mag.
   A0 poly = madd(t, single_constant<A0,0x3d2cb352>(),
                  single_constant<A0,0x3cc617e3>());
   poly = madd(poly, t, single_constant<A0,0x3d3a3ec7>());
   poly = madd(poly, t, single_constant<A0,0x3d9980f6>());
   poly = madd(poly, t, single_constant<A0,0x3e2aaae4>());
   poly = madd(poly, t*mag, mag);

   // Upper range only: the result is Pio_2 minus twice the polynomial value.
   if (upper_half)
     {
       poly = Pio_2<A0>()-(poly+poly);
     }
   return b_xor(poly, sign_bit);
 }
예제 #2
0
파일: nv_algebra.cpp 프로젝트: 2asoft/xray
// Circumscribed (osculating) circle of the triangle (v1, v2, v3):
// stores the centre in 'center' and returns the radius.
// NOTE(review): there is no guard for a degenerate triangle, where the
// sum of the three weights below is zero and the reciprocal is undefined.
nv_scalar nv_find_circ_circle( vec3& center, const vec3& v1, const vec3& v2, const vec3& v3)
{
    vec3 edge_a;
    vec3 edge_b;
    nv_scalar dot_v1, dot_v2, dot_v3;

    // Dot product of the two edges leaving each vertex.
    sub(edge_a, v3, v1);
    sub(edge_b, v2, v1);
    dot(dot_v1, edge_a, edge_b);

    sub(edge_a, v3, v2);
    sub(edge_b, v1, v2);
    dot(dot_v2, edge_a, edge_b);

    sub(edge_a, v1, v3);
    sub(edge_b, v2, v3);
    dot(dot_v3, edge_a, edge_b);

    // Pairwise products of the dot products and the reciprocal of their sum.
    const nv_scalar w1 = dot_v2 * dot_v3;
    const nv_scalar w2 = dot_v3 * dot_v1;
    const nv_scalar w3 = dot_v1 * dot_v2;
    const nv_scalar inv_w_sum = nv_one / (w1 + w2 + w3);

    // Weighted combination of the vertices, normalised at the end.
    mult(center, v1, w2 + w3);
    madd(center, v2, w3 + w1);
    madd(center, v3, w1 + w2);
    center *= inv_w_sum * nv_zero_5;

    const nv_scalar radius_sq_x4 =
        (dot_v1 + dot_v2) * (dot_v2 + dot_v3) * (dot_v3 + dot_v1) * inv_w_sum;
    return nv_zero_5 * _sqrt(radius_sq_x4);
}
예제 #3
0
파일: nv_algebra.cpp 프로젝트: 2asoft/xray
// Inscribed circle of the triangle (v1, v2, v3): stores the centre in
// 'center' and returns the radius.
nv_scalar nv_find_in_circle(vec3& center, const vec3& v1, const vec3& v2, const vec3& v3)
{
    const nv_scalar tri_area = nv_area(v1, v2, v3);
    // Area below nv_eps: report v1 as the centre and a zero radius.
    if (tri_area < nv_eps)
    {
        center = v1;
        return nv_zero;
    }

    const nv_scalar inv_perimeter = nv_one / nv_perimeter(v1, v2, v3);

    // Each vertex is weighted by the length of the edge opposite to it.
    vec3 opposite_edge;

    sub(opposite_edge, v2, v3);
    mult(center, v1, nv_norm(opposite_edge));

    sub(opposite_edge, v3, v1);
    madd(center, v2, nv_norm(opposite_edge));

    sub(opposite_edge, v1, v2);
    madd(center, v3, nv_norm(opposite_edge));

    center *= inv_perimeter;

    return nv_two * tri_area * inv_perimeter;
}
예제 #4
0
파일: f_invtrig.hpp 프로젝트: Mathieu-/nt2
	static inline A0 asin(const  A0& a0)
	{
	  // Branch-free arcsine: the same polynomial is evaluated for every input
	  // and the three input ranges are blended with masks instead of branches.
	  // NOTE(review): the blending relies on lt/gt producing masks usable with
	  // b_and/b_notand/b_or/select -- presumably all-ones/all-zeros bit
	  // patterns; confirm against the nt2 definitions.
	  A0 sign, x;
	  //	bf::tie(sign, x) = sign_and_abs(a0);
	  x = abs(a0);
	  sign = bitofsign(a0);
	  // Range masks: tiny magnitude, magnitude above one half, and their union.
	  A0 x_smaller_1e_4 = lt(x, single_constant<A0, 0x38d1b717>()); //1.0e-4f;
	  A0 x_larger_05    = gt(x, Half<A0>());
	  A0 x_else         = b_or(x_smaller_1e_4, x_larger_05);
	  // z per range: x (tiny), Half*oneminus(x) (above one half), sqr(x) (rest).
	  A0 a = b_and(x, x_smaller_1e_4);
	  A0 b = b_and(Half<A0>()*oneminus(x), x_larger_05);
	  A0 z = b_or(b_or(b_notand(x_else, sqr(x)), a), b);
	  // x per range: sqrt(z) (above one half), x (middle range), cleared (tiny).
	  x = b_notand(x_else, x);
	  a = b_and(sqrt(z), x_larger_05);
	  x = b_or(a, x);
	  // Horner evaluation in z, finished with a multiply-add against z*x.
	  A0 z1 = madd(z,  single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
	  z1 = madd(z1, z, single_constant<A0, 0x3d3a3ec7>());
	  z1 = madd(z1, z, single_constant<A0, 0x3d9980f6>());
	  z1 = madd(z1, z, single_constant<A0, 0x3e2aaae4>());
	  z1 = madd(z1, z*x, x);
	  // Tiny inputs bypass the polynomial (z still holds the magnitude there).
	  z = select(x_smaller_1e_4, z, z1);
	  // Above one half the result is Pio_2 minus twice the value computed so far.
	  z1 = z+z;
	  z1 = Pio_2<A0>()-z1;
	  z = select(x_larger_05, z1, z);
	  // Put the sign of the input back.
	  return b_xor(z, sign);
	}
예제 #5
0
파일: Mstrtoul.c 프로젝트: 8l/csolve
mstrtoul(MINT *a, char *s, char **p, short int b)
{
	/*
	 * Parse a run of base-b digits at the start of s into the
	 * multi-precision integer a (a is first set to 0).
	 *
	 *   a  destination integer
	 *   s  text to parse; letters are accepted in either case
	 *   p  if not NULL, *p receives the address of the first character
	 *      that was not consumed
	 *   b  base; digits above 'f' are never reached because the loop is
	 *      gated by isxdigit(), so bases above 16 are not fully supported
	 *
	 * Fixes relative to the previous version:
	 *  - the highest letter digit is 'a' + b - 11 (base 16 -> 'f'); the old
	 *    bound was one too high, so base 11 accepted 'b';
	 *  - a hex digit that is not valid in base b now ends the parse; it used
	 *    to be skipped silently while parsing carried on after it;
	 *  - the character is converted to unsigned char before the <ctype.h>
	 *    calls, which are undefined for negative values.
	 */
	MINT	y, base;
	int	c, dectop, alphatop;
	short	qy;
	int	i;

	mset(0,a);
	MSET(b,&base);
	/* y is a one-limb integer that aliases qy, the current digit value */
	y.len	= 1;
	y.val	= &qy;
	dectop = (b <= 10) ? '0' + b - 1 : '9';
	/* for b <= 10 the letter range is empty */
	alphatop = (b > 10) ? 'a' + b - 11 : 'a' - 1;

	i=0;
	for (;;) {
		c = (unsigned char)s[i++];
		if (!isxdigit(c)) break;
		if (isupper(c)) c = c - 'A' + 'a';
		if (c >= '0' && c <= dectop) {
			qy = c - '0';
		} else if (c >= 'a' && c <= alphatop) {
			qy = c - 'a' + 10;
		} else {
			/* hex digit, but not a digit of base b: stop here */
			break;
		}
		/* a = a * base + digit (the add is skipped for a zero digit) */
		mmult(a,&base,a);
		if (qy != 0) madd(a,&y,a);
	}
	/* i was advanced past the terminating character, hence the -1 */
	if (p!=NULL) (*p)=(char *)s+i-1;
}
예제 #6
0
파일: f_invtrig.hpp 프로젝트: KWMalik/nt2
        static inline A0 kernel_atan(const  A0& a0)
        {
          // Scalar single-precision arctangent kernel. It works on the
          // magnitude of a0 only; no sign is re-applied in this function.
          if (is_eqz(a0))  return Zero<A0>();
          if (is_inf(a0))  return Pio_2<A0>();

          const A0 tan_3pio8 = single_constant<A0,0x401a827a>(); // 2.414213562373095
          const A0 tan_pio8  = single_constant<A0,0x3ed413cd>(); // 0.4142135623730950f

          // Range reduction: pick an offset and fold the argument accordingly.
          A0 reduced = nt2::abs(a0);
          A0 offset;
          if (reduced > tan_3pio8)
            {
              offset  = Pio_2<A0>();
              reduced = -rec(reduced);
            }
          else if (reduced > tan_pio8)
            {
              offset  = Pio_4<A0>();
              reduced = minusone(reduced)/oneplus(reduced);
            }
          else
            {
              offset = 0.0;
            }

          // Degree-4 polynomial in z, split into two halves joined through z*z.
          // Reference form:
          //   ((( 8.05374449538e-2 * z - 1.38776856032E-1) * z
          //      + 1.99777106478E-1) * z - 3.33329491539E-1) * z * x + x
          const A0 z      = sqr(reduced);
          const A0 upper  = madd(z,  single_constant<A0,0x3da4f0d1>(),single_constant<A0,0xbe0e1b85>());
          const A0 lower  = madd(z,  single_constant<A0,0x3e4c925f>(),single_constant<A0,0xbeaaaa2a>());
          const A0 series = madd(upper, sqr(z), lower);
          return  add(offset, madd(reduced, mul( series, z), reduced));
        }
예제 #7
0
 static inline A0_n asin(const A0_n a0_n)
 {
   // Branch-free arcsine: one polynomial is evaluated for every input and
   // the three input ranges are blended through logical masks.
   // Wrap the native argument into the A0 type used below.
   const A0 a0 = { a0_n };
   A0 sign, x;
   x = nt2::abs(a0);
   sign = bitofsign(a0);
   // Range masks: tiny magnitude, magnitude above one half, and their union.
   const bA0 x_smaller_1e_4 = lt(x, single_constant<A0, 0x38d1b717>()); //1.0e-4f;
   const bA0 x_larger_05    = gt(x, Half<A0>());
   const bA0 x_else         = logical_or(x_smaller_1e_4, x_larger_05);
   // z per range: x (tiny), Half*oneminus(x) (above one half), sqr(x) (rest).
   // NOTE(review): merging with b_or assumes the unselected lanes are all
   // zero bits, as the if_else_zero / if_zero_else names suggest -- confirm.
   A0 a = if_else_zero(x_smaller_1e_4, x);
   const A0 b = if_else_zero(x_larger_05, Half<A0>()*oneminus(x));
   A0 z = b_or(b_or(if_zero_else(x_else, sqr(x)), a), b);
   // x per range: sqrt(z) (above one half), x (middle range), zero (tiny).
   x = if_zero_else(x_else, x);
   a = if_else_zero(x_larger_05, sqrt(z));
   x = b_or(a, x);
   // Horner evaluation in z, finished with a multiply-add against z*x.
   A0 z1 = madd(z,  single_constant<A0, 0x3d2cb352>(), single_constant<A0, 0x3cc617e3>());
   z1 = madd(z1, z, single_constant<A0, 0x3d3a3ec7>());
   z1 = madd(z1, z, single_constant<A0, 0x3d9980f6>());
   z1 = madd(z1, z, single_constant<A0, 0x3e2aaae4>());
   z1 = madd(z1, z*x, x);
   // Tiny inputs bypass the polynomial (z still holds the magnitude there).
   z = select(x_smaller_1e_4, z, z1);
   // Above one half the result is Pio_2 minus twice the value computed so far.
   z1 = z+z;
   z1 = Pio_2<A0>()-z1;
   z = select(x_larger_05, z1, z);
   // Put the sign of the input back.
   return b_xor(z, sign);
 }
  void QuadMesh::interpolate(const RTCInterpolateArguments* const args)
  {
    /* Interpolates the selected vertex buffer (vertex positions of one
     * time step, or one vertex attribute slot) at parameter (u,v) of quad
     * primID. Values are processed four floats per iteration, the last
     * iteration being masked to valueCount.
     *
     * Outputs, each written only when its pointer is non-null:
     *   P                          interpolated value
     *   dPdu, dPdv                 first derivatives (dPdv is required
     *                              whenever dPdu is given)
     *   ddPdudu, ddPdvdv, ddPdudv  second derivatives, always zero (the
     *                              latter two are required whenever
     *                              ddPdudu is given)
     */
    unsigned int primID = args->primID;
    float u = args->u;
    float v = args->v;
    RTCBufferType bufferType = args->bufferType;
    unsigned int bufferSlot = args->bufferSlot;
    float* P = args->P;
    float* dPdu = args->dPdu;
    float* dPdv = args->dPdv;
    float* ddPdudu = args->ddPdudu;
    float* ddPdvdv = args->ddPdvdv;
    float* ddPdudv = args->ddPdudv;
    unsigned int valueCount = args->valueCount;

    /* calculate base pointer and stride */
    /* the slot indexes vertexAttribs below, so it must be strictly less
     * than the size (the previous check used <=, one past the end) */
    assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
           (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot < vertexAttribs.size()));
    const char* src = nullptr; 
    size_t stride = 0;
    if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
      src    = vertexAttribs[bufferSlot].getPtr();
      stride = vertexAttribs[bufferSlot].getStride();
    } else {
      src    = vertices[bufferSlot].getPtr();
      stride = vertices[bufferSlot].getStride();
    }

    for (unsigned int i=0; i<valueCount; i+=4)
    {
      /* lanes at or beyond valueCount are masked out of loads and stores */
      const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount));
      const size_t ofs = i*sizeof(float);
      const Quad& q = quad(primID);
      const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[q.v[0]*stride+ofs]);
      const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[q.v[1]*stride+ofs]);
      const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[q.v[2]*stride+ofs]);
      const vfloat4 p3 = vfloat4::loadu(valid,(float*)&src[q.v[3]*stride+ofs]);

      /* the quad is treated as two triangles: (p0,p1,p3) when u+v <= 1,
       * otherwise (p2,p3,p1) with both coordinates mirrored to 1-u, 1-v */
      const vbool4 left = u+v <= 1.0f;
      const vfloat4 Q0 = select(left,p0,p2);
      const vfloat4 Q1 = select(left,p1,p3);
      const vfloat4 Q2 = select(left,p3,p1);
      const vfloat4 U  = select(left,u,vfloat4(1.0f)-u);
      const vfloat4 V  = select(left,v,vfloat4(1.0f)-v);
      const vfloat4 W  = 1.0f-U-V;
      if (P) {
        vfloat4::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
      }
      if (dPdu) { 
        /* mirrored coordinates flip the sign of the derivatives */
        vfloat4::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1));
        assert(dPdv); vfloat4::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2));
      }
      if (ddPdudu) { 
        vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero));
        assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero));
        assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero));
      }
    }
  }
예제 #9
0
 static inline A0 approx(const A0& x)
 {
   // Polynomial approximation of the form 1 + x*(x*E(x^2) + O(x^2)),
   // where E and O are two interleaved Horner chains in x^2.
   const A0 xx = sqr(x);

   // Chain contributing the even powers of x.
   A0 even_part = madd(Const<A0,0x3e5345fd>(), xx, Const<A0,0x3f95eceb>());
   even_part    = madd(even_part, xx, Const<A0,0x4029a924>());

   // Chain contributing the odd powers of x.
   A0 odd_part  = madd(Const<A0,0x3f0ac229>(), xx, Const<A0,0x400237b4>());
   odd_part     = madd(odd_part, xx, Const<A0,0x40135d8e>());

   const A0 inner = madd(x, even_part, odd_part);
   return oneplus(x*inner);
 }
예제 #10
0
파일: f_log.hpp 프로젝트: francescog/nt2
	static inline A0 log2(const A0& a0)
	{
	  // Base-2 logarithm built on the shared kernel_log decomposition, which
	  // fills in the exponent term, the reduced argument, its square and a
	  // polynomial term.
	  A0 reduced, expo, reduced_sq, poly;
	  kernel_log(a0, expo, reduced, reduced_sq, poly);
	  poly = madd(Mhalf<A0>(), reduced_sq, poly);

	  // multiply log of fraction by log2(e); the constant holds
	  // 0.44269504088896340735992, the remaining factor of one is covered by
	  // adding poly and reduced once more below
	  const A0 log2e_frac = single_constant<A0, 0x3ee2a8ed>();
	  const A0 scaled = madd(reduced, log2e_frac, mul(poly, log2e_frac));
	  const A0 regular = ((scaled+poly)+reduced)+expo;

	  // trick to reduce selection testing: a single is_inf test on this value
	  // drives the final seladd
	  const A0 probe = a0-rec(abs(a0));
	  return seladd(is_inf(probe),b_or(regular, b_or(is_ltz(a0), is_nan(a0))),probe);
	}
예제 #11
0
 static inline A0 log(const A0& a0)
 {
   // Natural logarithm built on the shared kernel_log decomposition.
   A0 reduced, expo, reduced_sq, poly;
   kernel_log(a0, expo, reduced, reduced_sq, poly);

   // The exponent is scaled by two constants (0xb95e8083 here, 0x3f318000
   // further down) whose sum is ln 2.
   poly = madd(expo, single_constant<A0, 0xb95e8083>(), poly);
   poly = madd(Mhalf<A0>(), reduced_sq, poly);
   const A0 frac_log = reduced + poly;

   // trick to reduce selection testing: a single is_inf test on this value
   // drives the final seladd
   const A0 probe = a0-rec(abs(a0));

   // Negative or NaN inputs are forced to NaN.
   A0 regular = madd(single_constant<A0, 0x3f318000>(), expo, frac_log);
   regular = if_nan_else(logical_or(is_ltz(a0), is_nan(a0)), regular);

   return seladd(is_inf(probe), regular, probe);
 }
예제 #12
0
파일: f_log.hpp 프로젝트: ethanrublee/nt2
	static inline A0 log(const A0& a0)
	{
	  // Scalar natural logarithm: special inputs are answered up front, the
	  // rest goes through the shared kernel_log decomposition.
	  typedef typename meta::strip<A0>::type value_t;

	  if (a0 == Inf<value_t>()) return a0;                          // +inf maps to itself
	  if (is_eqz(a0)) return Minf<value_t>();                       // zero maps to -inf
	  if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<value_t>();       // NaN or negative input

	  A0 reduced, expo, reduced_sq, poly;
	  kernel_log(a0, expo, reduced, reduced_sq, poly);

	  // The exponent is scaled by two constants (0xb95e8083 and 0x3f318000)
	  // whose sum is ln 2.
	  poly = madd(expo, single_constant<value_t, 0xb95e8083>(), poly);
	  poly = madd(Mhalf<value_t>(), reduced_sq, poly);
	  const A0 frac_log = reduced + poly;
	  return madd(single_constant<value_t, 0x3f318000>(), expo, frac_log);
	}
예제 #13
0
  void QuadMesh::interpolate(unsigned primID, float u, float v, RTCBufferType buffer, float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, size_t numFloats)
  {
    /* test if interpolation is enabled */
#if defined(DEBUG)
    if ((scene->aflags & RTC_INTERPOLATE) == 0) 
      throw_RTCError(RTC_INVALID_OPERATION,"rtcInterpolate can only get called when RTC_INTERPOLATE is enabled for the scene");
#endif

    /* calculate base pointer and stride */
    assert((buffer >= RTC_VERTEX_BUFFER0 && buffer < RTCBufferType(RTC_VERTEX_BUFFER0 + numTimeSteps)) ||
           (buffer >= RTC_USER_VERTEX_BUFFER0 && buffer <= RTC_USER_VERTEX_BUFFER1));
    const char* src = nullptr; 
    size_t stride = 0;
    if (buffer >= RTC_USER_VERTEX_BUFFER0) {
      src    = userbuffers[buffer&0xFFFF].getPtr();
      stride = userbuffers[buffer&0xFFFF].getStride();
    } else {
      src    = vertices[buffer&0xFFFF].getPtr();
      stride = vertices[buffer&0xFFFF].getStride();
    }

    for (size_t i=0; i<numFloats; i+=VSIZEX)
    {
      const vboolx valid = vintx((int)i)+vintx(step) < vintx(int(numFloats));
      const size_t ofs = i*sizeof(float);
      const Quad& tri = quad(primID);
      const vfloatx p0 = vfloatx::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]);
      const vfloatx p1 = vfloatx::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]);
      const vfloatx p2 = vfloatx::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]);
      const vfloatx p3 = vfloatx::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]);      
      const vboolx left = u+v <= 1.0f;
      const vfloatx Q0 = select(left,p0,p2);
      const vfloatx Q1 = select(left,p1,p3);
      const vfloatx Q2 = select(left,p3,p1);
      const vfloatx U  = select(left,u,vfloatx(1.0f)-u);
      const vfloatx V  = select(left,v,vfloatx(1.0f)-v);
      const vfloatx W  = 1.0f-U-V;
      if (P) {
        vfloatx::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
      }
      if (dPdu) { 
        assert(dPdu); vfloatx::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1));
        assert(dPdv); vfloatx::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2));
      }
      if (ddPdudu) { 
        assert(ddPdudu); vfloatx::storeu(valid,ddPdudu+i,vfloatx(zero));
        assert(ddPdvdv); vfloatx::storeu(valid,ddPdvdv+i,vfloatx(zero));
        assert(ddPdudv); vfloatx::storeu(valid,ddPdudv+i,vfloatx(zero));
      }
    }
  }
예제 #14
0
파일: f_log.hpp 프로젝트: francescog/nt2
	// Shared decomposition used by the logarithm functions.
	//   a0  input value
	//   fe  out: exponent from fast_frexp after adjustment, converted with tofloat
	//   x   out: reduced argument
	//   x2  out: sqr(x)
	//   y   out: polynomial term, madd(x,y2,y1)*x*x2
	static inline void kernel_log(const A0& a0,
				      A0& fe,
				      A0& x,
				      A0& x2,
				      A0& y)
	{
	  typedef typename meta::as_integer<A0, signed>::type int_type;
	  int_type e;
	  // Split a0 into the pair (x, e) returned by fast_frexp.
	  boost::fusion::tie(x, e) = fast_frexp(a0);
	  // 	std::cout << "x " << x << " e " << e << std::endl;
	  // 	bf::tie(x, e) = frexp(a0);
	  // 	std::cout << "x " << x << " e " << e << std::endl;
	  // Mask of the lanes where x is below the constant 0x3f3504f3 (about
	  // 0.7071, i.e. sqrt(1/2)), reinterpreted as an integer.
	  // NOTE(review): adding the mask to e only works as a decrement if a true
	  // lane is all bits set (-1) -- confirm for the target back ends.
	  int_type x_lt_sqrthf = nt2::simd::native_cast<int_type>(gt(single_constant<A0, 0x3f3504f3>(),x));
	  e = e+x_lt_sqrthf;
	  // Adds x to itself in the masked lanes, then adds 0xbf800000 (-1.0f)
	  // everywhere.
	  x = x+b_and(x, x_lt_sqrthf)+single_constant<A0, 0xbf800000>();
	  x2 = sqr(x);
	  // Two interleaved Horner chains in x2, combined as x*y2 + y1.
	  A0 y1 = madd(single_constant<A0, 0x3d9021bb>() ,x2,single_constant<A0, 0x3def251a>() );
	  A0 y2 = madd(single_constant<A0, 0xbdebd1b8>() ,x2,single_constant<A0, 0xbdfe5d4f>() );
	  y1 = madd(y1,x2,single_constant<A0, 0x3e11e9bf>() );
	  y2 = madd(y2,x2,single_constant<A0, 0xbe2aae50>() );
	  y1 = madd(y1,x2,single_constant<A0, 0x3e4cceac>() );
	  y2 = madd(y2,x2,single_constant<A0, 0xbe7ffffc>() );
	  y1 = madd(y1,x2,single_constant<A0, 0x3eaaaaaa>() );
	  y = madd(x,y2,y1)*x*x2;
	  fe = tofloat(e);
	}
예제 #15
0
파일: f_log.hpp 프로젝트: ethanrublee/nt2
	// Shared decomposition used by the scalar logarithm functions.
	//   a0  input value
	//   fe  out: exponent from fast_frexp after adjustment, converted with tofloat
	//   x   out: reduced argument
	//   x2  out: sqr(x)
	//   y   out: polynomial term, madd(x,y2,y1)*x*x2
	static inline void kernel_log(const A0& a0,
				      A0& fe,
				      A0& x,
				      A0& x2,
				      A0& y)
	{
	  typedef typename meta::as_integer<A0, signed>::type int_type;
	  typedef typename meta::strip<A0>::type stA0; 
	  int_type e;
	  // Split a0 into the pair (x, e) returned by fast_frexp.
	  boost::fusion::vector_tie(x, e) = fast_frexp(a0);
	  // Negated comparison against 0x3f3504f3 (about 0.7071, i.e. sqrt(1/2)).
	  // NOTE(review): presumably -1 when x is below the constant and 0
	  // otherwise, so that it serves both as an exponent decrement and as a
	  // bit mask -- confirm the comparison's result type.
	  int_type x_lt_sqrthf = -(single_constant<stA0, 0x3f3504f3>() > x);
	  e += x_lt_sqrthf;
	  //     if (x_lt_sqrthf) x+= x;
	  //     x += single_constant<A0, 0xbf800000>();
	  // Branch-free form of the two commented lines above; 0xbf800000 is -1.0f.
	  x += b_and(x, x_lt_sqrthf)+single_constant<stA0,0xbf800000>();
	  x2 = sqr(x);
	  // Two interleaved Horner chains in x2, combined as x*y2 + y1.
	  A0 y1 = madd(single_constant<stA0, 0x3d9021bb>() ,x2,single_constant<stA0, 0x3def251a>() );
	  A0 y2 = madd(single_constant<stA0, 0xbdebd1b8>() ,x2,single_constant<stA0, 0xbdfe5d4f>() );
	  y1 = madd(y1,x2,single_constant<stA0, 0x3e11e9bf>() );
	  y2 = madd(y2,x2,single_constant<stA0, 0xbe2aae50>() );
	  y1 = madd(y1,x2,single_constant<stA0, 0x3e4cceac>() );
	  y2 = madd(y2,x2,single_constant<stA0, 0xbe7ffffc>() );
	  y1 = madd(y1,x2,single_constant<stA0, 0x3eaaaaaa>() );
	  y = madd(x,y2,y1)*x*x2;
	  fe = tofloat(e);
	}
예제 #16
0
파일: allfuns.c 프로젝트: mahdiz/mpclib
/* Calls each entry point of the MINT big-integer API once. The results
   are not inspected anywhere in this function.
   NOTE(review): presumably a link/smoke test for the library -- confirm. */
int
main (int argc, char *argv[])
{
  MINT *a, *b, *c, *d;
  short  h;

  mp_set_memory_functions (NULL, NULL, NULL);
  /* Operands: two initialised values and two result slots. */
  a = itom (123);
  b = xtom ("DEADBEEF");
  c = itom (0);
  d = itom (0);
  move (a, b);
  madd (a, b, c);
  msub (a, b, c);
  mult (a, b, c);
  mdiv (b, a, c, d);
  sdiv (b, 2, c, &h);
  msqrt (a, c, d);
  pow (b, a, a, c);
  rpow (a, 3, c);
  gcd (a, b, c);
  mcmp (a, b);
  /* min and mout run only when a command-line argument is given.
     NOTE(review): presumably because they read/write the standard
     streams -- confirm. */
  if (argc > 1)
    {
      min (c);
      mout (a);
    }
  mtox (b);
  /* Only a is released; b, c, d and the mtox result are left to exit. */
  mfree(a);

  exit (0);
}
예제 #17
0
파일: f_log.hpp 프로젝트: ethanrublee/nt2
	static inline A0 log2(const A0& a0)
	{
	  // Scalar base-2 logarithm: special inputs are answered up front, the
	  // rest goes through the shared kernel_log decomposition.
	  typedef typename meta::strip<A0>::type value_t;

	  if (a0 == Inf<value_t>()) return a0;                          // +inf maps to itself
	  if (is_eqz(a0)) return Minf<value_t>();                       // zero maps to -inf
	  if (nt2::is_nan(a0)||is_ltz(a0)) return Nan<value_t>();       // NaN or negative input

	  A0 reduced, expo, reduced_sq, poly;
	  kernel_log(a0, expo, reduced, reduced_sq, poly);
	  poly = madd(Mhalf<value_t>(), reduced_sq, poly);

	  // multiply log of fraction by log2(e); the constant holds
	  // 0.44269504088896340735992, the remaining factor of one is covered by
	  // adding poly and reduced once more in the return expression
	  const A0 scaled = madd( reduced
	                        , single_constant<value_t, 0x3ee2a8ed>()
	                        , mul(poly,single_constant<value_t, 0x3ee2a8ed>())
	                        );
	  return ((scaled+poly)+reduced)+expo;
	}
예제 #18
0
파일: nv_algebra.cpp 프로젝트: 2asoft/xray
vec3 & reflect(vec3& r, const vec3& n, const vec3& l)
{
    // r = -l + 2*(n . l)*n, built in place in r.
    nv_scalar n_dot_l;
    const nv_scalar twice_n_dot_l = nv_two * dot(n_dot_l,n,l);

    mult(r,l,-nv_one);
    madd(r,n,twice_n_dot_l);
    return r;
}
예제 #19
0
파일: f_log.hpp 프로젝트: francescog/nt2
	static inline A0 log(const A0& a0)
	{
	  // Natural logarithm built on the shared kernel_log decomposition.
	  A0 reduced, expo, reduced_sq, poly;
	  kernel_log(a0, expo, reduced, reduced_sq, poly);

	  // The exponent is scaled by two constants (0xb95e8083 here, 0x3f318000
	  // further down) whose sum is ln 2.
	  poly = madd(expo, single_constant<A0, 0xb95e8083>(), poly);
	  poly = madd(Mhalf<A0>(), reduced_sq, poly);
	  const A0 frac_log = reduced + poly;

	  // trick to reduce selection testing: a single is_inf test on this value
	  // drives the final seladd
	  const A0 probe = a0-rec(abs(a0));
	  const A0 regular = madd(single_constant<A0, 0x3f318000>(), expo, frac_log);

	  // Negative or NaN inputs get their mask or-ed into the regular result.
	  return seladd(is_inf(probe),b_or(regular, b_or(is_ltz(a0), is_nan(a0))),probe);
	}
예제 #20
0
int main(int argc, const char * argv[]) {
    // Small driver: runs testcout(), then prints the result of madd(1.0, 2.0).
    testcout();

    const double sum = madd(1.0, 2.0);
    printf("1.0 + 2.0 = %f\n", sum);

    return 0;
}
예제 #21
0
        static inline A0_n kernel_atan(const A0_n a0_n)
        {
          // Branch-free arctangent kernel working on the magnitude of the
          // input; the range reduction is expressed with selects.
          const A0 a0 = {a0_n};
          const A0 mag = nt2::abs(a0);

          // below_hi: magnitude under 0x401a827a (about 2.414, tan(3*pi/8));
          // in_mid:   additionally at or above 0x3ed413cd (about 0.414, tan(pi/8)).
          const bA0 below_hi = lt(mag, single_constant<A0, 0x401a827a>());
          const bA0 in_mid   = logical_and(ge(mag, single_constant<A0, 0x3ed413cd>()), below_hi);

          // Offset: Pio_2 above the high threshold, Pio_4 in the middle, zero below.
          const A0 offset  = select(in_mid, Pio_4<A0>(), if_zero_else(below_hi, Pio_2<A0>()));

          // Reduced argument: -rec(mag), (mag-1)/(mag+1) or mag, same ranges.
          const A0 reduced = select(in_mid, (minusone(mag)/oneplus(mag)),
                                    select(below_hi, mag, -rec(mag)));

          // Degree-4 polynomial in z, split into two halves joined through z*z.
          const A0 z      = sqr(reduced);
          const A0 upper  = madd(z,  single_constant<A0, 0x3da4f0d1>(),single_constant<A0, 0xbe0e1b85>());
          const A0 lower  = madd(z,  single_constant<A0, 0x3e4c925f>(),single_constant<A0, 0xbeaaaa2a>());
          const A0 series = madd(upper, sqr(z), lower);
          return  add(offset, madd(reduced, mul( series, z), reduced));
        }
예제 #22
0
파일: f_invtrig.hpp 프로젝트: Mathieu-/nt2
	static inline A0 atan(const  A0& a0)
	{
	  // Branch-free arctangent: the magnitude is range-reduced with selects
	  // and the sign bit of a0 is xor-ed back onto the result.
	  const A0 sign_bit = bitofsign(a0);
	  const A0 mag = nt2::abs(a0);

	  // below_hi: magnitude under 0x401a827a (about 2.414, tan(3*pi/8));
	  // in_mid:   additionally at or above 0x3ed413cd (about 0.414, tan(pi/8)).
	  const A0 below_hi = lt(mag, single_constant<A0, 0x401a827a>());
	  const A0 in_mid   = b_and(ge(mag, single_constant<A0, 0x3ed413cd>()), below_hi);

	  // Offset: Pio_2 above the high threshold, Pio_4 in the middle, else cleared.
	  const A0 offset  = select(in_mid, Pio_4<A0>(), b_notand(below_hi, Pio_2<A0>()));

	  // Reduced argument: -rec(mag), (mag-1)/(mag+1) or mag, same ranges.
	  const A0 reduced = select(in_mid, (minusone(mag)/oneplus(mag)),
	                            select(below_hi, mag, -rec(mag)));

	  // Degree-4 polynomial in z, split into two halves joined through z*z.
	  const A0 z      = sqr(reduced);
	  const A0 upper  = madd(z,  single_constant<A0, 0x3da4f0d1>(),single_constant<A0, 0xbe0e1b85>());
	  const A0 lower  = madd(z,  single_constant<A0, 0x3e4c925f>(),single_constant<A0, 0xbeaaaa2a>());
	  const A0 series = madd(upper, sqr(z), lower);

	  const A0 unsigned_result = add(offset, madd(reduced, mul( series, z), reduced));
	  return b_xor(unsigned_result, sign_bit);
	}
예제 #23
0
 static inline A0 sin_eval(const A0& z, const A0& x)
 {
   // Evaluates x + x*z*P(z), where P is the six-coefficient Horner
   // polynomial below (coefficients given as raw bit patterns).
   const A0 poly = horner< NT2_HORNER_COEFF_T(stype, 6, (0x3de5d8fd1fcf0ec1ll,
                                                        0xbe5ae5e5a9291691ll,
                                                        0x3ec71de3567d4896ll,
                                                        0xbf2a01a019bfdf03ll,
                                                        0x3f8111111110f7d0ll,
                                                        0xbfc5555555555548ll) ) > (z);
   const A0 correction = mul(poly,z);
   return madd(correction,x,x);
 }
예제 #24
0
파일: MPolSub.c 프로젝트: 8l/csolve
/*
 * Term-by-term merge of the polynomials p and q into r, with every
 * coefficient coming from q negated (i.e. r receives p - q).
 * The result is built in a temporary s sized for the worst case
 * (p->nterms + q->nterms terms) and then moved into r.
 * NOTE(review): the merge assumes the terms of p and q are already sorted
 * consistently with cmp_exp -- confirm with the callers.
 */
void mpolsub(MPOL *p, MPOL *q, MPOL *r)
{

  /* read cursors into p and q, write cursor into s */
  register ip=0,iq=0,is=0;
  MPOL s;

  POL_ALLOC(&s,p->nterms + q->nterms);
  /* both inputs still have terms: compare the current exponents */
  while ((ip<p->nterms)&&(iq<q->nterms)) {
#if (! INTR)
    (*PollPtr)();
#endif
    switch ((*cmp_exp)(MEXPO(p,ip),MEXPO(q,iq))) {
    /* 1: the term of p goes out unchanged */
    case 1 : expocopy(MEXPO(p,ip),MEXPO(&s,is));
      MCOPY(&(p->coefs[ip]),&(s.coefs[is]));
      ip++;is++;break;
    /* -1: the term of q goes out negated */
    case -1 : expocopy(MEXPO(q,iq),MEXPO(&s,is));
      MCOPY(&(q->coefs[iq]),&(s.coefs[is]));
      mnegate(&(s.coefs[is]));
      iq++;is++;break;
    /* 0: same exponent; the coefficient is p - q and the term is kept only
       when mtest reports it as non-zero.
       NOTE(review): when the difference is zero the slot s.coefs[is] has
       already been written by MCOPY and is reused by the next term --
       confirm MCOPY does not leak storage in that case. */
    case 0 : MCOPY(&(q->coefs[iq]),&(s.coefs[is]));
      mnegate(&(s.coefs[is]));
      madd(&(p->coefs[ip]),&(s.coefs[is]),&(s.coefs[is]));		
      if (mtest(&(s.coefs[is]))) {
	expocopy(MEXPO(p,ip),MEXPO(&s,is));
	is++;
      };
      ip++;iq++;
    };
  };
  /* leftover terms of p are copied as they are */
  while (ip<p->nterms) {
#if (! INTR)
    (*PollPtr)();
#endif
    expocopy(MEXPO(p,ip),MEXPO(&s,is));
    MCOPY(&(p->coefs[ip]),&(s.coefs[is]));
    ip++; is++;
  };
  /* leftover terms of q are copied negated */
  while (iq<q->nterms) {
#if (! INTR)
    (*PollPtr)();
#endif
    expocopy(MEXPO(q,iq),MEXPO(&s,is));
    MCOPY(&(q->coefs[iq]),&(s.coefs[is]));
    mnegate(&(s.coefs[is]));
    iq++; is++;
  };
  s.nterms = is;
  /* empty result: release the arrays right away.
     NOTE(review): s is still handed to MPOLMOVEFREE below -- confirm that
     macro does not touch the freed arrays when nterms is 0. */
  if (is==0){
    xfree((char *)s.coefs);
    xfree((char *)s.expos);
  };

  /* drop the old contents of r, then move s into it */
  mpolfree(r);	
  MPOLMOVEFREE(&s,r);
};
예제 #25
0
always_inline VecType vec_reciprocal_newton(VecType arg)
{
    // Refine the fast reciprocal estimate r0 with one Newton-Raphson step:
    //   r1 = r0 + r0 * (1 - r0 * arg)
    const VecType estimate = fast_reciprocal(arg);
    const VecType residual = VecType::gen_one() - estimate * arg;
    return madd(residual, estimate, estimate);
}
예제 #26
0
파일: d_invtrig.hpp 프로젝트: msuchard/nt2
	// Scalar double-precision arcsine using two rational approximations
	// (coefficients given as raw bit patterns): one for magnitudes above
	// 0.625 and one for the rest. The sign bit of a0 is xor-ed back at the end.
	static inline A0 asin(const  A0& a0)
	{
	  A0 x = nt2::abs(a0);
	  // Magnitudes above one yield NaN.
	  if ((x >  One<A0>())) return Nan<A0>();
	  // Magnitudes below Sqrteps are returned unchanged.
	  if ((x <  Sqrteps<A0>())) return a0;
	  A0 zz;
	  if((x >  double_constant<double,0x3fe4000000000000ll> ())) //0.625;
	    {
	      // Upper range: work with zz = oneminus(x).
	      zz = oneminus(x);
	      // vp = zz * P(zz) / Q(zz)
	      const A0 vp = zz*horner< NT2_HORNER_COEFF_T(stype, 5,
                   (0x3f684fc3988e9f08ll,
                    0xbfe2079259f9290fll,
                    0x401bdff5baf33e6all,
                    0xc03991aaac01ab68ll,
                    0x403c896240f3081dll)
                   )>(zz)/
		horner< NT2_HORNER_COEFF_T(stype, 5,
                   (0x3ff0000000000000ll,
                    0xc035f2a2b6bf5d8cll,
                    0x40626219af6a7f42ll,
                    0xc077fe08959063eell,
                    0x40756709b0b644bell)
                     )>(zz);
	      // Result assembled as (Pio_4 - s - (s*vp + c)) + Pio_4 with
	      // s = sqrt(2*zz) and c the small constant 0xbc91a62633145c07.
	      // NOTE(review): Pio_4 is presumably added in two steps to limit
	      // rounding error -- confirm before reordering.
	      zz =  sqrt(zz+zz);
	      A0 z = Pio_4<A0>()-zz;
	      zz = madd(zz, vp, double_constant<double,0xbc91a62633145c07ll>());
	      z =  z-zz;
	      zz = z+Pio_4<A0>();
	    }
	  else
	    {
	      // Lower range: zz = sqr(x), z = zz * R(zz) / S(zz), result x*z + x.
	      zz = sqr(x);
	      A0 z = zz*horner< NT2_HORNER_COEFF_T(stype, 6,
               (0x3f716b9b0bd48ad3ll,
                0xbfe34341333e5c16ll,
                0x4015c74b178a2dd9ll,
                0xc0304331de27907bll,
                0x40339007da779259ll,
                0xc020656c06ceafd5ll)
               )>(zz)/
             horner< NT2_HORNER_COEFF_T(stype, 6,
               (0x3ff0000000000000ll,
                0xc02d7b590b5e0eabll,
                0x40519fc025fe9054ll,
                0xc06265bb6d3576d7ll,
                0x4061705684ffbf9dll,
                0xc04898220a3607acll)
               )>(zz);
	      zz = x*z+x;
	    }
	  // Put the sign of the input back.
	  return b_xor(bitofsign(a0), zz);
	}
예제 #27
0
/*
 * Demo driver: fills c1 and c2 from a and b through madd() and mult(),
 * then prints both inputs and both results.
 * Returns 0. main is declared int because `void main()` is not a
 * standard signature.
 */
int main(void)
{
	int a[M]={1,2,3,4,5,6,7,8,9,10};
	int b[M]={1,1,1,1,1,1,1,1,1,1};
	int c1[N][N],c2[N][N];
	madd(a,b,c1);
	mult(a,b,c2);
	/* NOTE(review): the labels below look like mis-decoded text; they are
	   kept byte-for-byte because they are program output. */
	printf("a¾ØÕó:\n");disp1(a);
	printf("b¾ØÕó:\n");disp1(b);
    printf("a+b:\n");disp2(c1);
    printf("a¡Áb:\n");disp2(c2);
	printf("\n");
	return 0;
}
예제 #28
0
파일: addtest.c 프로젝트: jdliaw/CS111-Lab4
/*
 * Thread function.
 *
 * arg carries the iteration count, cast to long long. The thread adds 1
 * to the shared counter that many times and then adds -1 the same number
 * of times, using the add variant chosen by the global `sync`:
 *   'm' -> madd, 's' -> sadd, 'c' -> cadd, anything else -> add.
 *
 * Returns a null pointer. The function is declared void*, so falling off
 * the end without a return value (as it did before) is not valid.
 */
void* threadfunc(void* arg) {
	long long iterations = (long long)arg;
	long long i;
	if (sync == 'm') {
		for (i = 0; i < iterations; i++) {
			madd(&counter, 1);
		}
		for (i = 0; i < iterations; i++) {
			madd(&counter, -1);
		}
	}
	else if (sync == 's') {
		for (i = 0; i < iterations; i++) {
			sadd(&counter, 1);
		}
		for (i = 0; i < iterations; i++) {
			sadd(&counter, -1);
		}
	}
	else if (sync == 'c') {
		for (i = 0; i < iterations; i++) {
			cadd(&counter, 1);
		}
		for (i = 0; i < iterations; i++) {
			cadd(&counter, -1);
		}
	}
	else {
		for (i = 0; i < iterations; i++) {
			add(&counter, 1);
		}
		for (i = 0; i < iterations; i++) {
			add(&counter, -1);
		}
	}
	/* null pointer: this thread has no result to hand back */
	return 0;
}
예제 #29
0
파일: Mgcd.c 프로젝트: 8l/csolve
/*
 * Euclid-style loop on (b, a) that tracks one multiplier (Anew/Aold) and
 * stores its result in c.
 * NOTE(review): presumably c receives the inverse of a modulo b -- confirm.
 * When the last non-zero remainder compares unequal to 1, c is set to 1
 * instead; confirm that this is the intended "no inverse" signal.
 * NOTE(review): the lazily initialised static `one` is not protected
 * against concurrent first calls.
 */
FN minvert(MINT *a, MINT *b, MINT *c)
{	MINT x, y, z, w, Anew, Aold;
	/* i counts loop iterations; its parity decides the final correction */
	int i = 0;
	static MINT one;
	static int oneinit = 1;

	/* set up the constant 1 on the first call only */
	if (oneinit) {
		oneinit = 0;
		MSET(1,&one);
	}
	MINIT(&x);
	MINIT(&y);
	MINIT(&z);
	MINIT(&w);
	MINIT(&Aold);
	MSET (1,&Anew);

	/* start from x = b, y = a */
	mcopy(b, &x);
	mcopy(a, &y);
	/*
	 * Loop invariant:
	 *
	 * y = -1^i * Anew * a  mod b
	 */
	while(mtest(&y) != 0)
	{	/* NOTE(review): presumably w = x / y and z = x mod y -- confirm */
		mdiv(&x, &y, &w, &z);
		/* x is reused as scratch space for the previous Anew */
		mcopy(&Anew, &x);
		/* Anew = w * Anew + Aold */
		mmult(&w, &Anew, &Anew);
		madd(&Anew, &Aold, &Anew);
		/* shift: Aold <- previous Anew, x <- y, y <- z */
		mmove(&x, &Aold);
		mmove(&y, &x);
		mmove(&z, &y);
		i++;
	}
	/* x now holds the last non-zero remainder */
	if (mcmp(&one,&x)) {
		mcopy(&one,c);
	} else {
		mmove(&Aold, c);
		/* after an even number of steps the result is replaced by b - c */
		if( (i&01) == 0) msub(b, c, c);
	}

	MFREE(&x);
	MFREE(&y);
	MFREE(&z);
	MFREE(&w);
	MFREE(&Aold);
	MFREE(&Anew);
}
예제 #30
0
파일: d_invtrig.hpp 프로젝트: msuchard/nt2
	// Scalar double-precision arctangent: range reduction on the magnitude
	// of a0, a rational approximation (coefficients given as raw bit
	// patterns), a small correction term, then the sign of a0 is restored.
	static inline A0 atan(const  A0& a0)
	{
	  //	static const A0 tanpio8 = double_constant<double, 0x3fda827999fcef31ll>();
	  // Zero is returned unchanged; infinities map to Pio_2 times sign(a0).
	  if (is_eqz(a0))  return a0;
	  if (is_inf(a0)) return Pio_2<A0>()*sign(a0);
	  A0 x =  nt2::abs(a0);
	  A0 y;
	  // flag is both the branch condition and, further down, the weight of
	  // the morebits correction. Threshold 0x4003504f333f9de6 is about 2.414.
	  // NOTE(review): the weight in the first branch relies on the comparison
	  // result converting to 1 in A0 -- confirm for the A0 types in use.
	  A0 flag = (x >  double_constant<double,0x4003504f333f9de6ll>());
	  if (flag)
	    {
	      // Large magnitude: offset Pio_2, argument -rec(x).
	      y =  Pio_2<A0>();
	      x =  -rec(x);
	    }
	  else if ((x <=  double_constant<double,0x3fe51eb851eb851fll>()))
	    {
	      // Magnitude up to 0.66: no reduction; flag is zero here, so no
	      // correction is added below.
	      y = Zero<A0>();
	    }
	  else
	    {
	      // Middle range: offset Pio_4, argument (x-1)/(x+1), and half of the
	      // correction term.
	      y = Pio_4<A0>();
	      flag = Half<A0>();
	      x = minusone(x)/oneplus(x);
	    }
	  // z = x^2 * P(x^2) / Q(x^2), then z = x*z + x.
	  A0 z = sqr(x);
	  z = z*horner< NT2_HORNER_COEFF_T(stype, 5,
                     (0xbfec007fa1f72594ll,
                0xc03028545b6b807all,
                0xc052c08c36880273ll,
                0xc05eb8bf2d05ba25ll,
                0xc0503669fd28ec8ell)
                     )>(z)/
              horner< NT2_HORNER_COEFF_T(stype, 6,
                     (0x3ff0000000000000ll,
                0x4038dbc45b14603cll,
                0x4064a0dd43b8fa25ll,
                0x407b0e18d2e2be3bll,
                0x407e563f13b049eall,
                0x4068519efbbd62ecll)
                     )>(z);
	  z = madd(x, z, x);
	  // Small constant scaled by flag and added before the offset.
	  // NOTE(review): presumably the low-order bits of pi/2 -- confirm.
	  static const A0 morebits = double_constant<double,0x3c91a62633145c07ll>();
	  z += flag * morebits;
	  y = y + z;
	  // Restore the sign of the input.
	  if( is_ltz(a0) )  y = -y;
	  return(y);
	}