Пример #1
0
static float
lg_cospi (float x)
{
  if (x <= 0.25f)
    return __cosf ((float) M_PI * x);
  else
    return __sinf ((float) M_PI * (0.5f - x));
}
Пример #2
0
float
__ieee754_y1f(float x)
{
	float z, s,c,ss,cc,u,v;
	int32_t hx,ix;

	GET_FLOAT_WORD(hx,x);
	ix = 0x7fffffff&hx;
    /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
	if(__builtin_expect(ix>=0x7f800000, 0)) return  one/(x+x*x);
	if(__builtin_expect(ix==0, 0))
		return -HUGE_VALF+x;  /* -inf and overflow exception.  */
	if(__builtin_expect(hx<0, 0)) return zero/(zero*x);
	if(ix >= 0x40000000) {  /* |x| >= 2.0 */
		SET_RESTORE_ROUNDF (FE_TONEAREST);
		__sincosf (x, &s, &c);
		ss = -s-c;
		cc = s-c;
		if(ix<0x7f000000) {  /* make sure x+x not overflow */
		    z = __cosf(x+x);
		    if ((s*c)>zero) cc = z/ss;
		    else            ss = z/cc;
		}
	/* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
	 * where x0 = x-3pi/4
	 *      Better formula:
	 *              cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
	 *                      =  1/sqrt(2) * (sin(x) - cos(x))
	 *              sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
	 *                      = -1/sqrt(2) * (cos(x) + sin(x))
	 * To avoid cancellation, use
	 *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
	 * to compute the worse one.
	 */
		if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrtf(x);
		else {
		    u = ponef(x); v = qonef(x);
		    z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrtf(x);
		}
		return z;
	}
	if(__builtin_expect(ix<=0x33000000, 0)) {    /* x < 2**-25 */
	    z = -tpi / x;
	    if (__isinff (z))
		__set_errno (ERANGE);
	    return z;
	}
	z = x*x;
	u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
	v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
	return(x*(u/v) + tpi*(__ieee754_j1f(x)*__ieee754_logf(x)-one/x));
}
Пример #3
0
float
__ieee754_y0f(float x)
{
	float z, s,c,ss,cc,u,v;
	int32_t hx,ix;

	GET_FLOAT_WORD(hx,x);
	ix = 0x7fffffff&hx;
    /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0, y0(0) is -inf.  */
	if(ix>=0x7f800000) return  one/(x+x*x);
	if(ix==0) return -HUGE_VALF+x;  /* -inf and overflow exception.  */
	if(hx<0) return zero/(zero*x);
	if(ix >= 0x40000000) {  /* |x| >= 2.0 */
	/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
	 * where x0 = x-pi/4
	 *      Better formula:
	 *              cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
	 *                      =  1/sqrt(2) * (sin(x) + cos(x))
	 *              sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
	 *                      =  1/sqrt(2) * (sin(x) - cos(x))
	 * To avoid cancellation, use
	 *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
	 * to compute the worse one.
	 */
		__sincosf (x, &s, &c);
		ss = s-c;
		cc = s+c;
	/*
	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
	 */
		if(ix<0x7f000000) {  /* make sure x+x not overflow */
		    z = -__cosf(x+x);
		    if ((s*c)<zero) cc = z/ss;
		    else            ss = z/cc;
		}
		if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrtf(x);
		else {
		    u = pzerof(x); v = qzerof(x);
		    z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrtf(x);
		}
		return z;
	}
	if(ix<=0x32000000) {	/* x < 2**-27 */
	    return(u00 + tpi*__ieee754_logf(x));
	}
	z = x*x;
	u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
	v = one+z*(v01+z*(v02+z*(v03+z*v04)));
	return(u/v + tpi*(__ieee754_j0f(x)*__ieee754_logf(x)));
}
Пример #4
0
__device__ void single_precision_intrinsics()
{
    float fX, fY;

    __cosf(0.0f);
    __exp10f(0.0f);
    __expf(0.0f);
    __fadd_rd(0.0f, 1.0f);
    __fadd_rn(0.0f, 1.0f);
    __fadd_ru(0.0f, 1.0f);
    __fadd_rz(0.0f, 1.0f);
    __fdiv_rd(4.0f, 2.0f);
    __fdiv_rn(4.0f, 2.0f);
    __fdiv_ru(4.0f, 2.0f);
    __fdiv_rz(4.0f, 2.0f);
    __fdividef(4.0f, 2.0f);
    __fmaf_rd(1.0f, 2.0f, 3.0f);
    __fmaf_rn(1.0f, 2.0f, 3.0f);
    __fmaf_ru(1.0f, 2.0f, 3.0f);
    __fmaf_rz(1.0f, 2.0f, 3.0f);
    __fmul_rd(1.0f, 2.0f);
    __fmul_rn(1.0f, 2.0f);
    __fmul_ru(1.0f, 2.0f);
    __fmul_rz(1.0f, 2.0f);
    __frcp_rd(2.0f);
    __frcp_rn(2.0f);
    __frcp_ru(2.0f);
    __frcp_rz(2.0f);
    __frsqrt_rn(4.0f);
    __fsqrt_rd(4.0f);
    __fsqrt_rn(4.0f);
    __fsqrt_ru(4.0f);
    __fsqrt_rz(4.0f);
    __fsub_rd(2.0f, 1.0f);
    __fsub_rn(2.0f, 1.0f);
    __fsub_ru(2.0f, 1.0f);
    __fsub_rz(2.0f, 1.0f);
    __log10f(1.0f);
    __log2f(1.0f);
    __logf(1.0f);
    __powf(1.0f, 0.0f);
    __saturatef(0.1f);
    __sincosf(0.0f, &fX, &fY);
    __sinf(0.0f);
    __tanf(0.0f);
}
Пример #5
0
float
__ieee754_j0f(float x)
{
	float z, s,c,ss,cc,r,u,v;
	int32_t hx,ix;

	GET_FLOAT_WORD(hx,x);
	ix = hx&0x7fffffff;
	if(ix>=0x7f800000) return one/(x*x);
	x = fabsf(x);
	if(ix >= 0x40000000) {	/* |x| >= 2.0 */
		__sincosf (x, &s, &c);
		ss = s-c;
		cc = s+c;
		if(ix<0x7f000000) {  /* make sure x+x not overflow */
		    z = -__cosf(x+x);
		    if ((s*c)<zero) cc = z/ss;
		    else	    ss = z/cc;
		}
	/*
	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
	 */
		if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrtf(x);
		else {
		    u = pzerof(x); v = qzerof(x);
		    z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrtf(x);
		}
		return z;
	}
	if(ix<0x39000000) {	/* |x| < 2**-13 */
	    math_force_eval(huge+x);		/* raise inexact if x != 0 */
	    if(ix<0x32000000) return one;	/* |x|<2**-27 */
	    else	      return one - (float)0.25*x*x;
	}
	z = x*x;
	r =  z*(R02+z*(R03+z*(R04+z*R05)));
	s =  one+z*(S01+z*(S02+z*(S03+z*S04)));
	if(ix < 0x3F800000) {	/* |x| < 1.00 */
	    return one + z*((float)-0.25+(r/s));
	} else {
	    u = (float)0.5*x;
	    return((one+u)*(one-u)+z*(r/s));
	}
}
Пример #6
0
float
__ieee754_j1f(float x)
{
	float z, s,c,ss,cc,r,u,v,y;
	int32_t hx,ix;

	GET_FLOAT_WORD(hx,x);
	ix = hx&0x7fffffff;
	if(__builtin_expect(ix>=0x7f800000, 0)) return one/x;
	y = fabsf(x);
	if(ix >= 0x40000000) {	/* |x| >= 2.0 */
		__sincosf (y, &s, &c);
		ss = -s-c;
		cc = s-c;
		if(ix<0x7f000000) {  /* make sure y+y not overflow */
		    z = __cosf(y+y);
		    if ((s*c)>zero) cc = z/ss;
		    else	    ss = z/cc;
		}
	/*
	 * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
	 * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
	 */
		if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrtf(y);
		else {
		    u = ponef(y); v = qonef(y);
		    z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrtf(y);
		}
		if(hx<0) return -z;
		else	 return  z;
	}
	if(__builtin_expect(ix<0x32000000, 0)) {	/* |x|<2**-27 */
	    if(huge+x>one) return (float)0.5*x;/* inexact if x!=0 necessary */
	}
	z = x*x;
	r =  z*(r00+z*(r01+z*(r02+z*r03)));
	s =  one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
	r *= x;
	return(x*(float)0.5+r/s);
}
Пример #7
0
        /**
         *
         * @param elong
         * @param phase
         * @param posX
         * @param posZ
         * @return
         */
        HDINLINE float3_X laserTransversal( float3_X elong, float_X phase, const float_X posX, const float_X posZ )
        {
            //const float_X modMue = float_X(PI) * float_X(SPEED_OF_LIGHT / WAVE_LENGTH) * INIT_TIME;
            const float_X f = SPEED_OF_LIGHT / WAVE_LENGTH;
            const float_X timeShift = phase / (2.0f * float_X(PI) * float_X(f)) + FOCUS_POS / SPEED_OF_LIGHT;
            const float_X spaceShift = SPEED_OF_LIGHT * tanf(TILT_X) * timeShift / CELL_HEIGHT;
            const float_X r2 = (posX + spaceShift) * (posX + spaceShift) + posZ * posZ;

            // pure gaussian
            //const float_X r2 = posX * posX + posZ * posZ;

            //rayleigh length (in y-direction)
            const float_X y_R = float_X( PI ) * W0 * W0 / WAVE_LENGTH;

            // the radius of curvature of the beam's  wavefronts
            const float_X R_y = -FOCUS_POS * ( float_X(1.0) + ( y_R / FOCUS_POS )*( y_R / FOCUS_POS ) );


#if !defined(__CUDA_ARCH__) // Host code path
            //beam waist in the near field: w_y(y=0) == W0
            const float_X w_y = W0 * sqrt( float_X(1.0) + ( FOCUS_POS / y_R )*( FOCUS_POS / y_R ) );
            //! the Gouy phase shift
            const float_X xi_y = atan( -FOCUS_POS / y_R );

            if( Polarisation == LINEAR_X || Polarisation == LINEAR_Z )
            {
                elong *= math::exp( -r2 / w_y / w_y ) * cos( 2.0f * float_X( PI ) / WAVE_LENGTH * FOCUS_POS - 2.0f * float_X( PI ) / WAVE_LENGTH * r2 / 2.0f / R_y + xi_y + phase )
                    * math::exp( -( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                       *( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                       / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0f * PULSE_LENGTH ) / ( 2.0f * PULSE_LENGTH ) );
            }
            else if( Polarisation == CIRCULAR )
            {
                elong.x() *= math::exp( -r2 / w_y / w_y ) * cos( 2.0f * float_X( PI ) / WAVE_LENGTH * FOCUS_POS - 2.0f * float_X( PI ) / WAVE_LENGTH * r2 / 2.0f / R_y + xi_y + phase )
                    * math::exp( -( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                       *( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                       / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0f * PULSE_LENGTH ) / ( 2.0f * PULSE_LENGTH ) );
                phase += float_X( PI / 2.0 );
                elong.z() *= math::exp( -r2 / w_y / w_y ) * cos( 2.0f * float_X( PI ) / WAVE_LENGTH * FOCUS_POS - 2.0f * float_X( PI ) / WAVE_LENGTH * r2 / 2.0f / R_y + xi_y + phase )
                    * math::exp( -( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                       *( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                       / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0f * PULSE_LENGTH ) / ( 2.0f * PULSE_LENGTH ) );
                phase -= float_X( PI / 2.0 );
            }

            return elong;
#else
            //beam waist in the near field: w_y(y=0) == W0
            const float_X w_y = W0 * sqrtf( float_X(1.0) + ( FOCUS_POS / y_R )*( FOCUS_POS / y_R ) );
            //! the Gouy phase shift
            const float_X xi_y = atanf( -FOCUS_POS / y_R );

            if( Polarisation == LINEAR_X || Polarisation == LINEAR_Z )
            {
                elong *= math::exp( -r2 / w_y / w_y ) * __cosf( 2.0f * float_X( PI ) / WAVE_LENGTH * FOCUS_POS - 2.0f * float_X( PI ) / WAVE_LENGTH * r2 / 2.0f / R_y + xi_y + phase )
                    * math::exp( -( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                          *( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                          / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0f * PULSE_LENGTH ) / ( 2.0f * PULSE_LENGTH ) );
            }
            else if( Polarisation == CIRCULAR )
            {
                elong.x() *= math::exp( -r2 / w_y / w_y ) * __cosf( 2.0f * float_X( PI ) / WAVE_LENGTH * FOCUS_POS - 2.0f * float_X( PI ) / WAVE_LENGTH * r2 / 2.0f / R_y + xi_y + phase )
                    * math::exp( -( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                          *( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                          / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0f * PULSE_LENGTH ) / ( 2.0f * PULSE_LENGTH ) );
                phase += float_X( PI / 2.0 );
                elong.z() *= math::exp( -r2 / w_y / w_y ) * __cosf( 2.0f * float_X( PI ) / WAVE_LENGTH * FOCUS_POS - 2.0f * float_X( PI ) / WAVE_LENGTH * r2 / 2.0f / R_y + xi_y + phase )
                    * math::exp( -( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                          *( r2 / 2.0f / R_y - FOCUS_POS - phase / 2.0f / float_X( PI ) * WAVE_LENGTH )
                          / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0f * PULSE_LENGTH ) / ( 2.0f * PULSE_LENGTH ) );
                phase -= float_X( PI / 2.0 );
            }

            return elong;
#endif
        }
Пример #8
0
__device__ inline float  occaCuda_fastCos(const float x){  return __cosf(x); }
Пример #9
0
 DINLINE float operator()(const float& value) const
 {
     return __cosf(value);
 }
Пример #10
0
float
__ieee754_gammaf_r (float x, int *signgamp)
{
  int32_t hx;
  float ret;

  GET_FLOAT_WORD (hx, x);

  if (__glibc_unlikely ((hx & 0x7fffffff) == 0))
    {
      /* Return value for x == 0 is Inf with divide by zero exception.  */
      *signgamp = 0;
      return 1.0 / x;
    }
  if (__builtin_expect (hx < 0, 0)
      && (u_int32_t) hx < 0xff800000 && __rintf (x) == x)
    {
      /* Return value for integer x < 0 is NaN with invalid exception.  */
      *signgamp = 0;
      return (x - x) / (x - x);
    }
  if (__glibc_unlikely (hx == 0xff800000))
    {
      /* x == -Inf.  According to ISO this is NaN.  */
      *signgamp = 0;
      return x - x;
    }
  if (__glibc_unlikely ((hx & 0x7f800000) == 0x7f800000))
    {
      /* Positive infinity (return positive infinity) or NaN (return
	 NaN).  */
      *signgamp = 0;
      return x + x;
    }

  if (x >= 36.0f)
    {
      /* Overflow.  */
      *signgamp = 0;
      ret = math_narrow_eval (FLT_MAX * FLT_MAX);
      return ret;
    }
  else
    {
      SET_RESTORE_ROUNDF (FE_TONEAREST);
      if (x > 0.0f)
	{
	  *signgamp = 0;
	  int exp2_adj;
	  float tret = gammaf_positive (x, &exp2_adj);
	  ret = __scalbnf (tret, exp2_adj);
	}
      else if (x >= -FLT_EPSILON / 4.0f)
	{
	  *signgamp = 0;
	  ret = 1.0f / x;
	}
      else
	{
	  float tx = __truncf (x);
	  *signgamp = (tx == 2.0f * __truncf (tx / 2.0f)) ? -1 : 1;
	  if (x <= -42.0f)
	    /* Underflow.  */
	    ret = FLT_MIN * FLT_MIN;
	  else
	    {
	      float frac = tx - x;
	      if (frac > 0.5f)
		frac = 1.0f - frac;
	      float sinpix = (frac <= 0.25f
			      ? __sinf ((float) M_PI * frac)
			      : __cosf ((float) M_PI * (0.5f - frac)));
	      int exp2_adj;
	      float tret = (float) M_PI / (-x * sinpix
					   * gammaf_positive (-x, &exp2_adj));
	      ret = __scalbnf (tret, -exp2_adj);
	      math_check_force_underflow_nonneg (ret);
	    }
	}
      ret = math_narrow_eval (ret);
    }
  if (isinf (ret) && x != 0)
    {
      if (*signgamp < 0)
	{
	  ret = math_narrow_eval (-__copysignf (FLT_MAX, ret) * FLT_MAX);
	  ret = -ret;
	}
      else
	ret = math_narrow_eval (__copysignf (FLT_MAX, ret) * FLT_MAX);
      return ret;
    }
  else if (ret == 0)
    {
      if (*signgamp < 0)
	{
	  ret = math_narrow_eval (-__copysignf (FLT_MIN, ret) * FLT_MIN);
	  ret = -ret;
	}
      else
	ret = math_narrow_eval (__copysignf (FLT_MIN, ret) * FLT_MIN);
      return ret;
    }
  else
    return ret;
}