bool run_sincospi(){
double *A, *Ad, *B, *C, *Bd, *Cd;
A = new double[N];
B = new double[N];
C = new double[N];
for(int i=0;i<N;i++){
A[i] = 1.0;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_sincospi, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
    if(B[i] - sinpi(1.0) < 0.1){
        passed = 1;
    }
}
passed = 0;
for(int i=0;i<512;i++){
    if(C[i] - cospi(1.0) < 0.1){
        passed = 1;
    }
}
free(A);
if(passed == 1){
    return true;
}
assert(passed == 1);
return false;
}
Пример #2
0
// unused now from R
double bessel_j(double x, double alpha)
{
    int nb, ncalc;
    double na, *bj;
#ifndef MATHLIB_STANDALONE
    const void *vmax;
#endif

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise) */
	return(((alpha - na == 0.5) ? 0 : bessel_j(x, -alpha) * cospi(alpha)) +
	       ((alpha      == na ) ? 0 : bessel_y(x, -alpha) * sinpi(alpha)));
    }
    else if (alpha > 1e7) {
	MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha);
	return ML_NAN;
    }
    nb = 1 + (int)na; /* nb-1 <= alpha < nb */
    alpha -= (double)(nb-1);
#ifdef MATHLIB_STANDALONE
    bj = (double *) calloc(nb, sizeof(double));
#ifndef _RENJIN
    if (!bj) MATHLIB_ERROR("%s", _("bessel_j allocation error"));
#endif
#else
    vmax = vmaxget();
    bj = (double *) R_alloc((size_t) nb, sizeof(double));
#endif
    J_bessel(&x, &alpha, &nb, bj, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    x = bj[nb-1];
#ifdef MATHLIB_STANDALONE
    free(bj);
#else
    vmaxset(vmax);
#endif
    return x;
}
Пример #3
0
float3 solve_monic(float3 p)
{

	p = p * (1.0f / 3.0f);

	float pz = p.z;

	// compute a normalization value to scale the vector by.
	// The normalization factor is divided by 2^20.
	// This is supposed to make internal calculations unlikely
	// to overflow while also making underflows unlikely.
	float scal = 1.0f;

	float cx = static_cast < float >(cbrt(fabs(p.x)));
	float cy = static_cast < float >(cbrt(fabs(p.y)));
	scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f);
	float rscal = 1.0f / scal;
	p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal);

	float bb = p.z * p.z;		// div scal^2

	float nq = bb - p.y;		// div scal^2
	float r = 1.5f * (p.y * p.z - p.x) - p.z * bb;	// div scal^3
	float nq3 = nq * nq * nq;	// div scal^6
	float r2 = r * r;			// div scal^6

	if (nq3 < r2)
	{
		// one root
		float root = sqrt(r2 - nq3);	// div scal^3
		float s = static_cast < float >(cbrt(r + root));	// div scal
		float t = static_cast < float >(cbrt(r - root));	// div scal
		return float3((s + t) * scal - pz, nan(0), nan(0));
	}
	else
	{
		// three roots
		float phi_r = inversesqrt(nq3);	// div scal ^ -3
		float phi_root = static_cast < float >(cbrt(phi_r * nq3));	// div scal
		float theta = acospi(r * phi_r);
		theta *= 1.0f / 3.0f;
		float ncprod = phi_root * cospi(theta);
		float dev = 1.73205080756887729353f * phi_root * sinpi(theta);
		return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz;
	}
}
Пример #4
0
/* Called from R: modified version of bessel_j(), accepting a work array
 * instead of allocating one. */
double bessel_j_ex(double x, double alpha, double *bj)
{
    int nb, ncalc;
    double na;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise) */
	return(bessel_j_ex(x, -alpha, bj) * cospi(alpha) +
	       ((alpha == na) ? 0 :
		bessel_y_ex(x, -alpha, bj) * sinpi(alpha)));
    }
    else if (alpha > 1e7) {
	MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha);
	return ML_NAN;
    }
    nb = 1 + (int)na; /* nb-1 <= alpha < nb */
    alpha -= (double)(nb-1); // ==> alpha' in [0, 1)
    J_bessel(&x, &alpha, &nb, bj, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    x = bj[nb-1];
    return x;
}
Пример #5
0
double tgamma(double x)
{
	union {
		double f;
		uint64_t i;
	} u;
	u.f = x;

	double absx;
	double y;
	double dy;
	double z;
	double r;
	uint32_t ix = u.i >> 32 & 0x7fffffff;
	int sign = u.i >> 63;

	/* special cases */

	if (ix >= 0x7ff00000) {
		/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */

		return x + INFINITY;
	}

	if (ix < (0x3ff - 54) << 20) {
		/* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
		return 1 / x;
	}

	/* integer arguments */
	/* raise inexact when non-integer */

	if (x == floor(x)) {
		if (sign) {
			return 0 / 0.0;
		}

		if (x <= sizeof g_fact / sizeof * g_fact) {
			return g_fact[(int)x - 1];
		}
	}

	/* x >= 172: tgamma(x)=inf with overflow */
	/* x =< -184: tgamma(x)=+-0 with underflow */

	if (ix >= 0x40670000) {
		/* |x| >= 184 */

		if (sign) {
			FORCE_EVAL((float)(0x1p-126 / x));
			if (floor(x) * 0.5 == floor(x * 0.5)) {
				return 0;
			}
			return -0.0;
		}

		x *= 0x1p1023;
		return x;
	}

	absx = sign ? -x : x;

	/* handle the error of x + g - 0.5 */

	y = absx + g_gmhalf;
	if (absx > g_gmhalf) {
		dy = y - absx;
		dy -= g_gmhalf;
	} else {
		dy = y - g_gmhalf;
		dy -= absx;
	}

	z = absx - 0.5;
	r = s(absx) * exp(-y);
	if (x < 0) {
		/* reflection formula for negative x */
		/* sinpi(absx) is not 0, integers are already handled */

		r = -pi / (sinpi(absx) * absx * r);
		dy = -dy;
		z = -z;
	}

	r += dy * (g_gmhalf + 0.5) * r / y;
	z = pow(y, 0.5 * z);
	y = r * z * z;

	return y;
}
Пример #6
0
double lgammafn_sign(double x, int *sgn)
{
    double ans, y, sinpiy;

#ifdef NOMORE_FOR_THREADS
    static double xmax = 0.;
    static double dxrel = 0.;

    if (xmax == 0) {/* initialize machine dependent constants _ONCE_ */
	xmax = d1mach(2)/log(d1mach(2));/* = 2.533 e305	 for IEEE double */
	dxrel = sqrt (d1mach(4));/* sqrt(Eps) ~ 1.49 e-8  for IEEE double */
    }
#else
/* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 :
   xmax  = DBL_MAX / log(DBL_MAX) = 2^1024 / (1024 * log(2)) = 2^1014 / log(2)
   dxrel = sqrt(DBL_EPSILON) = 2^-26 = 5^26 * 1e-26 (is *exact* below !)
 */
#define xmax  2.5327372760800758e+305
#define dxrel 1.490116119384765625e-8
#endif

    if (sgn != NULL) *sgn = 1;

#ifdef IEEE_754
    if(ISNAN(x)) return x;
#endif

    if (sgn != NULL && x < 0 && fmod(floor(-x), 2.) == 0)
	*sgn = -1;

    if (x <= 0 && x == trunc(x)) { /* Negative integer argument */
	ML_ERROR(ME_RANGE, "lgamma");
	return ML_POSINF;/* +Inf, since lgamma(x) = log|gamma(x)| */
    }

    y = fabs(x);

    if (y < 1e-306) return -log(y); // denormalized range, R change
    if (y <= 10) return log(fabs(gammafn(x)));
    /*
      ELSE  y = |x| > 10 ---------------------- */

    if (y > xmax) {
	ML_ERROR(ME_RANGE, "lgamma");
	return ML_POSINF;
    }

    if (x > 0) { /* i.e. y = x > 10 */
#ifdef IEEE_754
	if(x > 1e17)
	    return(x*(log(x) - 1.));
	else if(x > 4934720.)
	    return(M_LN_SQRT_2PI + (x - 0.5) * log(x) - x);
	else
#endif
	    return M_LN_SQRT_2PI + (x - 0.5) * log(x) - x + lgammacor(x);
    }
    /* else: x < -10; y = -x */
    sinpiy = fabs(sinpi(y));

    if (sinpiy == 0) { /* Negative integer argument ===
			  Now UNNECESSARY: caught above */
	MATHLIB_WARNING(" ** should NEVER happen! *** [lgamma.c: Neg.int, y=%g]\n",y);
	ML_ERR_return_NAN;
    }

    ans = M_LN_SQRT_PId2 + (x - 0.5) * log(y) - x - log(sinpiy) - lgammacor(y);

    if(fabs((x - trunc(x - 0.5)) * ans / x) < dxrel) {

	/* The answer is less than half precision because
	 * the argument is too near a negative integer. */

	ML_ERROR(ME_PRECISION, "lgamma");
    }

    return ans;
}
Пример #7
0
Float attribute_hidden Rf_gamma_cody(Float x)
{
    /* ----------------------------------------------------------------------

       This routine calculates the GAMMA function for a float argument X.
       Computation is based on an algorithm outlined in reference [1].
       The program uses rational functions that approximate the GAMMA
       function to at least 20 significant decimal digits.	Coefficients
       for the approximation over the interval (1,2) are unpublished.
       Those for the approximation for X >= 12 are from reference [2].
       The accuracy achieved depends on the arithmetic system, the
       compiler, the intrinsic functions, and proper selection of the
       machine-dependent constants.

       *******************************************************************

       Error returns

       The program returns the value XINF for singularities or
       when overflow would occur.	 The computation is believed
       to be free of underflow and overflow.

       Intrinsic functions required are:

       INT, DBLE, EXP, LOG, REAL, SIN


       References:
       [1]  "An Overview of Software Development for Special Functions",
    	W. J. Cody, Lecture Notes in Mathematics, 506,
    	Numerical Analysis Dundee, 1975, G. A. Watson (ed.),
    	Springer Verlag, Berlin, 1976.

       [2]  Computer Approximations, Hart, Et. Al., Wiley and sons, New York, 1968.

       Latest modification: October 12, 1989

       Authors: W. J. Cody and L. Stoltz
       Applied Mathematics Division
       Argonne National Laboratory
       Argonne, IL 60439
       ----------------------------------------------------------------------*/

    /* ----------------------------------------------------------------------
       Mathematical constants
       ----------------------------------------------------------------------*/
    const static double sqrtpi = .9189385332046727417803297; /* == ??? */

    /* *******************************************************************

       Explanation of machine-dependent constants

       beta	- radix for the floating-point representation
       maxexp - the smallest positive power of beta that overflows
       XBIG	- the largest argument for which GAMMA(X) is representable
    	in the machine, i.e., the solution to the equation
    	GAMMA(XBIG) = beta**maxexp
       XINF	- the largest machine representable floating-point number;
    	approximately beta**maxexp
       EPS	- the smallest positive floating-point number such that  1.0+EPS > 1.0
       XMININ - the smallest positive floating-point number such that
    	1/XMININ is machine representable

       Approximate values for some important machines are:

       beta	      maxexp	     XBIG

       CRAY-1		(S.P.)	      2		8191	    966.961
       Cyber 180/855
       under NOS	(S.P.)	      2		1070	    177.803
       IEEE (IBM/XT,
       SUN, etc.)	(S.P.)	      2		 128	    35.040
       IEEE (IBM/XT,
       SUN, etc.)	(D.P.)	      2		1024	    171.624
       IBM 3033	(D.P.)	     16		  63	    57.574
       VAX D-Format	(D.P.)	      2		 127	    34.844
       VAX G-Format	(D.P.)	      2		1023	    171.489

       XINF	 EPS	    XMININ

       CRAY-1		(S.P.)	 5.45E+2465   7.11E-15	  1.84E-2466
       Cyber 180/855
       under NOS	(S.P.)	 1.26E+322    3.55E-15	  3.14E-294
       IEEE (IBM/XT,
       SUN, etc.)	(S.P.)	 3.40E+38     1.19E-7	  1.18E-38
       IEEE (IBM/XT,
       SUN, etc.)	(D.P.)	 1.79D+308    2.22D-16	  2.23D-308
       IBM 3033	(D.P.)	 7.23D+75     2.22D-16	  1.39D-76
       VAX D-Format	(D.P.)	 1.70D+38     1.39D-17	  5.88D-39
       VAX G-Format	(D.P.)	 8.98D+307    1.11D-16	  1.12D-308

       *******************************************************************

       ----------------------------------------------------------------------
       Machine dependent parameters
       ----------------------------------------------------------------------
       */


    const static double xbig = 171.624;
    /* ML_POSINF ==   const double xinf = 1.79e308;*/
    /* DBL_EPSILON = const double eps = 2.22e-16;*/
    /* DBL_MIN ==   const double xminin = 2.23e-308;*/

    /*----------------------------------------------------------------------
      Numerator and denominator coefficients for rational minimax
      approximation over (1,2).
      ----------------------------------------------------------------------*/
    const static double p[8] = {
        -1.71618513886549492533811,
        24.7656508055759199108314,-379.804256470945635097577,
        629.331155312818442661052,866.966202790413211295064,
        -31451.2729688483675254357,-36144.4134186911729807069,
        66456.1438202405440627855
    };
    const static double q[8] = {
        -30.8402300119738975254353,
        315.350626979604161529144,-1015.15636749021914166146,
        -3107.77167157231109440444,22538.1184209801510330112,
        4755.84627752788110767815,-134659.959864969306392456,
        -115132.259675553483497211
    };
    /*----------------------------------------------------------------------
      Coefficients for minimax approximation over (12, INF).
      ----------------------------------------------------------------------*/
    const static double c[7] = {
        -.001910444077728,8.4171387781295e-4,
        -5.952379913043012e-4,7.93650793500350248e-4,
        -.002777777777777681622553,.08333333333333333331554247,
        .0057083835261
    };

    /* Local variables */
    int i, n;
    int parity;/*logical*/
    Float fact, xden, xnum, y, z, yi, res, sum, ysq;

    parity = (0);
    fact = 1.;
    n = 0;
    y = x;
    if (y <= 0.) {
        /* -------------------------------------------------------------
           Argument is negative
           ------------------------------------------------------------- */
        y = -x;
        yi = trunc(y);
        res = y - yi;
        if (res != 0.) {
            if (yi != trunc(yi * .5) * 2.)
                parity = (1);
            fact = -M_PI / sinpi(res);
            y += 1.;
        } else {
            return(ML_POSINF);
        }
    }
    /* -----------------------------------------------------------------
       Argument is positive
       -----------------------------------------------------------------*/
    if (y < DBL_EPSILON) {
        /* --------------------------------------------------------------
           Argument < EPS
           -------------------------------------------------------------- */
        if (y >= DBL_MIN) {
            res = 1. / y;
        } else {
            return(ML_POSINF);
        }
    } else if (y < 12.) {
        yi = y;
        if (y < 1.) {
            /* ---------------------------------------------------------
               EPS < argument < 1
               --------------------------------------------------------- */
            z = y;
            y += 1.;
        } else {
            /* -----------------------------------------------------------
               1 <= argument < 12, reduce argument if necessary
               ----------------------------------------------------------- */
            n = (int) trunc(y) - 1;
            y -= (double) n;
            z = y - 1.;
        }
        /* ---------------------------------------------------------
           Evaluate approximation for 1. < argument < 2.
           ---------------------------------------------------------*/
        xnum = 0.;
        xden = 1.;
        for (i = 0; i < 8; ++i) {
            xnum = (xnum + p[i]) * z;
            xden = xden * z + q[i];
        }
        res = xnum / xden + 1.;
        if (yi < y) {
            /* --------------------------------------------------------
               Adjust result for case  0. < argument < 1.
               -------------------------------------------------------- */
            res /= yi;
        } else if (yi > y) {
            /* ----------------------------------------------------------
               Adjust result for case  2. < argument < 12.
               ---------------------------------------------------------- */
            for (i = 0; i < n; ++i) {
                res *= y;
                y += 1.;
            }
        }
    } else {
        /* -------------------------------------------------------------
           Evaluate for argument >= 12.,
           ------------------------------------------------------------- */
        if (y <= xbig) {
            ysq = y * y;
            sum = c[6];
            for (i = 0; i < 6; ++i) {
                sum = sum / ysq + c[i];
            }
            sum = sum / y - y + sqrtpi;
            sum += (y - .5) * log(y);
            res = exp(sum);
        } else {
            return(ML_POSINF);
        }
    }
    /* ----------------------------------------------------------------------
       Final adjustments and return
       ----------------------------------------------------------------------*/
    if (parity)
        res = -res;
    if (fact != 1.)
        res = fact / res;
    return res;
}
Пример #8
0
__device__ void double_precision_math_functions() {
    int iX;
    double fX, fY;

    acos(1.0);
    acosh(1.0);
    asin(0.0);
    asinh(0.0);
    atan(0.0);
    atan2(0.0, 1.0);
    atanh(0.0);
    cbrt(0.0);
    ceil(0.0);
    copysign(1.0, -2.0);
    cos(0.0);
    cosh(0.0);
    cospi(0.0);
    cyl_bessel_i0(0.0);
    cyl_bessel_i1(0.0);
    erf(0.0);
    erfc(0.0);
    erfcinv(2.0);
    erfcx(0.0);
    erfinv(1.0);
    exp(0.0);
    exp10(0.0);
    exp2(0.0);
    expm1(0.0);
    fabs(1.0);
    fdim(1.0, 0.0);
    floor(0.0);
    fma(1.0, 2.0, 3.0);
    fmax(0.0, 0.0);
    fmin(0.0, 0.0);
    fmod(0.0, 1.0);
    frexp(0.0, &iX);
    hypot(1.0, 0.0);
    ilogb(1.0);
    isfinite(0.0);
    isinf(0.0);
    isnan(0.0);
    j0(0.0);
    j1(0.0);
    jn(-1.0, 1.0);
    ldexp(0.0, 0);
    lgamma(1.0);
    llrint(0.0);
    llround(0.0);
    log(1.0);
    log10(1.0);
    log1p(-1.0);
    log2(1.0);
    logb(1.0);
    lrint(0.0);
    lround(0.0);
    modf(0.0, &fX);
    nan("1");
    nearbyint(0.0);
    nextafter(0.0, 0.0);
    fX = 1.0;
    norm(1, &fX);
    norm3d(1.0, 0.0, 0.0);
    norm4d(1.0, 0.0, 0.0, 0.0);
    normcdf(0.0);
    normcdfinv(1.0);
    pow(1.0, 0.0);
    rcbrt(1.0);
    remainder(2.0, 1.0);
    remquo(1.0, 2.0, &iX);
    rhypot(0.0, 1.0);
    rint(1.0);
    fX = 1.0;
    rnorm(1, &fX);
    rnorm3d(0.0, 0.0, 1.0);
    rnorm4d(0.0, 0.0, 0.0, 1.0);
    round(0.0);
    rsqrt(1.0);
    scalbln(0.0, 1);
    scalbn(0.0, 1);
    signbit(1.0);
    sin(0.0);
    sincos(0.0, &fX, &fY);
    sincospi(0.0, &fX, &fY);
    sinh(0.0);
    sinpi(0.0);
    sqrt(0.0);
    tan(0.0);
    tanh(0.0);
    tgamma(2.0);
    trunc(0.0);
    y0(1.0);
    y1(1.0);
    yn(1, 1.0);
}