bool run_sincospi(){ double *A, *Ad, *B, *C, *Bd, *Cd; A = new double[N]; B = new double[N]; C = new double[N]; for(int i=0;i<N;i++){ A[i] = 1.0; } hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMalloc((void**)&Cd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); hipLaunchKernel(test_sincospi, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for(int i=0;i<512;i++){ if(B[i] - sinpi(1.0) < 0.1){ passed = 1; } } passed = 0; for(int i=0;i<512;i++){ if(C[i] - cospi(1.0) < 0.1){ passed = 1; } } free(A); if(passed == 1){ return true; } assert(passed == 1); return false; }
// unused now from R double bessel_j(double x, double alpha) { int nb, ncalc; double na, *bj; #ifndef MATHLIB_STANDALONE const void *vmax; #endif #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(alpha)) return x + alpha; #endif if (x < 0) { ML_ERROR(ME_RANGE, "bessel_j"); return ML_NAN; } na = floor(alpha); if (alpha < 0) { /* Using Abramowitz & Stegun 9.1.2 * this may not be quite optimal (CPU and accuracy wise) */ return(((alpha - na == 0.5) ? 0 : bessel_j(x, -alpha) * cospi(alpha)) + ((alpha == na ) ? 0 : bessel_y(x, -alpha) * sinpi(alpha))); } else if (alpha > 1e7) { MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha); return ML_NAN; } nb = 1 + (int)na; /* nb-1 <= alpha < nb */ alpha -= (double)(nb-1); #ifdef MATHLIB_STANDALONE bj = (double *) calloc(nb, sizeof(double)); #ifndef _RENJIN if (!bj) MATHLIB_ERROR("%s", _("bessel_j allocation error")); #endif #else vmax = vmaxget(); bj = (double *) R_alloc((size_t) nb, sizeof(double)); #endif J_bessel(&x, &alpha, &nb, bj, &ncalc); if(ncalc != nb) {/* error input */ if(ncalc < 0) MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"), x, ncalc, nb, alpha); else MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"), x, alpha+(double)nb-1); } x = bj[nb-1]; #ifdef MATHLIB_STANDALONE free(bj); #else vmaxset(vmax); #endif return x; }
float3 solve_monic(float3 p) { p = p * (1.0f / 3.0f); float pz = p.z; // compute a normalization value to scale the vector by. // The normalization factor is divided by 2^20. // This is supposed to make internal calculations unlikely // to overflow while also making underflows unlikely. float scal = 1.0f; float cx = static_cast < float >(cbrt(fabs(p.x))); float cy = static_cast < float >(cbrt(fabs(p.y))); scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f); float rscal = 1.0f / scal; p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal); float bb = p.z * p.z; // div scal^2 float nq = bb - p.y; // div scal^2 float r = 1.5f * (p.y * p.z - p.x) - p.z * bb; // div scal^3 float nq3 = nq * nq * nq; // div scal^6 float r2 = r * r; // div scal^6 if (nq3 < r2) { // one root float root = sqrt(r2 - nq3); // div scal^3 float s = static_cast < float >(cbrt(r + root)); // div scal float t = static_cast < float >(cbrt(r - root)); // div scal return float3((s + t) * scal - pz, nan(0), nan(0)); } else { // three roots float phi_r = inversesqrt(nq3); // div scal ^ -3 float phi_root = static_cast < float >(cbrt(phi_r * nq3)); // div scal float theta = acospi(r * phi_r); theta *= 1.0f / 3.0f; float ncprod = phi_root * cospi(theta); float dev = 1.73205080756887729353f * phi_root * sinpi(theta); return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz; } }
/* Called from R: modified version of bessel_j(), accepting a work array * instead of allocating one. */ double bessel_j_ex(double x, double alpha, double *bj) { int nb, ncalc; double na; #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(alpha)) return x + alpha; #endif if (x < 0) { ML_ERROR(ME_RANGE, "bessel_j"); return ML_NAN; } na = floor(alpha); if (alpha < 0) { /* Using Abramowitz & Stegun 9.1.2 * this may not be quite optimal (CPU and accuracy wise) */ return(bessel_j_ex(x, -alpha, bj) * cospi(alpha) + ((alpha == na) ? 0 : bessel_y_ex(x, -alpha, bj) * sinpi(alpha))); } else if (alpha > 1e7) { MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha); return ML_NAN; } nb = 1 + (int)na; /* nb-1 <= alpha < nb */ alpha -= (double)(nb-1); // ==> alpha' in [0, 1) J_bessel(&x, &alpha, &nb, bj, &ncalc); if(ncalc != nb) {/* error input */ if(ncalc < 0) MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"), x, ncalc, nb, alpha); else MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"), x, alpha+(double)nb-1); } x = bj[nb-1]; return x; }
double tgamma(double x) { union { double f; uint64_t i; } u; u.f = x; double absx; double y; double dy; double z; double r; uint32_t ix = u.i >> 32 & 0x7fffffff; int sign = u.i >> 63; /* special cases */ if (ix >= 0x7ff00000) { /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ return x + INFINITY; } if (ix < (0x3ff - 54) << 20) { /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ return 1 / x; } /* integer arguments */ /* raise inexact when non-integer */ if (x == floor(x)) { if (sign) { return 0 / 0.0; } if (x <= sizeof g_fact / sizeof * g_fact) { return g_fact[(int)x - 1]; } } /* x >= 172: tgamma(x)=inf with overflow */ /* x =< -184: tgamma(x)=+-0 with underflow */ if (ix >= 0x40670000) { /* |x| >= 184 */ if (sign) { FORCE_EVAL((float)(0x1p-126 / x)); if (floor(x) * 0.5 == floor(x * 0.5)) { return 0; } return -0.0; } x *= 0x1p1023; return x; } absx = sign ? -x : x; /* handle the error of x + g - 0.5 */ y = absx + g_gmhalf; if (absx > g_gmhalf) { dy = y - absx; dy -= g_gmhalf; } else { dy = y - g_gmhalf; dy -= absx; } z = absx - 0.5; r = s(absx) * exp(-y); if (x < 0) { /* reflection formula for negative x */ /* sinpi(absx) is not 0, integers are already handled */ r = -pi / (sinpi(absx) * absx * r); dy = -dy; z = -z; } r += dy * (g_gmhalf + 0.5) * r / y; z = pow(y, 0.5 * z); y = r * z * z; return y; }
double lgammafn_sign(double x, int *sgn) { double ans, y, sinpiy; #ifdef NOMORE_FOR_THREADS static double xmax = 0.; static double dxrel = 0.; if (xmax == 0) {/* initialize machine dependent constants _ONCE_ */ xmax = d1mach(2)/log(d1mach(2));/* = 2.533 e305 for IEEE double */ dxrel = sqrt (d1mach(4));/* sqrt(Eps) ~ 1.49 e-8 for IEEE double */ } #else /* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 : xmax = DBL_MAX / log(DBL_MAX) = 2^1024 / (1024 * log(2)) = 2^1014 / log(2) dxrel = sqrt(DBL_EPSILON) = 2^-26 = 5^26 * 1e-26 (is *exact* below !) */ #define xmax 2.5327372760800758e+305 #define dxrel 1.490116119384765625e-8 #endif if (sgn != NULL) *sgn = 1; #ifdef IEEE_754 if(ISNAN(x)) return x; #endif if (sgn != NULL && x < 0 && fmod(floor(-x), 2.) == 0) *sgn = -1; if (x <= 0 && x == trunc(x)) { /* Negative integer argument */ ML_ERROR(ME_RANGE, "lgamma"); return ML_POSINF;/* +Inf, since lgamma(x) = log|gamma(x)| */ } y = fabs(x); if (y < 1e-306) return -log(y); // denormalized range, R change if (y <= 10) return log(fabs(gammafn(x))); /* ELSE y = |x| > 10 ---------------------- */ if (y > xmax) { ML_ERROR(ME_RANGE, "lgamma"); return ML_POSINF; } if (x > 0) { /* i.e. y = x > 10 */ #ifdef IEEE_754 if(x > 1e17) return(x*(log(x) - 1.)); else if(x > 4934720.) return(M_LN_SQRT_2PI + (x - 0.5) * log(x) - x); else #endif return M_LN_SQRT_2PI + (x - 0.5) * log(x) - x + lgammacor(x); } /* else: x < -10; y = -x */ sinpiy = fabs(sinpi(y)); if (sinpiy == 0) { /* Negative integer argument === Now UNNECESSARY: caught above */ MATHLIB_WARNING(" ** should NEVER happen! *** [lgamma.c: Neg.int, y=%g]\n",y); ML_ERR_return_NAN; } ans = M_LN_SQRT_PId2 + (x - 0.5) * log(y) - x - log(sinpiy) - lgammacor(y); if(fabs((x - trunc(x - 0.5)) * ans / x) < dxrel) { /* The answer is less than half precision because * the argument is too near a negative integer. */ ML_ERROR(ME_PRECISION, "lgamma"); } return ans; }
Float attribute_hidden Rf_gamma_cody(Float x) { /* ---------------------------------------------------------------------- This routine calculates the GAMMA function for a float argument X. Computation is based on an algorithm outlined in reference [1]. The program uses rational functions that approximate the GAMMA function to at least 20 significant decimal digits. Coefficients for the approximation over the interval (1,2) are unpublished. Those for the approximation for X >= 12 are from reference [2]. The accuracy achieved depends on the arithmetic system, the compiler, the intrinsic functions, and proper selection of the machine-dependent constants. ******************************************************************* Error returns The program returns the value XINF for singularities or when overflow would occur. The computation is believed to be free of underflow and overflow. Intrinsic functions required are: INT, DBLE, EXP, LOG, REAL, SIN References: [1] "An Overview of Software Development for Special Functions", W. J. Cody, Lecture Notes in Mathematics, 506, Numerical Analysis Dundee, 1975, G. A. Watson (ed.), Springer Verlag, Berlin, 1976. [2] Computer Approximations, Hart, Et. Al., Wiley and sons, New York, 1968. Latest modification: October 12, 1989 Authors: W. J. Cody and L. Stoltz Applied Mathematics Division Argonne National Laboratory Argonne, IL 60439 ----------------------------------------------------------------------*/ /* ---------------------------------------------------------------------- Mathematical constants ----------------------------------------------------------------------*/ const static double sqrtpi = .9189385332046727417803297; /* == ??? */ /* ******************************************************************* Explanation of machine-dependent constants beta - radix for the floating-point representation maxexp - the smallest positive power of beta that overflows XBIG - the largest argument for which GAMMA(X) is representable in the machine, i.e., the solution to the equation GAMMA(XBIG) = beta**maxexp XINF - the largest machine representable floating-point number; approximately beta**maxexp EPS - the smallest positive floating-point number such that 1.0+EPS > 1.0 XMININ - the smallest positive floating-point number such that 1/XMININ is machine representable Approximate values for some important machines are: beta maxexp XBIG CRAY-1 (S.P.) 2 8191 966.961 Cyber 180/855 under NOS (S.P.) 2 1070 177.803 IEEE (IBM/XT, SUN, etc.) (S.P.) 2 128 35.040 IEEE (IBM/XT, SUN, etc.) (D.P.) 2 1024 171.624 IBM 3033 (D.P.) 16 63 57.574 VAX D-Format (D.P.) 2 127 34.844 VAX G-Format (D.P.) 2 1023 171.489 XINF EPS XMININ CRAY-1 (S.P.) 5.45E+2465 7.11E-15 1.84E-2466 Cyber 180/855 under NOS (S.P.) 1.26E+322 3.55E-15 3.14E-294 IEEE (IBM/XT, SUN, etc.) (S.P.) 3.40E+38 1.19E-7 1.18E-38 IEEE (IBM/XT, SUN, etc.) (D.P.) 1.79D+308 2.22D-16 2.23D-308 IBM 3033 (D.P.) 7.23D+75 2.22D-16 1.39D-76 VAX D-Format (D.P.) 1.70D+38 1.39D-17 5.88D-39 VAX G-Format (D.P.) 8.98D+307 1.11D-16 1.12D-308 ******************************************************************* ---------------------------------------------------------------------- Machine dependent parameters ---------------------------------------------------------------------- */ const static double xbig = 171.624; /* ML_POSINF == const double xinf = 1.79e308;*/ /* DBL_EPSILON = const double eps = 2.22e-16;*/ /* DBL_MIN == const double xminin = 2.23e-308;*/ /*---------------------------------------------------------------------- Numerator and denominator coefficients for rational minimax approximation over (1,2). ----------------------------------------------------------------------*/ const static double p[8] = { -1.71618513886549492533811, 24.7656508055759199108314,-379.804256470945635097577, 629.331155312818442661052,866.966202790413211295064, -31451.2729688483675254357,-36144.4134186911729807069, 66456.1438202405440627855 }; const static double q[8] = { -30.8402300119738975254353, 315.350626979604161529144,-1015.15636749021914166146, -3107.77167157231109440444,22538.1184209801510330112, 4755.84627752788110767815,-134659.959864969306392456, -115132.259675553483497211 }; /*---------------------------------------------------------------------- Coefficients for minimax approximation over (12, INF). ----------------------------------------------------------------------*/ const static double c[7] = { -.001910444077728,8.4171387781295e-4, -5.952379913043012e-4,7.93650793500350248e-4, -.002777777777777681622553,.08333333333333333331554247, .0057083835261 }; /* Local variables */ int i, n; int parity;/*logical*/ Float fact, xden, xnum, y, z, yi, res, sum, ysq; parity = (0); fact = 1.; n = 0; y = x; if (y <= 0.) { /* ------------------------------------------------------------- Argument is negative ------------------------------------------------------------- */ y = -x; yi = trunc(y); res = y - yi; if (res != 0.) { if (yi != trunc(yi * .5) * 2.) parity = (1); fact = -M_PI / sinpi(res); y += 1.; } else { return(ML_POSINF); } } /* ----------------------------------------------------------------- Argument is positive -----------------------------------------------------------------*/ if (y < DBL_EPSILON) { /* -------------------------------------------------------------- Argument < EPS -------------------------------------------------------------- */ if (y >= DBL_MIN) { res = 1. / y; } else { return(ML_POSINF); } } else if (y < 12.) { yi = y; if (y < 1.) { /* --------------------------------------------------------- EPS < argument < 1 --------------------------------------------------------- */ z = y; y += 1.; } else { /* ----------------------------------------------------------- 1 <= argument < 12, reduce argument if necessary ----------------------------------------------------------- */ n = (int) trunc(y) - 1; y -= (double) n; z = y - 1.; } /* --------------------------------------------------------- Evaluate approximation for 1. < argument < 2. ---------------------------------------------------------*/ xnum = 0.; xden = 1.; for (i = 0; i < 8; ++i) { xnum = (xnum + p[i]) * z; xden = xden * z + q[i]; } res = xnum / xden + 1.; if (yi < y) { /* -------------------------------------------------------- Adjust result for case 0. < argument < 1. -------------------------------------------------------- */ res /= yi; } else if (yi > y) { /* ---------------------------------------------------------- Adjust result for case 2. < argument < 12. ---------------------------------------------------------- */ for (i = 0; i < n; ++i) { res *= y; y += 1.; } } } else { /* ------------------------------------------------------------- Evaluate for argument >= 12., ------------------------------------------------------------- */ if (y <= xbig) { ysq = y * y; sum = c[6]; for (i = 0; i < 6; ++i) { sum = sum / ysq + c[i]; } sum = sum / y - y + sqrtpi; sum += (y - .5) * log(y); res = exp(sum); } else { return(ML_POSINF); } } /* ---------------------------------------------------------------------- Final adjustments and return ----------------------------------------------------------------------*/ if (parity) res = -res; if (fact != 1.) res = fact / res; return res; }
__device__ void double_precision_math_functions() { int iX; double fX, fY; acos(1.0); acosh(1.0); asin(0.0); asinh(0.0); atan(0.0); atan2(0.0, 1.0); atanh(0.0); cbrt(0.0); ceil(0.0); copysign(1.0, -2.0); cos(0.0); cosh(0.0); cospi(0.0); cyl_bessel_i0(0.0); cyl_bessel_i1(0.0); erf(0.0); erfc(0.0); erfcinv(2.0); erfcx(0.0); erfinv(1.0); exp(0.0); exp10(0.0); exp2(0.0); expm1(0.0); fabs(1.0); fdim(1.0, 0.0); floor(0.0); fma(1.0, 2.0, 3.0); fmax(0.0, 0.0); fmin(0.0, 0.0); fmod(0.0, 1.0); frexp(0.0, &iX); hypot(1.0, 0.0); ilogb(1.0); isfinite(0.0); isinf(0.0); isnan(0.0); j0(0.0); j1(0.0); jn(-1.0, 1.0); ldexp(0.0, 0); lgamma(1.0); llrint(0.0); llround(0.0); log(1.0); log10(1.0); log1p(-1.0); log2(1.0); logb(1.0); lrint(0.0); lround(0.0); modf(0.0, &fX); nan("1"); nearbyint(0.0); nextafter(0.0, 0.0); fX = 1.0; norm(1, &fX); norm3d(1.0, 0.0, 0.0); norm4d(1.0, 0.0, 0.0, 0.0); normcdf(0.0); normcdfinv(1.0); pow(1.0, 0.0); rcbrt(1.0); remainder(2.0, 1.0); remquo(1.0, 2.0, &iX); rhypot(0.0, 1.0); rint(1.0); fX = 1.0; rnorm(1, &fX); rnorm3d(0.0, 0.0, 1.0); rnorm4d(0.0, 0.0, 0.0, 1.0); round(0.0); rsqrt(1.0); scalbln(0.0, 1); scalbn(0.0, 1); signbit(1.0); sin(0.0); sincos(0.0, &fX, &fY); sincospi(0.0, &fX, &fY); sinh(0.0); sinpi(0.0); sqrt(0.0); tan(0.0); tanh(0.0); tgamma(2.0); trunc(0.0); y0(1.0); y1(1.0); yn(1, 1.0); }