bool run_sincospi(){ double *A, *Ad, *B, *C, *Bd, *Cd; A = new double[N]; B = new double[N]; C = new double[N]; for(int i=0;i<N;i++){ A[i] = 1.0; } hipMalloc((void**)&Ad, SIZE); hipMalloc((void**)&Bd, SIZE); hipMalloc((void**)&Cd, SIZE); hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); hipLaunchKernel(test_sincospi, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); int passed = 0; for(int i=0;i<512;i++){ if(B[i] - sinpi(1.0) < 0.1){ passed = 1; } } passed = 0; for(int i=0;i<512;i++){ if(C[i] - cospi(1.0) < 0.1){ passed = 1; } } free(A); if(passed == 1){ return true; } assert(passed == 1); return false; }
// unused now from R double bessel_j(double x, double alpha) { int nb, ncalc; double na, *bj; #ifndef MATHLIB_STANDALONE const void *vmax; #endif #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(alpha)) return x + alpha; #endif if (x < 0) { ML_ERROR(ME_RANGE, "bessel_j"); return ML_NAN; } na = floor(alpha); if (alpha < 0) { /* Using Abramowitz & Stegun 9.1.2 * this may not be quite optimal (CPU and accuracy wise) */ return(((alpha - na == 0.5) ? 0 : bessel_j(x, -alpha) * cospi(alpha)) + ((alpha == na ) ? 0 : bessel_y(x, -alpha) * sinpi(alpha))); } else if (alpha > 1e7) { MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha); return ML_NAN; } nb = 1 + (int)na; /* nb-1 <= alpha < nb */ alpha -= (double)(nb-1); #ifdef MATHLIB_STANDALONE bj = (double *) calloc(nb, sizeof(double)); #ifndef _RENJIN if (!bj) MATHLIB_ERROR("%s", _("bessel_j allocation error")); #endif #else vmax = vmaxget(); bj = (double *) R_alloc((size_t) nb, sizeof(double)); #endif J_bessel(&x, &alpha, &nb, bj, &ncalc); if(ncalc != nb) {/* error input */ if(ncalc < 0) MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"), x, ncalc, nb, alpha); else MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"), x, alpha+(double)nb-1); } x = bj[nb-1]; #ifdef MATHLIB_STANDALONE free(bj); #else vmaxset(vmax); #endif return x; }
float3 solve_monic(float3 p) { p = p * (1.0f / 3.0f); float pz = p.z; // compute a normalization value to scale the vector by. // The normalization factor is divided by 2^20. // This is supposed to make internal calculations unlikely // to overflow while also making underflows unlikely. float scal = 1.0f; float cx = static_cast < float >(cbrt(fabs(p.x))); float cy = static_cast < float >(cbrt(fabs(p.y))); scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f); float rscal = 1.0f / scal; p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal); float bb = p.z * p.z; // div scal^2 float nq = bb - p.y; // div scal^2 float r = 1.5f * (p.y * p.z - p.x) - p.z * bb; // div scal^3 float nq3 = nq * nq * nq; // div scal^6 float r2 = r * r; // div scal^6 if (nq3 < r2) { // one root float root = sqrt(r2 - nq3); // div scal^3 float s = static_cast < float >(cbrt(r + root)); // div scal float t = static_cast < float >(cbrt(r - root)); // div scal return float3((s + t) * scal - pz, nan(0), nan(0)); } else { // three roots float phi_r = inversesqrt(nq3); // div scal ^ -3 float phi_root = static_cast < float >(cbrt(phi_r * nq3)); // div scal float theta = acospi(r * phi_r); theta *= 1.0f / 3.0f; float ncprod = phi_root * cospi(theta); float dev = 1.73205080756887729353f * phi_root * sinpi(theta); return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz; } }
/* Called from R: modified version of bessel_j(), accepting a work array * instead of allocating one. */ double bessel_j_ex(double x, double alpha, double *bj) { int nb, ncalc; double na; #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(alpha)) return x + alpha; #endif if (x < 0) { ML_ERROR(ME_RANGE, "bessel_j"); return ML_NAN; } na = floor(alpha); if (alpha < 0) { /* Using Abramowitz & Stegun 9.1.2 * this may not be quite optimal (CPU and accuracy wise) */ return(bessel_j_ex(x, -alpha, bj) * cospi(alpha) + ((alpha == na) ? 0 : bessel_y_ex(x, -alpha, bj) * sinpi(alpha))); } else if (alpha > 1e7) { MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha); return ML_NAN; } nb = 1 + (int)na; /* nb-1 <= alpha < nb */ alpha -= (double)(nb-1); // ==> alpha' in [0, 1) J_bessel(&x, &alpha, &nb, bj, &ncalc); if(ncalc != nb) {/* error input */ if(ncalc < 0) MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"), x, ncalc, nb, alpha); else MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"), x, alpha+(double)nb-1); } x = bj[nb-1]; return x; }
__device__ void double_precision_math_functions() { int iX; double fX, fY; acos(1.0); acosh(1.0); asin(0.0); asinh(0.0); atan(0.0); atan2(0.0, 1.0); atanh(0.0); cbrt(0.0); ceil(0.0); copysign(1.0, -2.0); cos(0.0); cosh(0.0); cospi(0.0); cyl_bessel_i0(0.0); cyl_bessel_i1(0.0); erf(0.0); erfc(0.0); erfcinv(2.0); erfcx(0.0); erfinv(1.0); exp(0.0); exp10(0.0); exp2(0.0); expm1(0.0); fabs(1.0); fdim(1.0, 0.0); floor(0.0); fma(1.0, 2.0, 3.0); fmax(0.0, 0.0); fmin(0.0, 0.0); fmod(0.0, 1.0); frexp(0.0, &iX); hypot(1.0, 0.0); ilogb(1.0); isfinite(0.0); isinf(0.0); isnan(0.0); j0(0.0); j1(0.0); jn(-1.0, 1.0); ldexp(0.0, 0); lgamma(1.0); llrint(0.0); llround(0.0); log(1.0); log10(1.0); log1p(-1.0); log2(1.0); logb(1.0); lrint(0.0); lround(0.0); modf(0.0, &fX); nan("1"); nearbyint(0.0); nextafter(0.0, 0.0); fX = 1.0; norm(1, &fX); norm3d(1.0, 0.0, 0.0); norm4d(1.0, 0.0, 0.0, 0.0); normcdf(0.0); normcdfinv(1.0); pow(1.0, 0.0); rcbrt(1.0); remainder(2.0, 1.0); remquo(1.0, 2.0, &iX); rhypot(0.0, 1.0); rint(1.0); fX = 1.0; rnorm(1, &fX); rnorm3d(0.0, 0.0, 1.0); rnorm4d(0.0, 0.0, 0.0, 1.0); round(0.0); rsqrt(1.0); scalbln(0.0, 1); scalbn(0.0, 1); signbit(1.0); sin(0.0); sincos(0.0, &fX, &fY); sincospi(0.0, &fX, &fY); sinh(0.0); sinpi(0.0); sqrt(0.0); tan(0.0); tanh(0.0); tgamma(2.0); trunc(0.0); y0(1.0); y1(1.0); yn(1, 1.0); }