bool run_sincospi(){
double *A, *Ad, *B, *C, *Bd, *Cd;
A = new double[N];
B = new double[N];
C = new double[N];
for(int i=0;i<N;i++){
A[i] = 1.0;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Cd, SIZE);
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(test_sincospi, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
int passed = 0;
for(int i=0;i<512;i++){
    if(B[i] - sinpi(1.0) < 0.1){
        passed = 1;
    }
}
passed = 0;
for(int i=0;i<512;i++){
    if(C[i] - cospi(1.0) < 0.1){
        passed = 1;
    }
}
free(A);
if(passed == 1){
    return true;
}
assert(passed == 1);
return false;
}
示例#2
0
// unused now from R
/*
 * bessel_j(x, alpha): evaluates the order-alpha result via J_bessel(),
 * allocating its own work array.
 *
 *  - x < 0 raises a range error and yields ML_NAN.
 *  - alpha < 0 is handled by reflection (Abramowitz & Stegun 9.1.2) through
 *    bessel_j() and bessel_y() at order -alpha.
 *  - alpha > 1e7 is rejected with a warning and yields ML_NAN.
 *  - With IEEE_754 defined, a NaN in either argument returns x + alpha.
 */
double bessel_j(double x, double alpha)
{
    int n_orders, n_done;
    double alpha_floor, result, *work;
#ifndef MATHLIB_STANDALONE
    const void *vmax;
#endif

#ifdef IEEE_754
    /* let NaN inputs propagate */
    if (ISNAN(x) || ISNAN(alpha))
	return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }

    alpha_floor = floor(alpha);
    if (alpha < 0) {
	/* Reflection formula, Abramowitz & Stegun 9.1.2; possibly not the
	 * best choice for speed or accuracy.  Each term is left out when it
	 * is excluded by the order test (half-integer for J, integer for Y). */
	double j_part = 0, y_part = 0;
	if (alpha - alpha_floor != 0.5)
	    j_part = bessel_j(x, -alpha) * cospi(alpha);
	if (alpha != alpha_floor)
	    y_part = bessel_y(x, -alpha) * sinpi(alpha);
	return j_part + y_part;
    }
    if (alpha > 1e7) {
	MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha);
	return ML_NAN;
    }

    /* n_orders-1 <= alpha < n_orders; afterwards alpha holds the fractional part */
    n_orders = 1 + (int)alpha_floor;
    alpha -= (double)(n_orders - 1);

#ifdef MATHLIB_STANDALONE
    work = (double *) calloc(n_orders, sizeof(double));
#ifndef _RENJIN
    if (!work) MATHLIB_ERROR("%s", _("bessel_j allocation error"));
#endif
#else
    vmax = vmaxget();
    work = (double *) R_alloc((size_t) n_orders, sizeof(double));
#endif

    J_bessel(&x, &alpha, &n_orders, work, &n_done);
    if (n_done != n_orders) {
	/* J_bessel() reported a problem */
	if (n_done < 0) {
	    MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"),
			     x, n_done, n_orders, alpha);
	} else {
	    MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			     x, alpha + (double)n_orders - 1);
	}
    }

    /* the requested order is the last entry of the work array */
    result = work[n_orders - 1];
#ifdef MATHLIB_STANDALONE
    free(work);
#else
    vmaxset(vmax);
#endif
    return result;
}
示例#3
0
// Solves a cubic whose coefficients are packed in p and returns its real
// roots in a float3.  When only one real root is found, the .y and .z
// components of the result are NaN.
// NOTE(review): from the arithmetic the polynomial appears to be
// x^3 + p.z*x^2 + p.y*x + p.x -- confirm against the callers.
float3 solve_monic(float3 p)
{
	p = p * (1.0f / 3.0f);

	// Offset subtracted from every root at the end.
	const float shift = p.z;

	// Normalization factor for the coefficient vector, divided by 2^20.
	// The intent is to keep the intermediate values below unlikely to
	// overflow while also making underflow unlikely.
	const float root3_x = static_cast < float >(cbrt(fabs(p.x)));
	const float root3_y = static_cast < float >(cbrt(fabs(p.y)));
	const float scale = fmax(fmax(fabsf(p.z), root3_x), root3_y * root3_y) * (1.0f / 1048576.0f);
	const float inv_scale = 1.0f / scale;
	p = p * float3(inv_scale * inv_scale * inv_scale, inv_scale * inv_scale, inv_scale);

	// The trailing notes give the power of the scale each quantity carries.
	float z_sq = p.z * p.z;		// div scale^2
	float q_neg = z_sq - p.y;	// div scale^2
	float r = 1.5f * (p.y * p.z - p.x) - p.z * z_sq;	// div scale^3
	float q_neg_cubed = q_neg * q_neg * q_neg;	// div scale^6
	float r_sq = r * r;		// div scale^6

	if (q_neg_cubed < r_sq)
	{
		// Single real root.
		float disc_root = sqrt(r_sq - q_neg_cubed);	// div scale^3
		float s = static_cast < float >(cbrt(r + disc_root));	// div scale
		float t = static_cast < float >(cbrt(r - disc_root));	// div scale
		return float3((s + t) * scale - shift, nan(0), nan(0));
	}

	// Three real roots, obtained trigonometrically.
	float inv_sqrt_q3 = inversesqrt(q_neg_cubed);	// div scale ^ -3
	float radius = static_cast < float >(cbrt(inv_sqrt_q3 * q_neg_cubed));	// div scale
	float angle = acospi(r * inv_sqrt_q3);
	angle *= 1.0f / 3.0f;
	float cos_part = radius * cospi(angle);
	// 1.7320508... is sqrt(3).
	float sin_part = 1.73205080756887729353f * radius * sinpi(angle);
	return float3(2 * cos_part, -sin_part - cos_part, sin_part - cos_part) * scale - shift;
}
示例#4
0
/* Called from R: modified version of bessel_j(), accepting a work array
 * instead of allocating one.
 *
 *  x      argument; x < 0 raises a range error and yields ML_NAN.
 *  alpha  order; negative orders are handled by reflection, orders above
 *         1e7 are rejected with a warning and yield ML_NAN.
 *  bj     caller-supplied work array.  For alpha >= 0 the element
 *         bj[floor(alpha)] is read, so at least floor(alpha)+1 doubles are
 *         needed; for alpha < 0 the same array is passed on to the
 *         recursive calls at order -alpha.
 *         NOTE(review): bessel_y_ex() may need a different size -- confirm.
 *
 * Returns the last element written by J_bessel(), i.e. the value for the
 * requested order.  With IEEE_754 defined, a NaN in either argument
 * returns x + alpha.
 */
double bessel_j_ex(double x, double alpha, double *bj)
{
    int nb, ncalc;
    double na;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(x) || ISNAN(alpha)) return x + alpha;
#endif
    if (x < 0) {
	ML_ERROR(ME_RANGE, "bessel_j");
	return ML_NAN;
    }
    na = floor(alpha);
    if (alpha < 0) {
	/* Using Abramowitz & Stegun  9.1.2
	 * this may not be quite optimal (CPU and accuracy wise).
	 * As in bessel_j(), the J term is skipped at half-integer orders
	 * (where cos(pi*alpha) vanishes) and the Y term at integer orders
	 * (where sin(pi*alpha) vanishes), so neither recursive evaluation is
	 * carried out for a term that contributes nothing. */
	return(((alpha - na == 0.5) ? 0 : bessel_j_ex(x, -alpha, bj) * cospi(alpha)) +
	       ((alpha      == na ) ? 0 : bessel_y_ex(x, -alpha, bj) * sinpi(alpha)));
    }
    else if (alpha > 1e7) {
	MATHLIB_WARNING("besselJ(x, nu): nu=%g too large for bessel_j() algorithm", alpha);
	return ML_NAN;
    }
    nb = 1 + (int)na; /* nb-1 <= alpha < nb */
    alpha -= (double)(nb-1); // ==> alpha' in [0, 1)
    J_bessel(&x, &alpha, &nb, bj, &ncalc);
    if(ncalc != nb) {/* error input */
      if(ncalc < 0)
	MATHLIB_WARNING4(_("bessel_j(%g): ncalc (=%d) != nb (=%d); alpha=%g. Arg. out of range?\n"),
			 x, ncalc, nb, alpha);
      else
	MATHLIB_WARNING2(_("bessel_j(%g,nu=%g): precision lost in result\n"),
			 x, alpha+(double)nb-1);
    }
    /* the requested order is the last of the nb computed values */
    x = bj[nb-1];
    return x;
}
// Calls each double-precision device math function once with constant
// arguments.  Every return value is discarded; the two locals below only
// serve as targets for the functions that write through pointer arguments.
__device__ void double_precision_math_functions() {
    int intOut;
    double dblOut, dblOut2;

    (void)acos(1.0);
    (void)acosh(1.0);
    (void)asin(0.0);
    (void)asinh(0.0);
    (void)atan(0.0);
    (void)atan2(0.0, 1.0);
    (void)atanh(0.0);
    (void)cbrt(0.0);
    (void)ceil(0.0);
    (void)copysign(1.0, -2.0);
    (void)cos(0.0);
    (void)cosh(0.0);
    (void)cospi(0.0);
    (void)cyl_bessel_i0(0.0);
    (void)cyl_bessel_i1(0.0);
    (void)erf(0.0);
    (void)erfc(0.0);
    (void)erfcinv(2.0);
    (void)erfcx(0.0);
    (void)erfinv(1.0);
    (void)exp(0.0);
    (void)exp10(0.0);
    (void)exp2(0.0);
    (void)expm1(0.0);
    (void)fabs(1.0);
    (void)fdim(1.0, 0.0);
    (void)floor(0.0);
    (void)fma(1.0, 2.0, 3.0);
    (void)fmax(0.0, 0.0);
    (void)fmin(0.0, 0.0);
    (void)fmod(0.0, 1.0);
    (void)frexp(0.0, &intOut);      // exponent is stored in intOut
    (void)hypot(1.0, 0.0);
    (void)ilogb(1.0);
    (void)isfinite(0.0);
    (void)isinf(0.0);
    (void)isnan(0.0);
    (void)j0(0.0);
    (void)j1(0.0);
    (void)jn(-1.0, 1.0);
    (void)ldexp(0.0, 0);
    (void)lgamma(1.0);
    (void)llrint(0.0);
    (void)llround(0.0);
    (void)log(1.0);
    (void)log10(1.0);
    (void)log1p(-1.0);
    (void)log2(1.0);
    (void)logb(1.0);
    (void)lrint(0.0);
    (void)lround(0.0);
    (void)modf(0.0, &dblOut);       // integral part is stored in dblOut
    (void)nan("1");
    (void)nearbyint(0.0);
    (void)nextafter(0.0, 0.0);
    dblOut = 1.0;                   // one-element input vector for norm()
    (void)norm(1, &dblOut);
    (void)norm3d(1.0, 0.0, 0.0);
    (void)norm4d(1.0, 0.0, 0.0, 0.0);
    (void)normcdf(0.0);
    (void)normcdfinv(1.0);
    (void)pow(1.0, 0.0);
    (void)rcbrt(1.0);
    (void)remainder(2.0, 1.0);
    (void)remquo(1.0, 2.0, &intOut);
    (void)rhypot(0.0, 1.0);
    (void)rint(1.0);
    dblOut = 1.0;                   // one-element input vector for rnorm()
    (void)rnorm(1, &dblOut);
    (void)rnorm3d(0.0, 0.0, 1.0);
    (void)rnorm4d(0.0, 0.0, 0.0, 1.0);
    (void)round(0.0);
    (void)rsqrt(1.0);
    (void)scalbln(0.0, 1);
    (void)scalbn(0.0, 1);
    (void)signbit(1.0);
    (void)sin(0.0);
    sincos(0.0, &dblOut, &dblOut2);     // both results come back through the pointers
    sincospi(0.0, &dblOut, &dblOut2);
    (void)sinh(0.0);
    (void)sinpi(0.0);
    (void)sqrt(0.0);
    (void)tan(0.0);
    (void)tanh(0.0);
    (void)tgamma(2.0);
    (void)trunc(0.0);
    (void)y0(1.0);
    (void)y1(1.0);
    (void)yn(1, 1.0);
}