void check_special_values() {
  V4SF vx;
  vx.f[0] = -1000;
  vx.f[1] = -100;
  vx.f[2] = 100;
  vx.f[3] = 1000;
  printf("exp("); print4(vx.v); printf(") = "); print4(exp_ps(vx.v)); printf("\n");
  vx.f[0] = QNAN.f;
  vx.f[1] = PINF.f;
  vx.f[2] = MINF.f;
  vx.f[3] = QNAN2.f;
  printf("exp("); print4(vx.v); printf(") = "); print4(exp_ps(vx.v)); printf("\n");
  vx.f[0] = 0;
  vx.f[1] = -10;
  vx.f[2] = 1e30f;
  vx.f[3] = 1e-42f;
  printf("log("); print4(vx.v); printf(") = "); print4(log_ps(vx.v)); printf("\n");
  vx.f[0] = QNAN.f;
  vx.f[1] = PINF.f;
  vx.f[2] = MINF.f;
  vx.f[3] = QNAN2.f;
  printf("log("); print4(vx.v); printf(") = "); print4(log_ps(vx.v)); printf("\n");
  printf("sin("); print4(vx.v); printf(") = "); print4(sin_ps(vx.v)); printf("\n");
  printf("cos("); print4(vx.v); printf(") = "); print4(cos_ps(vx.v)); printf("\n");
  vx.f[0] = -1e30;
  vx.f[1] = -100000;
  vx.f[2] = 1e30;
  vx.f[3] = 100000;
  printf("sin("); print4(vx.v); printf(") = "); print4(sin_ps(vx.v)); printf("\n");
  printf("cos("); print4(vx.v); printf(") = "); print4(cos_ps(vx.v)); printf("\n");
}
Exemplo n.º 2
0
// SIMD cos
__SIMD _SIMD_cos_ps(__SIMD a)
{
#ifdef  USE_SSE
  return cos_ps(a); 
#elif defined USE_AVX
  return cos256_ps(a); 
#endif
}
Exemplo n.º 3
0
float Cos(float angleRadians)
{
#ifdef MATH_USE_SINCOS_LOOKUPTABLE
	return cos_lookuptable(angleRadians);
#elif defined(MATH_SSE2)
	// Do range reduction by 2pi before calling cos - this enchances precision of cos_ps a lot
	return s4f_x(cos_ps(modf_ps(setx_ps(angleRadians), pi2)));
#else
	return cosf(angleRadians);
#endif
}
Exemplo n.º 4
0
    //-----------------------------------------------------------------------------------
    ArrayReal MathlibNEON::Cos4( ArrayReal x )
    {
        // Map arbitrary angle x to the range [-pi; +pi] without using division.
        // Code taken from MSDN's HLSL trick. Architectures with fused mad (i.e. NEON)
        // can replace the add, the sub, & the two muls for two mad
        ArrayReal integralPart;
        x = vaddq_f32( vmulq_f32( x, ONE_DIV_2PI ), HALF );
        x = Modf4( x, integralPart );
        x = vsubq_f32( vmulq_f32( x, TWO_PI ), PI );

        return cos_ps( x );
    }
Exemplo n.º 5
0
    ArrayFloat MathlibSSE2::Cos4( ArrayFloat x )
    {
        // Map arbitrary angle x to the range [-pi; +pi] without using division.
        // Code taken from MSDN's HLSL trick. Architectures with fused mad (i.e. NEON)
        // can replace the add, the sub, & the two muls for two mad
        ArrayFloat integralPart;
        x = _mm_add_ps( _mm_mul_ps( x, ONE_DIV_2PI ), HALF );
        x = Modf4( x, integralPart );
        x = _mm_sub_ps( _mm_mul_ps( x, TWO_PI ), PI );

        return cos_ps( x );
    }
int check_sincos_precision(float xmin, float xmax) {
  unsigned nb_trials = 100000;
  printf("checking sines on [%g*Pi, %g*Pi]\n", xmin, xmax);


  float max_err_sin_ref = 0, max_err_sin_cep = 0, max_err_sin_x = 0;
  float max_err_cos_ref = 0, max_err_cos_cep = 0, max_err_cos_x = 0;
  float max_err_sum_sqr_test = 0;
  float max_err_sum_sqr_ref = 0;
  xmin *= M_PI; xmax *= M_PI;
  unsigned i;
  for (i=0; i < nb_trials; ++i) {
    V4SF vx, sin4, cos4, sin4_2, cos4_2;
    vx.f[0] = i*(xmax-xmin)/(nb_trials-1) + xmin;
    vx.f[1] = (i+.5)*(xmax-xmin)/(nb_trials-1) + xmin;
    vx.f[2] = frand()*(xmax-xmin);
    vx.f[3] = (i / 32)*M_PI/((i%32)+1);
    if (vx.f[3] < xmin || vx.f[3] > xmax) vx.f[3] = frand()*(xmax-xmin);

    /*
    vx.f[0] = M_PI/2;
    vx.f[1] = M_PI;
    vx.f[2] = M_PI/3;
    vx.f[3] = M_PI/4;
    */
    sin4.v = sin_ps(vx.v);
    cos4.v = cos_ps(vx.v);
    sincos_ps(vx.v, &sin4_2.v, &cos4_2.v);
    unsigned j;
    for (j=0; j < 4; ++j) {
      float x = vx.f[j];
      float sin_test = sin4.f[j];
      float cos_test = cos4.f[j];
      if (sin_test != sin4_2.f[j]) {
        printf("sin / sincos mismatch at x=%g\n", x);
        exit(1); return 1;
      }
      if (cos_test != cos4_2.f[j]) {
        printf("cos / sincos mismatch at x=%g\n", x);
        return 1;
      }
      float sin_ref = sinf(x);
      float sin_cep = cephes_sinf(x);
      float err_sin_ref = fabs(sin_ref - sin_test);
      float err_sin_cep = fabs(sin_cep - sin_test);
      if (err_sin_ref > max_err_sin_ref) {
        max_err_sin_ref = err_sin_ref;
        max_err_sin_x = x;
      }
      max_err_sin_cep = MAX(max_err_sin_cep, err_sin_cep);
      float cos_ref = cosf(x);
      float cos_cep = cephes_cosf(x);
      float err_cos_ref = fabs(cos_ref - cos_test);
      float err_cos_cep = fabs(cos_cep - cos_test);
      if (err_cos_ref > max_err_cos_ref) {
        max_err_cos_ref = err_cos_ref;
        max_err_cos_x = x;
      }
      max_err_cos_cep = MAX(max_err_cos_cep, err_cos_cep);
      float err_sum_sqr_test = fabs(1 - cos_test*cos_test - sin_test*sin_test);
      float err_sum_sqr_ref = fabs(1 - cos_ref*cos_ref - sin_ref*sin_ref);
      max_err_sum_sqr_ref = MAX(max_err_sum_sqr_ref, err_sum_sqr_ref);
      max_err_sum_sqr_test = MAX(max_err_sum_sqr_test, err_sum_sqr_test);
      //printf("sin(%g) = %g %g err=%g\n", x, sin_ref, sin_test, err_sin_ref);
    }
  }
  printf("max deviation from sinf(x): %g at %14.12g*Pi, max deviation from cephes_sin(x): %g\n",
         max_err_sin_ref, max_err_sin_x/M_PI, max_err_sin_cep);
  printf("max deviation from cosf(x): %g at %14.12g*Pi, max deviation from cephes_cos(x): %g\n",
         max_err_cos_ref, max_err_cos_x/M_PI, max_err_cos_cep);

  printf("deviation of sin(x)^2+cos(x)^2-1: %g (ref deviation is %g)\n",
         max_err_sum_sqr_test, max_err_sum_sqr_ref);

  if (max_err_sum_sqr_ref < 2e-7 && max_err_sin_ref < 2e-7 && max_err_cos_ref < 2e-7) {
    printf("   ->> precision OK for the sin_ps / cos_ps / sincos_ps <<-\n\n");
    return 0;
  } else {
    printf("\n   WRONG PRECISION !! there is a problem\n\n");
    return 1;
  }
}