void check_special_values() { V4SF vx; vx.f[0] = -1000; vx.f[1] = -100; vx.f[2] = 100; vx.f[3] = 1000; printf("exp("); print4(vx.v); printf(") = "); print4(exp_ps(vx.v)); printf("\n"); vx.f[0] = QNAN.f; vx.f[1] = PINF.f; vx.f[2] = MINF.f; vx.f[3] = QNAN2.f; printf("exp("); print4(vx.v); printf(") = "); print4(exp_ps(vx.v)); printf("\n"); vx.f[0] = 0; vx.f[1] = -10; vx.f[2] = 1e30f; vx.f[3] = 1e-42f; printf("log("); print4(vx.v); printf(") = "); print4(log_ps(vx.v)); printf("\n"); vx.f[0] = QNAN.f; vx.f[1] = PINF.f; vx.f[2] = MINF.f; vx.f[3] = QNAN2.f; printf("log("); print4(vx.v); printf(") = "); print4(log_ps(vx.v)); printf("\n"); printf("sin("); print4(vx.v); printf(") = "); print4(sin_ps(vx.v)); printf("\n"); printf("cos("); print4(vx.v); printf(") = "); print4(cos_ps(vx.v)); printf("\n"); vx.f[0] = -1e30; vx.f[1] = -100000; vx.f[2] = 1e30; vx.f[3] = 100000; printf("sin("); print4(vx.v); printf(") = "); print4(sin_ps(vx.v)); printf("\n"); printf("cos("); print4(vx.v); printf(") = "); print4(cos_ps(vx.v)); printf("\n"); }
// SIMD sin __SIMD _SIMD_sin_ps(__SIMD a) { #ifdef USE_SSE return sin_ps(a); #elif defined USE_AVX return sin256_ps(a); #endif }
/// Returns the sine of angleRadians. The implementation is picked at compile time:
/// sin_lookuptable when MATH_USE_SINCOS_LOOKUPTABLE is defined, otherwise sin_ps
/// when MATH_SSE2 is defined, otherwise the C library sinf.
float Sin(float angleRadians)
{
#if defined(MATH_USE_SINCOS_LOOKUPTABLE)
	return sin_lookuptable(angleRadians);
#elif defined(MATH_SSE2)
	// Range-reduce the argument by 2pi before handing it to sin_ps - this
	// enhances the precision of sin_ps a lot.
	return s4f_x(sin_ps(modf_ps(setx_ps(angleRadians), pi2)));
#else
	return sinf(angleRadians);
#endif
}
//----------------------------------------------------------------------------------- ArrayReal MathlibNEON::Sin4( ArrayReal x ) { // Map arbitrary angle x to the range [-pi; +pi] without using division. // Code taken from MSDN's HLSL trick. Architectures with fused mad (i.e. NEON) // can replace the add, the sub, & the two muls for two mad ArrayReal integralPart; x = vaddq_f32( vmulq_f32( x, ONE_DIV_2PI ), HALF ); x = Modf4( x, integralPart ); x = vsubq_f32( vmulq_f32( x, TWO_PI ), PI ); return sin_ps( x ); }
ArrayFloat MathlibSSE2::Sin4( ArrayFloat x )
{
    // Range reduction without a division (trick taken from MSDN's HLSL material):
    // the arbitrary angle x is mapped to [-pi; +pi] before sin_ps is evaluated.
    // On architectures with a fused mad (i.e. NEON) the add, the sub and the two
    // muls below could be replaced by two mads.

    // x / 2pi + 0.5
    ArrayFloat turns = _mm_add_ps( _mm_mul_ps( x, ONE_DIV_2PI ), HALF );

    // Only the value returned by Modf4 is used; the integral part written to the
    // out argument is discarded.
    ArrayFloat discardedIntegral;
    ArrayFloat remainder = Modf4( turns, discardedIntegral );

    // remainder * 2pi - pi
    ArrayFloat reduced = _mm_sub_ps( _mm_mul_ps( remainder, TWO_PI ), PI );

    return sin_ps( reduced );
}
/** Interpolates between this quaternion and q2 with weight t, blending along the
	shorter arc. Spherical interpolation is used unless the two quaternions are
	nearly parallel, in which case plain linear weights are used. The result is
	normalized before being returned.
	Implementation based on the math in the book Watt, Policarpo. 3D Games:
	Real-time rendering and Software Technology, pp. 383-386. */
Quat MUST_USE_RESULT Quat::Slerp(const Quat &q2, float t) const
{
	///\todo SSE.
	assume(0.f <= t && t <= 1.f);
	assume(IsNormalized());
	assume(q2.IsNormalized());

	const float dot = this->Dot(q2);

	// A negative dot product means the direct path is the longer arc: negate the
	// dot product and carry a factor of -1 into this quaternion's weight instead.
	const bool flip = dot < 0.f;
	const float sign = flip ? -1.f : 1.f;
	const float cosAngle = flip ? -dot : dot;

	float a;
	float b;
	if (cosAngle <= 0.97f)
	{
		// Spherical linear interpolation.
		// cosAngle ranged over 0 -> 1, so theta lies in the range pi/2 -> 0.
		const float theta = Acos(cosAngle);
		const float thetaT = t * theta;
#if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
		// Evaluate the three sines with one sin_ps call, then divide every lane
		// by the lane that _MM_SHUFFLE(0, 0, 0, 0) broadcasts.
		simd4f sines = sin_ps(set_ps(0.f, thetaT, theta - thetaT, theta));
		simd4f denom = shuffle1_ps(sines, _MM_SHUFFLE(0, 0, 0, 0));
		sines = div_ps(sines, denom);
		a = s4f_y(sines);
		b = s4f_z(sines);
#else
		const float sinTheta = Sin(theta);
		const float sinRemaining = Sin(theta - thetaT);
		const float sinCovered = Sin(thetaT);
		const float invSinTheta = 1.f / sinTheta;
		a = sinRemaining * invSinTheta;
		b = sinCovered * invSinTheta;
#endif
	}
	else
	{
		// The angle is close to taking the denominator to zero, so resort to
		// linear interpolation (followed by the normalization below).
		a = 1.f - t;
		b = t;
	}

	return (*this * (a * sign) + q2 * b).Normalized();
}
int check_sincos_precision(float xmin, float xmax) { unsigned nb_trials = 100000; printf("checking sines on [%g*Pi, %g*Pi]\n", xmin, xmax); float max_err_sin_ref = 0, max_err_sin_cep = 0, max_err_sin_x = 0; float max_err_cos_ref = 0, max_err_cos_cep = 0, max_err_cos_x = 0; float max_err_sum_sqr_test = 0; float max_err_sum_sqr_ref = 0; xmin *= M_PI; xmax *= M_PI; unsigned i; for (i=0; i < nb_trials; ++i) { V4SF vx, sin4, cos4, sin4_2, cos4_2; vx.f[0] = i*(xmax-xmin)/(nb_trials-1) + xmin; vx.f[1] = (i+.5)*(xmax-xmin)/(nb_trials-1) + xmin; vx.f[2] = frand()*(xmax-xmin); vx.f[3] = (i / 32)*M_PI/((i%32)+1); if (vx.f[3] < xmin || vx.f[3] > xmax) vx.f[3] = frand()*(xmax-xmin); /* vx.f[0] = M_PI/2; vx.f[1] = M_PI; vx.f[2] = M_PI/3; vx.f[3] = M_PI/4; */ sin4.v = sin_ps(vx.v); cos4.v = cos_ps(vx.v); sincos_ps(vx.v, &sin4_2.v, &cos4_2.v); unsigned j; for (j=0; j < 4; ++j) { float x = vx.f[j]; float sin_test = sin4.f[j]; float cos_test = cos4.f[j]; if (sin_test != sin4_2.f[j]) { printf("sin / sincos mismatch at x=%g\n", x); exit(1); return 1; } if (cos_test != cos4_2.f[j]) { printf("cos / sincos mismatch at x=%g\n", x); return 1; } float sin_ref = sinf(x); float sin_cep = cephes_sinf(x); float err_sin_ref = fabs(sin_ref - sin_test); float err_sin_cep = fabs(sin_cep - sin_test); if (err_sin_ref > max_err_sin_ref) { max_err_sin_ref = err_sin_ref; max_err_sin_x = x; } max_err_sin_cep = MAX(max_err_sin_cep, err_sin_cep); float cos_ref = cosf(x); float cos_cep = cephes_cosf(x); float err_cos_ref = fabs(cos_ref - cos_test); float err_cos_cep = fabs(cos_cep - cos_test); if (err_cos_ref > max_err_cos_ref) { max_err_cos_ref = err_cos_ref; max_err_cos_x = x; } max_err_cos_cep = MAX(max_err_cos_cep, err_cos_cep); float err_sum_sqr_test = fabs(1 - cos_test*cos_test - sin_test*sin_test); float err_sum_sqr_ref = fabs(1 - cos_ref*cos_ref - sin_ref*sin_ref); max_err_sum_sqr_ref = MAX(max_err_sum_sqr_ref, err_sum_sqr_ref); max_err_sum_sqr_test = MAX(max_err_sum_sqr_test, err_sum_sqr_test); 
//printf("sin(%g) = %g %g err=%g\n", x, sin_ref, sin_test, err_sin_ref); } } printf("max deviation from sinf(x): %g at %14.12g*Pi, max deviation from cephes_sin(x): %g\n", max_err_sin_ref, max_err_sin_x/M_PI, max_err_sin_cep); printf("max deviation from cosf(x): %g at %14.12g*Pi, max deviation from cephes_cos(x): %g\n", max_err_cos_ref, max_err_cos_x/M_PI, max_err_cos_cep); printf("deviation of sin(x)^2+cos(x)^2-1: %g (ref deviation is %g)\n", max_err_sum_sqr_test, max_err_sum_sqr_ref); if (max_err_sum_sqr_ref < 2e-7 && max_err_sin_ref < 2e-7 && max_err_cos_ref < 2e-7) { printf(" ->> precision OK for the sin_ps / cos_ps / sincos_ps <<-\n\n"); return 0; } else { printf("\n WRONG PRECISION !! there is a problem\n\n"); return 1; } }