예제 #1
0
float Cos(float angleRadians)
{
#ifdef MATH_USE_SINCOS_LOOKUPTABLE
	return cos_lookuptable(angleRadians);
#elif defined(MATH_SSE2)
	// Do range reduction by 2pi before calling cos - this enchances precision of cos_ps a lot
	return s4f_x(cos_ps(modf_ps(setx_ps(angleRadians), pi2)));
#else
	return cosf(angleRadians);
#endif
}
예제 #2
0
void SinCos(float angleRadians, float &outSin, float &outCos)
{
#ifdef MATH_USE_SINCOS_LOOKUPTABLE
	return sincos_lookuptable(angleRadians, outSin, outCos);
#elif defined(MATH_SSE2)
	__m128 angle = modf_ps(setx_ps(angleRadians), pi2);
	__m128 sin, cos;
	sincos_ps(angle, &sin, &cos);
	outSin = s4f_x(sin);
	outCos = s4f_x(cos);
#else
	outSin = Sin(angleRadians);
	outCos = Cos(angleRadians);
#endif
}
예제 #3
0
bool AABB::IntersectLineAABB_SSE(const float4 &rayPos, const float4 &rayDir, float tNear, float tFar) const
{
	assume(rayDir.IsNormalized4());
	assume(tNear <= tFar && "AABB::IntersectLineAABB: User gave a degenerate line as input for the intersection test!");
	/* For reference, this is the C++ form of the vectorized SSE code below.

	float4 recipDir = rayDir.RecipFast4();
	float4 t1 = (aabbMinPoint - rayPos).Mul(recipDir);
	float4 t2 = (aabbMaxPoint - rayPos).Mul(recipDir);
	float4 near = t1.Min(t2);
	float4 far = t1.Max(t2);
	float4 rayDirAbs = rayDir.Abs();

	if (rayDirAbs.x > 1e-4f) // ray is parallel to plane in question
	{
		tNear = Max(near.x, tNear); // tNear tracks distance to intersect (enter) the AABB.
		tFar = Min(far.x, tFar); // tFar tracks the distance to exit the AABB.
	}
	else if (rayPos.x < aabbMinPoint.x || rayPos.x > aabbMaxPoint.x) // early-out if the ray can't possibly enter the box.
		return false;

	if (rayDirAbs.y > 1e-4f) // ray is parallel to plane in question
	{
		tNear = Max(near.y, tNear); // tNear tracks distance to intersect (enter) the AABB.
		tFar = Min(far.y, tFar); // tFar tracks the distance to exit the AABB.
	}
	else if (rayPos.y < aabbMinPoint.y || rayPos.y > aabbMaxPoint.y) // early-out if the ray can't possibly enter the box.
		return false;

	if (rayDirAbs.z > 1e-4f) // ray is parallel to plane in question
	{
		tNear = Max(near.z, tNear); // tNear tracks distance to intersect (enter) the AABB.
		tFar = Min(far.z, tFar); // tFar tracks the distance to exit the AABB.
	}
	else if (rayPos.z < aabbMinPoint.z || rayPos.z > aabbMaxPoint.z) // early-out if the ray can't possibly enter the box.
		return false;

	return tNear < tFar;
	*/

	simd4f recipDir = rcp_ps(rayDir.v);
	// Note: The above performs an approximate reciprocal (11 bits of precision).
	// For a full precision reciprocal, perform a div:
//	simd4f recipDir = div_ps(set1_ps(1.f), rayDir.v);

	simd4f t1 = mul_ps(sub_ps(minPoint, rayPos.v), recipDir);
	simd4f t2 = mul_ps(sub_ps(maxPoint, rayPos.v), recipDir);

	simd4f nearD = min_ps(t1, t2); // [0 n3 n2 n1]
	simd4f farD = max_ps(t1, t2);  // [0 f3 f2 f1]

	// Check if the ray direction is parallel to any of the cardinal axes, and if so,
	// mask those [near, far] ranges away from the hit test computations.
	simd4f rayDirAbs = abs_ps(rayDir.v);

	const simd4f epsilon = set1_ps(1e-4f);
	// zeroDirections[i] will be nonzero for each axis i the ray is parallel to.
	simd4f zeroDirections = cmple_ps(rayDirAbs, epsilon);

	const simd4f floatInf = set1_ps(FLOAT_INF);
	const simd4f floatNegInf = set1_ps(-FLOAT_INF);

	// If the ray is parallel to one of the axes, replace the slab range for that axis
	// with [-inf, inf] range instead. (which is a no-op in the comparisons below)
	nearD = cmov_ps(nearD, floatNegInf, zeroDirections);
	farD = cmov_ps(farD, floatInf, zeroDirections);

	// Next, we need to compute horizontally max(nearD[0], nearD[1], nearD[2]) and min(farD[0], farD[1], farD[2])
	// to see if there is an overlap in the hit ranges.
	simd4f v1 = axx_bxx_ps(nearD, farD); // [f1 f1 n1 n1]
	simd4f v2 = ayy_byy_ps(nearD, farD); // [f2 f2 n2 n2]
	simd4f v3 = azz_bzz_ps(nearD, farD); // [f3 f3 n3 n3]
	nearD = max_ps(v1, max_ps(v2, v3));
	farD = min_ps(v1, min_ps(v2, v3));
	farD = wwww_ps(farD); // Unpack the result from high offset in the register.
	nearD = max_ps(nearD, setx_ps(tNear));
	farD = min_ps(farD, setx_ps(tFar));

	// Finally, test if the ranges overlap.
	simd4f rangeIntersects = cmple_ps(nearD, farD); // Only x channel used, higher ones ignored.

	// To store out out the interval of intersection, uncomment the following:
	// These are disabled, since without these, the whole function runs without a single memory store,
	// which has been profiled to be very fast! Uncommenting these causes an order-of-magnitude slowdown.
	// For now, using the SSE version only where the tNear and tFar ranges are not interesting.
//	_mm_store_ss(&tNear, nearD);
//	_mm_store_ss(&tFar, farD);

	// To avoid false positives, need to have an additional rejection test for each cardinal axis the ray direction
	// is parallel to.
	simd4f out2 = cmplt_ps(rayPos.v, minPoint);
	simd4f out3 = cmpgt_ps(rayPos.v, maxPoint);
	out2 = or_ps(out2, out3);
	zeroDirections = and_ps(zeroDirections, out2);

	simd4f yOut = yyyy_ps(zeroDirections);
	simd4f zOut = zzzz_ps(zeroDirections);

	zeroDirections = or_ps(or_ps(zeroDirections, yOut), zOut);
	// Intersection occurs if the slab ranges had positive overlap and if the test was not rejected by the ray being
	// parallel to some cardinal axis.
	simd4f intersects = andnot_ps(zeroDirections, rangeIntersects);
	simd4f epsilonMasked = and_ps(epsilon, intersects);
	return comieq_ss(epsilon, epsilonMasked) != 0;
}