Quat MUST_USE_RESULT Quat::RotateFromTo(const float4 &sourceDirection, const float4 &targetDirection) { #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE) // Best: 12.289 nsecs / 33.144 ticks, Avg: 12.489 nsecs, Worst: 14.210 nsecs simd4f cosAngle = dot4_ps(sourceDirection.v, targetDirection.v); cosAngle = negate3_ps(cosAngle); // [+ - - -] // XYZ channels use the trigonometric formula sin(x/2) = +/-sqrt(0.5-0.5*cosx)) // The W channel uses the trigonometric formula cos(x/2) = +/-sqrt(0.5+0.5*cosx)) simd4f half = set1_ps(0.5f); simd4f cosSinHalfAngle = sqrt_ps(add_ps(half, mul_ps(half, cosAngle))); // [cos(x/2), sin(x/2), sin(x/2), sin(x/2)] simd4f axis = cross_ps(sourceDirection.v, targetDirection.v); simd4f recipLen = rsqrt_ps(dot4_ps(axis, axis)); axis = mul_ps(axis, recipLen); // [0 z y x] // Set the w component to one. simd4f one = add_ps(half, half); // [1 1 1 1] simd4f highPart = _mm_unpackhi_ps(axis, one); // [_ _ 1 z] axis = _mm_movelh_ps(axis, highPart); // [1 z y x] Quat q; q.q = mul_ps(axis, cosSinHalfAngle); return q; #else // Best: 19.970 nsecs / 53.632 ticks, Avg: 20.197 nsecs, Worst: 21.122 nsecs assume(EqualAbs(sourceDirection.w, 0.f)); assume(EqualAbs(targetDirection.w, 0.f)); return Quat::RotateFromTo(sourceDirection.xyz(), targetDirection.xyz()); #endif }
float3x4 &float3x4::operator +=(const float3x4 &rhs) { #ifdef MATH_SIMD row[0] = add_ps(row[0], rhs.row[0]); row[1] = add_ps(row[1], rhs.row[1]); row[2] = add_ps(row[2], rhs.row[2]); #else for(int y = 0; y < Rows; ++y) for(int x = 0; x < Cols; ++x) v[y][x] += rhs[y][x]; #endif return *this; }
float3x4 float3x4::operator +(const float3x4 &rhs) const { #ifdef MATH_SIMD float3x4 r; r.row[0] = add_ps(row[0], rhs.row[0]); r.row[1] = add_ps(row[1], rhs.row[1]); r.row[2] = add_ps(row[2], rhs.row[2]); #else float3x4 r = *this; r += rhs; #endif return r; }
void AABBTransformAsAABB_SIMD(AABB &aabb, const float4x4 &m) { simd4f minPt = aabb.minPoint; simd4f maxPt = aabb.maxPoint; simd4f centerPoint = muls_ps(add_ps(minPt, maxPt), 0.5f); simd4f newCenter = mat4x4_mul_vec4(m.row, centerPoint); simd4f halfSize = sub_ps(centerPoint, minPt); simd4f x = abs_ps(mul_ps(m.row[0], halfSize)); simd4f y = abs_ps(mul_ps(m.row[1], halfSize)); simd4f z = abs_ps(mul_ps(m.row[2], halfSize)); simd4f w = zero_ps(); simd4f newDir = hadd4_ps(x, y, z, w); aabb.minPoint = sub_ps(newCenter, newDir); aabb.maxPoint = add_ps(newCenter, newDir); }
Quat Quat::operator +(const Quat &rhs) const { #ifdef MATH_AUTOMATIC_SSE return add_ps(q, rhs.q); #else return Quat(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w); #endif }