// Refines an estimate of the principal axis of a symmetric 3x3 matrix using a
// fixed number of power-iteration steps.
//
// matrix: six stored coefficients; entries 0..5 are expanded below into the
//         rows (0,1,2), (1,3,4) and (2,4,5) of the full symmetric matrix.
// Returns the iterated vector, rescaled on every step by the largest of its
// x/y/z components (signed maximum, not a unit-length normalisation).
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
    // Rows of the full matrix; the w lane is unused and held at zero.
    Vec4 const r0( matrix[0], matrix[1], matrix[2], 0.0f );
    Vec4 const r1( matrix[1], matrix[3], matrix[4], 0.0f );
    Vec4 const r2( matrix[2], matrix[4], matrix[5], 0.0f );

    // Start from the cheap estimate rather than a fixed vector or a matrix row.
    Vec3 seed = EstimatePrincipleComponent( matrix );
    Vec4 axis( seed.X(), seed.Y(), seed.Z(), 0.0f );

    for( int remaining = POWER_ITERATION_COUNT; remaining > 0; --remaining )
    {
        // product = M * axis, accumulated one row at a time.
        Vec4 product = MultiplyAdd(
            r2, axis.SplatZ(),
            MultiplyAdd( r1, axis.SplatY(), r0*axis.SplatX() ) );

        // Broadcast the largest of x, y, z into every lane.
        Vec4 const yz = Max( product.SplatY(), product.SplatZ() );
        Vec4 const peak = Max( product.SplatX(), yz );

        // Rescale by that component and use the result as the next iterate.
        axis = product*Reciprocal( peak );
    }

    return axis.GetVec3();
}
// Returns the best error found so far as a scalar: the stored m_besterror plus
// the metric-weighted m_xxsum term (m_xxsum * m_metricSqr summed over x, y, z).
float FastClusterFit::GetBestError() const
{
#if SQUISH_USE_SIMD
    // Per-channel weighted term; its x/y/z lanes are added one at a time, in
    // the same order as a left-to-right sum.
    Vec4 weighted = m_xxsum * m_metricSqr;

    Vec4 total = m_besterror + weighted.SplatX();
    total = total + weighted.SplatY();
    total = total + weighted.SplatZ();

    // Every lane holds the same value; read it back through x.
    return total.GetVec3().X();
#else
    return m_besterror + Dot(m_xxsum, m_metricSqr);
#endif
}
void FastClusterFit::Compress4( void* block ) { Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); Vec4 const onethird = VEC4_CONST( 1.0f/3.0f ); Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); Vec4 bestend = VEC4_CONST( 0.0f ); Vec4 besterror = VEC4_CONST( FLT_MAX ); Vec4 x0 = zero; int b0 = 0, b1 = 0, b2 = 0; int i = 0; // check all possible clusters for this total order for( int c0 = 0; c0 <= 16; c0++) { Vec4 x1 = zero; for( int c1 = 0; c1 <= 16-c0; c1++) { Vec4 x2 = zero; for( int c2 = 0; c2 <= 16-c0-c1; c2++) { Vec4 const constants = Vec4((const float *)&s_fourElement[i]); Vec4 const alpha2_sum = constants.SplatX(); Vec4 const beta2_sum = constants.SplatY(); Vec4 const alphabeta_sum = constants.SplatZ(); Vec4 const factor = constants.SplatW(); i++; Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); Vec4 const betax_sum = m_xsum - alphax_sum; Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; // clamp the output to [0, 1] a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; // compute the error Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); // apply the metric to the error term Vec4 e4 = e3 * m_metricSqr; Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { besterror = error; beststart = a; 
bestend = b; b0 = c0; b1 = c1; b2 = c2; } x2 += m_unweighted[c0+c1+c2]; } x1 += m_unweighted[c0+c1]; } x0 += m_unweighted[c0]; } // save the block if necessary if( CompareAnyLessThan( besterror, m_besterror ) ) { // compute indices from cluster sizes. /*uint bestindices = 0; { int i = b0; for(; i < b0+b1; i++) { bestindices = 2 << (2 * m_order[i]); } for(; i < b0+b1+b2; i++) { bestindices = 3 << (2 * m_order[i]); } for(; i < 16; i++) { bestindices = 1 << (2 * m_order[i]); } }*/ u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; // save the block WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); // save the error m_besterror = besterror; } }