/// Refines an estimate of the principal axis of a symmetric 3x3 matrix by
/// repeated matrix/vector multiplication.
///
/// The iteration starts from EstimatePrincipleComponent( matrix ) and runs
/// POWER_ITERATION_COUNT times. After every multiplication the vector is
/// divided by the largest of its x/y/z components.
///
/// @param matrix  symmetric matrix; elements 0..5 are read as the
///                xx, xy, xz, yy, yz, zz coefficients
/// @return the x/y/z part of the vector after the last iteration
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
	// expand the six stored coefficients into the three full rows
	Vec4 const rowX( matrix[0], matrix[1], matrix[2], 0.0f );
	Vec4 const rowY( matrix[1], matrix[3], matrix[4], 0.0f );
	Vec4 const rowZ( matrix[2], matrix[4], matrix[5], 0.0f );

	// seed the iteration with the cheap estimate instead of a fixed axis
	Vec3 seed = EstimatePrincipleComponent( matrix );
	Vec4 axis( seed.X(), seed.Y(), seed.Z(), 0.0f );

	for( int remaining = POWER_ITERATION_COUNT; remaining > 0; --remaining )
	{
		// matrix * axis, accumulated one row at a time
		Vec4 product = MultiplyAdd(
			rowZ, axis.SplatZ(),
			MultiplyAdd( rowY, axis.SplatY(), rowX*axis.SplatX() ) );

		// largest of x, y, z replicated into every channel
		Vec4 largest = Max( product.SplatX(), Max( product.SplatY(), product.SplatZ() ) );

		// rescale so the largest component becomes one, then advance
		axis = product*Reciprocal( largest );
	}

	return axis.GetVec3();
}
int main(int /*argc*/, char** /*argv*/) { std::string opts = ReadFileIntoString("MultiplyAdd.input"); OptionParser parser(opts); double a = parser.Get<double>("A"); double b = parser.Get<double>("B"); double c = parser.Get<double>("C"); double result = MultiplyAdd(a, b, c); std::cout << "a is " << a << ", b is " << b << ", c is " << c << std::endl; std::cout << "The result of MultiplyAdd on a, b, & c is: " << result << std::endl; return EXIT_SUCCESS; }
/// Solves for the pair of endpoints that best fits m_weighted under the
/// current m_alpha / m_beta assignment, snaps both endpoints to the endpoint
/// grid and returns the error of the snapped solution.
///
/// @param start  receives the snapped endpoint associated with m_alpha
/// @param end    receives the snapped endpoint associated with m_beta
/// @return the x, y and z lanes of the metric-weighted error added together
Vec4 ClusterFit::SolveLeastSquares( Vec4& start, Vec4& end ) const
{
	// accumulate all the quantities we need
	int const count = m_colours->GetCount();
	Vec4 alpha2_sum = VEC4_CONST( 0.0f );
	Vec4 beta2_sum = VEC4_CONST( 0.0f );
	Vec4 alphabeta_sum = VEC4_CONST( 0.0f );
	Vec4 alphax_sum = VEC4_CONST( 0.0f );
	Vec4 betax_sum = VEC4_CONST( 0.0f );
	for( int i = 0; i < count; ++i )
	{
		Vec4 alpha = m_alpha[i];
		Vec4 beta = m_beta[i];
		Vec4 x = m_weighted[i];

		alpha2_sum = MultiplyAdd( alpha, alpha, alpha2_sum );
		beta2_sum = MultiplyAdd( beta, beta, beta2_sum );
		alphabeta_sum = MultiplyAdd( alpha, beta, alphabeta_sum );
		alphax_sum = MultiplyAdd( alpha, x, alphax_sum );
		betax_sum = MultiplyAdd( beta, x, betax_sum );
	}

	// select the results
	Vec4 const zero = VEC4_CONST( 0.0f );
	Vec4 beta2_sum_zero = CompareEqual( beta2_sum, zero );
	Vec4 alpha2_sum_zero = CompareEqual( alpha2_sum, zero );

	// a1/b1: solutions used when the other endpoint has no contribution
	Vec4 a1 = alphax_sum*Reciprocal( alpha2_sum );
	Vec4 b1 = betax_sum*Reciprocal( beta2_sum );

	// a2/b2: general solution of the 2x2 system; factor is the reciprocal of
	// alpha2_sum*beta2_sum - alphabeta_sum^2 (NegativeMultiplySubtract( a, b, c )
	// is used as c - a*b here; confirm against its definition)
	Vec4 factor = Reciprocal( NegativeMultiplySubtract(
		alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum
	) );
	Vec4 a2 = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
	Vec4 b2 = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;

	// an endpoint whose own squared sum is zero is forced to zero; otherwise
	// the single-endpoint solution is taken when the other squared sum is zero
	// (presumably Select returns its second argument where the mask is set)
	Vec4 a = Select( Select( a2, a1, beta2_sum_zero ), zero, alpha2_sum_zero );
	Vec4 b = Select( Select( b2, b1, alpha2_sum_zero ), zero, beta2_sum_zero );

	// clamp the output to [0, 1]
	Vec4 const one = VEC4_CONST( 1.0f );
	Vec4 const half = VEC4_CONST( 0.5f );
	a = Min( one, Max( zero, a ) );
	b = Min( one, Max( zero, b ) );

	// clamp to the grid (31/63/31 steps for x/y/z), rounding to nearest
	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
	a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
	b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;

	// compute the error:
	//   a^2*alpha2 + b^2*beta2 + xxsum + 2*( a*b*alphabeta - a*alphax - b*betax )
	Vec4 const two = VEC4_CONST( 2.0 );
	Vec4 e1 = MultiplyAdd( b*b, beta2_sum, m_xxsum );
	Vec4 e2 = MultiplyAdd( a, alphax_sum, b*betax_sum );
	Vec4 e3 = MultiplyAdd( a*a, alpha2_sum, e1 );
	Vec4 e4 = MultiplyAdd( a*b*alphabeta_sum - e2, two, e3 );

	// apply the metric to the error term
	Vec4 e5 = e4*m_metricSqr;
	Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();

	// save the start and end
	start = a;
	end = b;
	return error;
}
void FastClusterFit::Compress4( void* block ) { Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); Vec4 const onethird = VEC4_CONST( 1.0f/3.0f ); Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); Vec4 bestend = VEC4_CONST( 0.0f ); Vec4 besterror = VEC4_CONST( FLT_MAX ); Vec4 x0 = zero; int b0 = 0, b1 = 0, b2 = 0; int i = 0; // check all possible clusters for this total order for( int c0 = 0; c0 <= 16; c0++) { Vec4 x1 = zero; for( int c1 = 0; c1 <= 16-c0; c1++) { Vec4 x2 = zero; for( int c2 = 0; c2 <= 16-c0-c1; c2++) { Vec4 const constants = Vec4((const float *)&s_fourElement[i]); Vec4 const alpha2_sum = constants.SplatX(); Vec4 const beta2_sum = constants.SplatY(); Vec4 const alphabeta_sum = constants.SplatZ(); Vec4 const factor = constants.SplatW(); i++; Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); Vec4 const betax_sum = m_xsum - alphax_sum; Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; // clamp the output to [0, 1] a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; // compute the error Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); // apply the metric to the error term Vec4 e4 = e3 * m_metricSqr; Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { besterror = error; beststart = a; 
bestend = b; b0 = c0; b1 = c1; b2 = c2; } x2 += m_unweighted[c0+c1+c2]; } x1 += m_unweighted[c0+c1]; } x0 += m_unweighted[c0]; } // save the block if necessary if( CompareAnyLessThan( besterror, m_besterror ) ) { // compute indices from cluster sizes. /*uint bestindices = 0; { int i = b0; for(; i < b0+b1; i++) { bestindices = 2 << (2 * m_order[i]); } for(; i < b0+b1+b2; i++) { bestindices = 3 << (2 * m_order[i]); } for(; i < 16; i++) { bestindices = 1 << (2 * m_order[i]); } }*/ u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; // save the block WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); // save the error m_besterror = besterror; } }
void WeightedClusterFit::Compress4( void* block ) { int const count = m_colours->GetCount(); Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); Vec4 bestend = VEC4_CONST( 0.0f ); Vec4 besterror = VEC4_CONST( FLT_MAX ); Vec4 x0 = zero; int b0 = 0, b1 = 0, b2 = 0; // check all possible clusters for this total order for( int c0 = 0; c0 < count; c0++) { Vec4 x1 = zero; for( int c1 = 0; c1 < count-c0; c1++) { Vec4 x2 = zero; for( int c2 = 0; c2 < count-c0-c1; c2++) { Vec4 const x3 = m_xsum - x2 - x1 - x0; //Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); //float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum Vec4 const alpha2_sum = alphax_sum.SplatW(); //Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum Vec4 const beta2_sum = betax_sum.SplatW(); //float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = 
NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; // compute the error (we skip the constant xxsum) Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); Vec4 e4 = MultiplyAdd( two, e3, e1 ); // apply the metric to the error term Vec4 e5 = e4 * m_metricSqr; Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; b2 = c2; } x2 += m_weighted[c0+c1+c2]; } x1 += m_weighted[c0+c1]; } x0 += m_weighted[c0]; } // save the block if necessary if( CompareAnyLessThan( besterror, m_besterror ) ) { // compute indices from cluster sizes. u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } for(; i < count; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < count; ++i ) ordered[m_order[i]] = bestindices[i]; m_colours->RemapIndices( ordered, bestindices ); // save the block WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); // save the error m_besterror = besterror; } }
void BitoneClusterFit::ClusterFit4Constant(void* block) { // declare variables int const count = m_bitones->GetCount(); Vec4 const two = VEC4_CONST(2.0f); Vec4 const one = VEC4_CONST(1.0f); Vec4 const onethird_onethird2 (1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 9.0f); Vec4 const twothirds_twothirds2(2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 4.0f / 9.0f); Vec4 const twonineths = VEC4_CONST(2.0f / 9.0f); assume((count > 0) && (count <= 16)); // check all possible clusters and iterate on the total order Vec4 beststart = VEC4_CONST(0.0f); Vec4 bestend = VEC4_CONST(0.0f); Scr4 besterror = m_besterror; u8 bestindices[16]; int bestiteration = 0; int besti = 0, bestj = 0, bestk = 0; // prepare an ordering using the principle axis ConstructOrdering(m_principle, 0); // loop over iterations (we avoid the case that all points in first or last cluster) for (int iterationIndex = 0;;) { // cache some values Vec4 const xsum_wsum = m_xsum_wsum; // constants if weights == 1 Vec4 alphabeta_dltas = *((Vec4 *)part2delta[0]); Vec4 *alphabeta_inits = (Vec4 *)part2inits[0]; float *alphabeta_factors = (float *)part2factors; #if 0 Vec4 lasta = Vec4(0.0f); Vec4 lastb = xsum_wsum; Vec4 lastc = Vec4(0.0f); #endif // first cluster [0,i) is at the start Vec4 part0 = VEC4_CONST(0.0f); for (int i = 0; i < count; ++i) { // second cluster [i,j) is one third along Vec4 part1 = VEC4_CONST(0.0f); for (int j = i;;) { // third cluster [j,k) is two thirds along Vec4 part2 = (j == 0) ? m_points_weights[0] : VEC4_CONST(0.0f); Vec4 alphabeta_val = *alphabeta_inits++; int kmin = (j == 0) ? 
1 : j; for (int k = kmin;;) { // TODO: the inner alphabeta_sum seems always to be the same sequence Vec4 alphabeta_factor = alphabeta_val * Vec4(*alphabeta_factors++); // compute least squares terms directly Vec4 const alphax_sum = MultiplyAdd(part2, onethird_onethird2, MultiplyAdd(part1, twothirds_twothirds2, part0)); Vec4 const betax_sum = /*MultiplyAdd(part1, onethird_onethird2, MultiplyAdd(part2, twothirds_twothirds2, part3))*/ xsum_wsum - alphax_sum; Vec4 const alpha2_sum = alphabeta_val.SplatX(); Vec4 const beta2_sum = alphabeta_val.SplatY(); Vec4 const alphabeta_sum = alphabeta_val.SplatZ(); Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_factor.SplatZ(), alphax_sum * alphabeta_factor.SplatY()); Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_factor.SplatZ(), betax_sum * alphabeta_factor.SplatX()); #if 0 // last cluster [k,count) is at the end Vec4 part3 = xsum_wsum - part2 - part1 - part0; // compute least squares terms directly Vec4 const _alphax_sum = MultiplyAdd(part2, onethird_onethird2, MultiplyAdd(part1, twothirds_twothirds2, part0)); Vec4 const _betax_sum = MultiplyAdd(part1, onethird_onethird2, MultiplyAdd(part2, twothirds_twothirds2, part3)); // Vec4 const _betac_sum = xsum_wsum - _alphax_sum; Vec4 const _alpha2_sum = _alphax_sum.SplatW(); Vec4 const _beta2_sum = _betax_sum.SplatW(); Vec4 const _alphabeta_sum = twonineths * (part1 + part2).SplatW(); // compute the least-squares optimal points Vec4 _factor = Reciprocal(NegativeMultiplySubtract(_alphabeta_sum, _alphabeta_sum, _alpha2_sum * _beta2_sum)); Vec4 _a = NegativeMultiplySubtract( _betax_sum, _alphabeta_sum, _alphax_sum * _beta2_sum) * _factor; Vec4 _b = NegativeMultiplySubtract(_alphax_sum, _alphabeta_sum, _betax_sum * _alpha2_sum) * _factor; #undef limit #define limit 2e-5 assert(fabs(_alpha2_sum.W() - alpha2_sum.X()) < limit); assert(fabs(_beta2_sum.W() - beta2_sum.X()) < limit); assert(fabs(_alphabeta_sum.W() - alphabeta_sum.X()) < limit); if (alphabeta_factors[-1] != 
FLT_MAX) { assert(fabs(_factor.W() - alphabeta_factors[-1]) < limit); assert(fabs(_alpha2_sum.W() * _factor.W() - alphabeta_factor.X()) < limit); assert(fabs(_beta2_sum.W() * _factor.W() - alphabeta_factor.Y()) < limit); assert(fabs(_alphabeta_sum.W() * _factor.W() - alphabeta_factor.Z()) < limit); assert(fabs(a.X() - _a.X()) < limit); assert(fabs(a.Y() - _a.Y()) < limit); assert(fabs(a.Z() - _a.Z()) < limit); assert(fabs(b.X() - _b.X()) < limit); assert(fabs(b.Y() - _b.Y()) < limit); assert(fabs(b.Z() - _b.Z()) < limit); } #if 0 fprintf(stderr, "{%.9ff},", _factor.W()); if (k == kmin) fprintf(stderr, "{%.9f, %.9f, %.9f},\n", alpha2_sum.W(), beta2_sum.W(), alphabeta_sum.W()); fprintf(stderr, "{%.9f/*%.9f*/,%.9f/*%.9f*/,%.9f,%.9f},\n", alpha2_sum.W(), lasta.W() - alpha2_sum.W(), beta2_sum.W(), lastb.W() - beta2_sum.W(), alphabeta_sum.W(), lastc.W() - alphabeta_sum.W(), factor.W()); lasta = alpha2_sum; lastb = beta2_sum; lastc = alphabeta_sum; #endif #endif // snap floating-point-values to the integer-lattice a = Truncate(a * 255.0f) * (1.0f / 255.0f); b = Truncate(b * 255.0f) * (1.0f / 255.0f); // compute the error (we skip the constant xxsum) Vec4 e1 = MultiplyAdd(a * a, alpha2_sum, b * b * beta2_sum); Vec4 e2 = NegativeMultiplySubtract(a, alphax_sum, a * b * alphabeta_sum); Vec4 e3 = NegativeMultiplySubtract(b, betax_sum, e2); Vec4 e4 = MultiplyAdd(two, e3, e1); // apply the metric to the error term Scr4 eS = e4; // keep the solution if it wins if (besterror > eS) { besterror = eS; beststart = a; bestend = b; bestiteration = iterationIndex; besti = i; bestj = j; bestk = k; } alphabeta_val += alphabeta_dltas; // advance if (k == count) break; part2 += m_points_weights[k]; ++k; } // advance if (j == count) break; part1 += m_points_weights[j]; ++j; } // advance part0 += m_points_weights[i]; } // stop if we didn't improve in this iteration if (bestiteration != iterationIndex) break; // advance if possible ++iterationIndex; if (iterationIndex == m_iterationCount) 
break; // stop if a new iteration is an ordering that has already been tried Vec3 axis = (bestend - beststart).GetVec3(); if (!ConstructOrdering(axis, iterationIndex)) break; } // save the block if necessary if (besterror < m_besterror) { // save the error m_besterror = besterror; // remap the indices u8 const* order = (u8*)m_order + 16 * bestiteration; u8 unordered[16]; for (int m = 0; m < besti; ++m) unordered[order[m]] = 0; for (int m = besti; m < bestj; ++m) unordered[order[m]] = 2; for (int m = bestj; m < bestk; ++m) unordered[order[m]] = 3; for (int m = bestk; m < count; ++m) unordered[order[m]] = 1; m_bitones->RemapIndices(unordered, bestindices); // save the block WriteBitoneBlock4(beststart.GetVec3(), bestend.GetVec3(), bestindices, block); } }
void BitoneClusterFit::ClusterFit4(void* block) { // declare variables int const count = m_bitones->GetCount(); Vec4 const two = VEC4_CONST(2.0f); Vec4 const one = VEC4_CONST(1.0f); Vec4 const onethird_onethird2 (1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 9.0f); Vec4 const twothirds_twothirds2(2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 4.0f / 9.0f); Vec4 const twonineths = VEC4_CONST(2.0f / 9.0f); assume((count > 0) && (count <= 16)); // prepare an ordering using the principle axis ConstructOrdering(m_principle, 0); // check all possible clusters and iterate on the total order Vec4 beststart = VEC4_CONST(0.0f); Vec4 bestend = VEC4_CONST(0.0f); Scr4 besterror = m_besterror; u8 bestindices[16]; int bestiteration = 0; int besti = 0, bestj = 0, bestk = 0; // loop over iterations (we avoid the case that all points in first or last cluster) for (int iterationIndex = 0;;) { // first cluster [0,i) is at the start Vec4 part0 = VEC4_CONST(0.0f); for (int i = 0; i < count; ++i) { // second cluster [i,j) is one third along Vec4 part1 = VEC4_CONST(0.0f); for (int j = i;;) { // third cluster [j,k) is two thirds along Vec4 part2 = (j == 0) ? m_points_weights[0] : VEC4_CONST(0.0f); int kmin = (j == 0) ? 
1 : j; for (int k = kmin;;) { // last cluster [k,count) is at the end Vec4 part3 = m_xsum_wsum - part2 - part1 - part0; // compute least squares terms directly Vec4 const alphax_sum = MultiplyAdd(part2, onethird_onethird2, MultiplyAdd(part1, twothirds_twothirds2, part0)); Vec4 const betax_sum = MultiplyAdd(part1, onethird_onethird2, MultiplyAdd(part2, twothirds_twothirds2, part3)); Vec4 const alpha2_sum = alphax_sum.SplatW(); Vec4 const beta2_sum = betax_sum.SplatW(); Vec4 const alphabeta_sum = twonineths * (part1 + part2).SplatW(); // compute the least-squares optimal points Vec4 factor = Reciprocal(NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum * beta2_sum)); Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum * beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum * alpha2_sum) * factor; // snap floating-point-values to the integer-lattice a = Truncate(a * 255.0f) * (1.0f / 255.0f); b = Truncate(b * 255.0f) * (1.0f / 255.0f); // compute the error (we skip the constant xxsum) Vec4 e1 = MultiplyAdd(a * a, alpha2_sum, b * b * beta2_sum); Vec4 e2 = NegativeMultiplySubtract(a, alphax_sum, a * b * alphabeta_sum); Vec4 e3 = NegativeMultiplySubtract(b, betax_sum, e2); Vec4 e4 = MultiplyAdd(two, e3, e1); // apply the metric to the error term Scr4 eS = e4; // keep the solution if it wins if (besterror > eS) { besterror = eS; beststart = a; bestend = b; besti = i; bestj = j; bestk = k; bestiteration = iterationIndex; } // advance if (k == count) break; part2 += m_points_weights[k]; ++k; } // advance if (j == count) break; part1 += m_points_weights[j]; ++j; } // advance part0 += m_points_weights[i]; } // stop if we didn't improve in this iteration if (bestiteration != iterationIndex) break; // advance if possible ++iterationIndex; if (iterationIndex == m_iterationCount) break; // stop if a new iteration is an ordering that has already been tried Vec3 axis = (bestend - beststart).GetVec3(); if 
(!ConstructOrdering(axis, iterationIndex)) break; } // save the block if necessary if (besterror < m_besterror) { // save the error m_besterror = besterror; // remap the indices u8 const* order = (u8*)m_order + 16 * bestiteration; u8 unordered[16]; for (int m = 0; m < besti; ++m) unordered[order[m]] = 0; for (int m = besti; m < bestj; ++m) unordered[order[m]] = 2; for (int m = bestj; m < bestk; ++m) unordered[order[m]] = 3; for (int m = bestk; m < count; ++m) unordered[order[m]] = 1; m_bitones->RemapIndices(unordered, bestindices); // save the block WriteBitoneBlock4(beststart.GetVec3(), bestend.GetVec3(), bestindices, block); } }