void SingleColourFit::Compress3( void* block ) { // build the table of lookups SingleColourLookup const* const lookups[] = { lookup_5_3, lookup_6_3, lookup_5_3 }; // find the best end-points and index ComputeEndPoints( lookups ); // build the block if we win if( m_error < m_besterror ) { // remap the indices u8 indices[16]; m_colours->RemapIndices( &m_index, indices ); // save the block WriteColourBlock3( m_start, m_end, indices, block ); // save the error m_besterror = m_error; } }
void ColourNormalFit::Compress3(void* block) { const Vec3 scale = Vec3( 1.0f / 0.5f); const Vec3 offset = Vec3(-1.0f * 0.5f); // cache some values int const count = m_colours->GetCount(); Vec3 const* values = m_colours->GetPoints(); Scr3 const* freq = m_colours->GetWeights(); // use a fitting algorithm m_start = m_start_candidate; m_end = m_end_candidate; if ((m_flags & kColourIterativeClusterFits)) kMeans3(); //if ((m_flags & kColourIterativeClusterFits) >= (kColourIterativeClusterFit)) // Permute3(); // create a codebook // resolve "metric * (value - code)" to "metric * value - metric * code" Vec3 codes[3]; Codebook3n(codes, m_start, m_end); // match each point to the closest code u8 closest[16]; Scr3 error = Scr3(DEVIANCE_BASE); for (int i = 0; i < count; ++i) { int idx = 0; // find the closest code Vec3 value = Normalize(scale * (offset + values[i])); Scr3 dist; MinDeviance3<true>(dist, idx, value, codes); // accumulate the error AddDeviance(dist, error, freq[i]); // save the index closest[i] = (u8)idx; } // save this scheme if it wins if (error < m_besterror) { // save the error m_besterror = error; // remap the indices u8 indices[16]; m_colours->RemapIndices(closest, indices); // save the block WriteColourBlock3(m_start, m_end, indices, block); } }
void RangeFit::Compress3( void* block ) { // cache some values int const count = m_colours->GetCount(); Vec3 const* values = m_colours->GetPoints(); // create a codebook Vec3 codes[3]; codes[0] = m_start; codes[1] = m_end; codes[2] = 0.5f*m_start + 0.5f*m_end; // match each point to the closest code u8 closest[16]; float error = 0.0f; for( int i = 0; i < count; ++i ) { // find the closest code float dist = FLT_MAX; int idx = 0; for( int j = 0; j < 3; ++j ) { float d = LengthSquared( m_metric*( values[i] - codes[j] ) ); if( d < dist ) { dist = d; idx = j; } } // save the index closest[i] = ( u8 )idx; // accumulate the error error += dist; } // save this scheme if it wins if( error < m_besterror ) { // remap the indices u8 indices[16]; m_colours->RemapIndices( closest, indices ); // save the block WriteColourBlock3( m_start, m_end, indices, block ); // save the error m_besterror = error; } }
void ClusterFit::Compress3( void* block ) { // declare variables int const count = m_colours->GetCount(); #if SQUISH_USE_SIMD Vec4 beststart = VEC4_CONST( 0.0f ); Vec4 bestend = VEC4_CONST( 0.0f ); Vec4 besterror = VEC4_CONST( FLT_MAX ); Vec4 const half = VEC4_CONST( 0.5f ); Vec4 const zero = VEC4_CONST( 0.0f ); #else Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); float besterror = FLT_MAX; float const half = 0.5f; float const zero = 0.0f; #endif // check all possible clusters for this total order u8 indices[16]; u8 bestindices[16]; // first cluster [0,i) is at the start for( int m = 0; m < count; ++m ) { indices[m] = 0; m_alpha[m] = m_weights[m]; m_beta[m] = zero; } for( int i = count; i >= 0; --i ) { // second cluster [i,j) is half along for( int m = i; m < count; ++m ) { indices[m] = 2; m_alpha[m] = m_beta[m] = half*m_weights[m]; } for( int j = count; j > i; --j ) { // last cluster [j,k) is at the end if( j < count ) { indices[j] = 1; m_alpha[j] = zero; m_beta[j] = m_weights[j]; } // solve a least squares problem to place the endpoints #if SQUISH_USE_SIMD Vec4 start, end; Vec4 error = SolveLeastSquares( start, end ); #else Vec3 start, end; float error = SolveLeastSquares( start, end ); #endif // keep the solution if it wins #if SQUISH_USE_SIMD if( CompareAnyLessThan( error, besterror ) ) #else if( error < besterror ) #endif { beststart = start; bestend = end; for( int m = 0; m < 16; ++m ) // TODO: make this faster? bestindices[m] = indices[m]; besterror = error; } } } // save the block if necessary #if SQUISH_USE_SIMD if( CompareAnyLessThan( besterror, m_besterror ) ) #else if( besterror < m_besterror ) #endif { // remap the indices u8 unordered[16]; for( int i = 0; i < count; ++i ) unordered[m_order[i]] = bestindices[i]; m_colours->RemapIndices( unordered, bestindices ); // save the block #if SQUISH_USE_SIMD WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); #else WriteColourBlock3( beststart, bestend, bestindices, block ); #endif // save the error m_besterror = besterror; } }
void FastClusterFit::Compress3( void* block ) { // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); float besterror = FLT_MAX; Vec3 x0(0.0f); Vec3 x1; int b0 = 0, b1 = 0; int i = 0; // check all possible clusters for this total order for( int c0 = 0; c0 <= 16; c0++) { x1 = Vec3(0); for( int c1 = 0; c1 <= 16-c0; c1++) { float const alpha2_sum = s_threeElement[i].alpha2_sum; float const beta2_sum = s_threeElement[i].beta2_sum; float const alphabeta_sum = s_threeElement[i].alphabeta_sum; float const factor = s_threeElement[i].factor; i++; Vec3 const alphax_sum = x0 + x1 * 0.5f; Vec3 const betax_sum = m_xsum - alphax_sum; Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; // clamp the output to [0, 1] Vec3 const one( 1.0f ); Vec3 const zero( 0.0f ); a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); // clamp to the grid Vec3 const grid( 31.0f, 63.0f, 31.0f ); Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; // compute the error Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); // apply the metric to the error term float error = Dot( e1, m_metricSqr ); // keep the solution if it wins if( error < besterror ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; } x1 += m_unweighted[c0+c1]; } x0 += m_unweighted[c0]; } // save the block if necessary if( besterror < m_besterror ) { // compute indices from cluster sizes. /*uint bestindices = 0; { int i = b0; for(; i < b0+b1; i++) { bestindices |= 2 << (2 * m_order[i]); } for(; i < 16; i++) { bestindices |= 1 << (2 * m_order[i]); } }*/ u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; // save the block WriteColourBlock3( beststart, bestend, ordered, block ); // save the error m_besterror = besterror; } }
void FastClusterFit::Compress3( void* block ) { Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); Vec4 bestend = VEC4_CONST( 0.0f ); Vec4 besterror = VEC4_CONST( FLT_MAX ); Vec4 x0 = zero; Vec4 x1; int b0 = 0, b1 = 0; int i = 0; // check all possible clusters for this total order for( int c0 = 0; c0 <= 16; c0++) { x1 = zero; for( int c1 = 0; c1 <= 16-c0; c1++) { Vec4 const constants = Vec4((const float *)&s_threeElement[i]); Vec4 const alpha2_sum = constants.SplatX(); Vec4 const beta2_sum = constants.SplatY(); Vec4 const alphabeta_sum = constants.SplatZ(); Vec4 const factor = constants.SplatW(); i++; Vec4 const alphax_sum = MultiplyAdd(half, x1, x0); Vec4 const betax_sum = m_xsum - alphax_sum; Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; // clamp the output to [0, 1] a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; // compute the error Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); // apply the metric to the error term Vec4 e4 = e3 * m_metricSqr; Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; } x1 += m_unweighted[c0+c1]; } x0 += m_unweighted[c0]; } // save the block if necessary if( CompareAnyLessThan( besterror, m_besterror ) ) { // compute indices from cluster sizes. /*uint bestindices = 0; { int i = b0; for(; i < b0+b1; i++) { bestindices |= 2 << (2 * i); } for(; i < 16; i++) { bestindices |= 1 << (2 * i); } }*/ u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices. // save the block WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), ordered, block ); // save the error m_besterror = besterror; } }
void WeightedClusterFit::Compress3( void* block ) { int const count = m_colours->GetCount(); Vec3 const one( 1.0f ); Vec3 const zero( 0.0f ); Vec3 const half( 0.5f ); Vec3 const grid( 31.0f, 63.0f, 31.0f ); Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); float besterror = FLT_MAX; Vec3 x0(0.0f); float w0 = 0.0f; int b0 = 0, b1 = 0; // check all possible clusters for this total order for( int c0 = 0; c0 < count; c0++) { Vec3 x1(0.0f); float w1 = 0.0f; for( int c1 = 0; c1 < count-c0; c1++) { float w2 = m_wsum - w0 - w1; // These factors could be entirely precomputed. float const alpha2_sum = w0 + w1 * 0.25f; float const beta2_sum = w2 + w1 * 0.25f; float const alphabeta_sum = w1 * 0.25f; float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); Vec3 const alphax_sum = x0 + x1 * 0.5f; Vec3 const betax_sum = m_xsum - alphax_sum; Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; // compute the error Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); // apply the metric to the error term float error = Dot( e1, m_metricSqr ); // keep the solution if it wins if( error < besterror ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; } x1 += m_weighted[c0+c1]; w1 += m_weights[c0+c1]; } x0 += m_weighted[c0]; w0 += m_weights[c0]; } // save the block if necessary if( besterror < m_besterror ) { // compute indices from cluster sizes. u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < count; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < count; ++i ) ordered[m_order[i]] = bestindices[i]; m_colours->RemapIndices( ordered, bestindices ); // save the block WriteColourBlock3( beststart, bestend, bestindices, block ); // save the error m_besterror = besterror; } }
void WeightedClusterFit::Compress3( void* block ) { int const count = m_colours->GetCount(); Vec4 const one = VEC4_CONST(1.0f); Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f); Vec4 const two = VEC4_CONST(2.0); Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); Vec4 bestend = VEC4_CONST( 0.0f ); Vec4 besterror = VEC4_CONST( FLT_MAX ); Vec4 x0 = zero; int b0 = 0, b1 = 0; // check all possible clusters for this total order for( int c0 = 0; c0 < count; c0++) { Vec4 x1 = zero; for( int c1 = 0; c1 < count-c0; c1++) { Vec4 const x2 = m_xsum - x1 - x0; //Vec3 const alphax_sum = x0 + x1 * 0.5f; //float const alpha2_sum = w0 + w1 * 0.25f; Vec4 const alphax_sum = MultiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum Vec4 const alpha2_sum = alphax_sum.SplatW(); //Vec3 const betax_sum = x2 + x1 * 0.5f; //float const beta2_sum = w2 + w1 * 0.25f; Vec4 const betax_sum = MultiplyAdd(x1, half, x2); // betax_sum, beta2_sum Vec4 const beta2_sum = betax_sum.SplatW(); //float const alphabeta_sum = w1 * 0.25f; Vec4 const alphabeta_sum = (x1 * half).SplatW(); // alphabeta_sum // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; // compute the error (we skip the constant xxsum) Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); Vec4 e4 = MultiplyAdd( two, e3, e1 ); // apply the metric to the error term Vec4 e5 = e4 * m_metricSqr; Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; } x1 += m_weighted[c0+c1]; } x0 += m_weighted[c0]; } // save the block if necessary if( CompareAnyLessThan( besterror, m_besterror ) ) { // compute indices from cluster sizes. u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < count; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < count; ++i ) ordered[m_order[i]] = bestindices[i]; m_colours->RemapIndices( ordered, bestindices ); // save the block WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); // save the error m_besterror = besterror; } }