void FastClusterFit::Compress4( void* block ) { // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); float besterror = FLT_MAX; Vec3 x0(0.0f); Vec3 x1; Vec3 x2; int b0 = 0, b1 = 0, b2 = 0; int i = 0; // check all possible clusters for this total order for( int c0 = 0; c0 <= 16; c0++) { x1 = Vec3(0.0f); for( int c1 = 0; c1 <= 16-c0; c1++) { x2 = Vec3(0.0f); for( int c2 = 0; c2 <= 16-c0-c1; c2++) { float const alpha2_sum = s_fourElement[i].alpha2_sum; float const beta2_sum = s_fourElement[i].beta2_sum; float const alphabeta_sum = s_fourElement[i].alphabeta_sum; float const factor = s_fourElement[i].factor; i++; Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); Vec3 const betax_sum = m_xsum - alphax_sum; Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; // clamp the output to [0, 1] Vec3 const one( 1.0f ); Vec3 const zero( 0.0f ); a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); // clamp to the grid Vec3 const grid( 31.0f, 63.0f, 31.0f ); Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; // compute the error Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); // apply the metric to the error term float error = Dot( e1, m_metricSqr ); // keep the solution if it wins if( error < besterror ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; b2 = c2; } x2 += m_unweighted[c0+c1+c2]; } x1 += m_unweighted[c0+c1]; } x0 += m_unweighted[c0]; } // save the block if necessary if( besterror < m_besterror ) { // compute indices from cluster sizes. /*uint bestindices = 0; { int i = b0; for(; i < b0+b1; i++) { bestindices = 2 << (2 * m_order[i]); } for(; i < b0+b1+b2; i++) { bestindices = 3 << (2 * m_order[i]); } for(; i < 16; i++) { bestindices = 1 << (2 * m_order[i]); } }*/ u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } for(; i < 16; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < 16; ++i ) ordered[m_order[i]] = bestindices[i]; // save the block WriteColourBlock4( beststart, bestend, ordered, block ); // save the error m_besterror = besterror; } }
void WeightedClusterFit::Compress4( void* block ) { int const count = m_colours->GetCount(); Vec3 const one( 1.0f ); Vec3 const zero( 0.0f ); Vec3 const half( 0.5f ); Vec3 const grid( 31.0f, 63.0f, 31.0f ); Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); float besterror = FLT_MAX; Vec3 x0(0.0f); float w0 = 0.0f; int b0 = 0, b1 = 0, b2 = 0; // check all possible clusters for this total order for( int c0 = 0; c0 < count; c0++) { Vec3 x1(0.0f); float w1 = 0.0f; for( int c1 = 0; c1 < count-c0; c1++) { Vec3 x2(0.0f); float w2 = 0.0f; for( int c2 = 0; c2 < count-c0-c1; c2++) { float w3 = m_wsum - w0 - w1 - w2; float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); Vec3 const betax_sum = m_xsum - alphax_sum; Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; // compute the error Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); // apply the metric to the error term float error = Dot( e1, m_metricSqr ); // keep the solution if it wins if( error < besterror ) { besterror = error; beststart = a; bestend = b; b0 = c0; b1 = c1; b2 = c2; } x2 += m_weighted[c0+c1+c2]; w2 += m_weights[c0+c1+c2]; } x1 += m_weighted[c0+c1]; w1 += m_weights[c0+c1]; } x0 += m_weighted[c0]; w0 += m_weights[c0]; } // save the block if necessary if( besterror < m_besterror ) { // compute indices from cluster sizes. u8 bestindices[16]; { int i = 0; for(; i < b0; i++) { bestindices[i] = 0; } for(; i < b0+b1; i++) { bestindices[i] = 2; } for(; i < b0+b1+b2; i++) { bestindices[i] = 3; } for(; i < count; i++) { bestindices[i] = 1; } } // remap the indices u8 ordered[16]; for( int i = 0; i < count; ++i ) ordered[m_order[i]] = bestindices[i]; m_colours->RemapIndices( ordered, bestindices ); // save the block WriteColourBlock4( beststart, bestend, bestindices, block ); // save the error m_besterror = besterror; } }