Exemplo n.º 1
0
void SingleColourFit::Compress4( void* block )
{
	// build the table of lookups
	SingleColourLookup const* const lookups[] = 
	{
		lookup_5_4, 
		lookup_6_4, 
		lookup_5_4
	};
	
	// find the best end-points and index
	ComputeEndPoints( lookups );
	
	// build the block if we win
	if( m_error < m_besterror )
	{
		// remap the indices
		u8 indices[16];
		m_colours->RemapIndices( &m_index, indices );
		
		// save the block
		WriteColourBlock4( m_start, m_end, indices, block );

		// save the error
		m_besterror = m_error;
	}
}
Exemplo n.º 2
0
void RangeFit::Compress4( void* block )
{
    // cache some values
    int const count = m_colours->GetCount();
    Vec3 const* values = m_colours->GetPoints();

    // create a codebook
    Vec3 codes[4];
    codes[0] = m_start;
    codes[1] = m_end;
    codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
    codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;

    // match each point to the closest code
    u8 closest[16];
    float error = 0.0f;
    for( int i = 0; i < count; ++i )
    {
        // find the closest code
        float dist = FLT_MAX;
        int idx = 0;
        for( int j = 0; j < 4; ++j )
        {
            float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
            if( d < dist )
            {
                dist = d;
                idx = j;
            }
        }

        // save the index
        closest[i] = ( u8 )idx;

        // accumulate the error
        error += dist;
    }

    // save this scheme if it wins
    if( error < m_besterror )
    {
        // remap the indices
        u8 indices[16];
        m_colours->RemapIndices( closest, indices );

        // save the block
        WriteColourBlock4( m_start, m_end, indices, block );

        // save the error
        m_besterror = error;
    }
}
Exemplo n.º 3
0
void ColourNormalFit::Compress4(void* block)
{
  const Vec3 scale  = Vec3( 1.0f / 0.5f);
  const Vec3 offset = Vec3(-1.0f * 0.5f);

  // cache some values
  int const count = m_colours->GetCount();
  Vec3 const* values = m_colours->GetPoints();
  Scr3 const* freq = m_colours->GetWeights();
  
  // use a fitting algorithm
  m_start = m_start_candidate;
  m_end   = m_end_candidate;
  if (m_flags & kColourIterativeClusterFits)
    kMeans4();
//if ((m_flags & kColourIterativeClusterFits) >= (kColourIterativeClusterFit))
//  Permute4();
  
  // create a codebook
  Vec3 codes[4]; Codebook4n(codes, m_start, m_end);

  // match each point to the closest code
  u8 closest[16];

  Scr3 error = Scr3(DEVIANCE_BASE);
  for (int i = 0; i < count; ++i) {
    int idx = 0;

    // find the closest code
    Vec3 value = Normalize(scale * (offset + values[i]));
    Scr3 dist; MinDeviance4<true>(dist, idx, value, codes);

    // accumulate the error
    AddDeviance(dist, error, freq[i]);

    // save the index
    closest[i] = (u8)idx;
  }

  // save this scheme if it wins
  if (error < m_besterror) {
    // save the error
    m_besterror = error;

    // remap the indices
    u8 indices[16]; m_colours->RemapIndices(closest, indices);

    // save the block
    WriteColourBlock4(m_start, m_end, indices, block);
  }
}
Exemplo n.º 4
0
// Four-colour cluster fit.
//
// Walks every split of the ordered points into four consecutive clusters
// [0,i), [i,j), [j,k), [k,count), which receive the indices 0, 2, 3 and 1
// respectively. For each split the per-point m_alpha/m_beta weights are set
// up and SolveLeastSquares() is asked for end-points and an error. The best
// split is written to `block` only if it also beats m_besterror.
//
// The body is compiled in one of two flavours selected by SQUISH_USE_SIMD:
// Vec4 values compared with CompareAnyLessThan, or Vec3/float values compared
// with operator<.
void ClusterFit::Compress4( void* block )
{
	//debug = (run == 1);
	//run++;

	// declare variables
	int const count = m_colours->GetCount();
#if SQUISH_USE_SIMD
	Vec4 beststart = VEC4_CONST( 0.0f );
	Vec4 bestend = VEC4_CONST( 0.0f );
	Vec4 besterror = m_besterror;
	Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f );
	Vec4 const onethird = VEC4_CONST( 1.0f/3.0f );
	Vec4 const zero = VEC4_CONST( 0.0f );
#else
	Vec3 beststart( 0.0f );
	Vec3 bestend( 0.0f );
	float besterror = m_besterror;
	float const twothirds = 2.0f/3.0f;
	float const onethird = 1.0f/3.0f;
	float const zero = 0.0f;
#endif

	// check all possible clusters for this total order
	// (indices holds the current assignment, bestindices the winning one)
	u8 indices[16];
	u8 bestindices[16];
	
	// first cluster [0,i) is at the start: index 0, full weight on alpha
	for( int m = 0; m < count; ++m )
	{
		indices[m] = 0;
		m_alpha[m] = m_weights[m];
		m_beta[m] = zero;
	}
	// every loop counts downwards, so each level only has to overwrite the
	// tail of the arrays that the level above has just reset
	for( int i = count; i >= 0; --i )
	{
		// second cluster [i,j) is one third along: index 2,
		// alpha = 2/3 weight, beta = 1/3 weight
		for( int m = i; m < count; ++m )
		{
			indices[m] = 2;
			m_alpha[m] = twothirds*m_weights[m];
			m_beta[m] = onethird*m_weights[m];
		}		
		for( int j = count; j >= i; --j )
		{
			// third cluster [j,k) is two thirds along: index 3,
			// alpha = 1/3 weight, beta = 2/3 weight
			for( int m = j; m < count; ++m )
			{
				indices[m] = 3;
				m_alpha[m] = onethird*m_weights[m];
				m_beta[m] = twothirds*m_weights[m];
			}		
			for( int k = count; k >= j; --k )
			{
				// skip j == 0 && k == 0 (which also forces i == 0): the split
				// that would put every point into the last cluster
				if (j + k == 0) continue;
				
				// last cluster [k,n) is at the end: index 1, full weight on beta.
				// Only entry k changes per step; entries above k were already
				// moved into this cluster by earlier (larger) values of k.
				if( k < count )
				{
					indices[k] = 1;
					m_alpha[k] = zero;
					m_beta[k] = m_weights[k];
				}

				// solve a least squares problem to place the endpoints
				// (start/end are read afterwards, so the call is expected to fill them)
#if SQUISH_USE_SIMD
				Vec4 start, end;
				Vec4 error = SolveLeastSquares( start, end );
#else
				Vec3 start, end;
				float error = SolveLeastSquares( start, end );
#endif

				// keep the solution if it wins
#if SQUISH_USE_SIMD
				if( CompareAnyLessThan( error, besterror ) )
#else
				if( error < besterror )
#endif
				{
					beststart = start;
					bestend = end;
					// NOTE(review): copies all 16 entries although only the first
					// `count` entries of indices have been assigned above
					for( int m = 0; m < 16; ++m )	// TODO: make this faster?
						bestindices[m] = indices[m];	
					besterror = error;
				}
			}
		}
	}

	// save the block if necessary
	// (besterror started at m_besterror, so this is true only if some split won)
#if SQUISH_USE_SIMD
	if( CompareAnyLessThan( besterror, m_besterror ) )
#else
	if( besterror < m_besterror )
#endif
	{
		// remap the indices: first scatter them from sorted position i back to
		// position m_order[i], then let the colour set remap them, reusing
		// bestindices as the output buffer
		u8 unordered[16];
		for( int i = 0; i < count; ++i )
			unordered[m_order[i]] = bestindices[i];
		m_colours->RemapIndices( unordered, bestindices );
		
		// save the block
#if SQUISH_USE_SIMD
		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
#else
		WriteColourBlock4( beststart, bestend, bestindices, block );
#endif

		// save the error
		m_besterror = besterror;
	}
}
Exemplo n.º 5
0
void FastClusterFit::Compress4( void* block )
{
	// declare variables
	Vec3 beststart( 0.0f );
	Vec3 bestend( 0.0f );
	float besterror = FLT_MAX;

	Vec3 x0(0.0f);
	Vec3 x1;
	Vec3 x2;
	int b0 = 0, b1 = 0, b2 = 0;
	int i = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{	
		x1 = Vec3(0.0f);
		
		for( int c1 = 0; c1 <= 16-c0; c1++)
		{	
			x2 = Vec3(0.0f);
			
			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
			{
				float const alpha2_sum = s_fourElement[i].alpha2_sum;
				float const beta2_sum = s_fourElement[i].beta2_sum;
				float const alphabeta_sum = s_fourElement[i].alphabeta_sum;
				float const factor = s_fourElement[i].factor;
				i++;
				
				Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
				Vec3 const betax_sum = m_xsum - alphax_sum;
				
				Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
				Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
				
				// clamp the output to [0, 1]
				Vec3 const one( 1.0f );
				Vec3 const zero( 0.0f );
				a = Min( one, Max( zero, a ) );
				b = Min( one, Max( zero, b ) );
				
				// clamp to the grid
				Vec3 const grid( 31.0f, 63.0f, 31.0f );
				Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
				Vec3 const half( 0.5f );
				a = Floor( grid*a + half )*gridrcp;
				b = Floor( grid*b + half )*gridrcp;
				
				// compute the error
				Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
				
				// apply the metric to the error term
				float error = Dot( e1, m_metricSqr );
				
				// keep the solution if it wins
				if( error < besterror )
				{
					besterror = error;
					beststart = a;
					bestend = b;
					b0 = c0;
					b1 = c1;
					b2 = c2;
				}
				
				x2 += m_unweighted[c0+c1+c2];
			}
			
			x1 += m_unweighted[c0+c1];
		}
		
		x0 += m_unweighted[c0];
	}

	// save the block if necessary
	if( besterror < m_besterror )
	{
		// compute indices from cluster sizes.
		/*uint bestindices = 0;
		{
			int i = b0;
			for(; i < b0+b1; i++) {
				bestindices = 2 << (2 * m_order[i]);
			}
			for(; i < b0+b1+b2; i++) {
				bestindices = 3 << (2 * m_order[i]);
			}
			for(; i < 16; i++) {
				bestindices = 1 << (2 * m_order[i]);
			}
		}*/
		u8 bestindices[16];
		{
			int i = 0;
			for(; i < b0; i++) {
				bestindices[i] = 0;
			}
			for(; i < b0+b1; i++) {
				bestindices[i] = 2;
			}
			for(; i < b0+b1+b2; i++) {
				bestindices[i] = 3;
			}
			for(; i < 16; i++) {
				bestindices[i] = 1;
			}
		}
		
		// remap the indices
		u8 ordered[16];
		for( int i = 0; i < 16; ++i )
			ordered[m_order[i]] = bestindices[i];
		
		// save the block
		WriteColourBlock4( beststart, bestend, ordered, block );
		
		// save the error
		m_besterror = besterror;
	}
}
Exemplo n.º 6
0
void FastClusterFit::Compress4( void* block )
{
	Vec4 const one = VEC4_CONST(1.0f);
	Vec4 const zero = VEC4_CONST(0.0f);
	Vec4 const half = VEC4_CONST(0.5f);
	Vec4 const two = VEC4_CONST(2.0);
	Vec4 const onethird = VEC4_CONST( 1.0f/3.0f );
	Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f );

	// declare variables
	Vec4 beststart = VEC4_CONST( 0.0f );
	Vec4 bestend = VEC4_CONST( 0.0f );
	Vec4 besterror = VEC4_CONST( FLT_MAX );

	Vec4 x0 = zero;
	int b0 = 0, b1 = 0, b2 = 0;
	int i = 0;

	// check all possible clusters for this total order
	for( int c0 = 0; c0 <= 16; c0++)
	{	
		Vec4 x1 = zero;
		
		for( int c1 = 0; c1 <= 16-c0; c1++)
		{	
			Vec4 x2 = zero;
			
			for( int c2 = 0; c2 <= 16-c0-c1; c2++)
			{
				Vec4 const constants = Vec4((const float *)&s_fourElement[i]);
				Vec4 const alpha2_sum = constants.SplatX();
				Vec4 const beta2_sum = constants.SplatY();
				Vec4 const alphabeta_sum = constants.SplatZ();
				Vec4 const factor = constants.SplatW();
				i++;
				
				Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird);
				Vec4 const betax_sum = m_xsum - alphax_sum;
				
				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
				Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
				
				// clamp the output to [0, 1]
				a = Min( one, Max( zero, a ) );
				b = Min( one, Max( zero, b ) );
				
				// clamp to the grid
				Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
				Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
				
				// compute the error
				Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
				Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
				Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
				
				// apply the metric to the error term
				Vec4 e4 = e3 * m_metricSqr;
				Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
				
				// keep the solution if it wins
				if( CompareAnyLessThan( error, besterror ) )
				{
					besterror = error;
					beststart = a;
					bestend = b;
					b0 = c0;
					b1 = c1;
					b2 = c2;
				}
				
				x2 += m_unweighted[c0+c1+c2];
			}
			
			x1 += m_unweighted[c0+c1];
		}
		
		x0 += m_unweighted[c0];
	}

	// save the block if necessary
	if( CompareAnyLessThan( besterror, m_besterror ) )
	{
		// compute indices from cluster sizes.
		/*uint bestindices = 0;
		{
			int i = b0;
			for(; i < b0+b1; i++) {
				bestindices = 2 << (2 * m_order[i]);
			}
			for(; i < b0+b1+b2; i++) {
				bestindices = 3 << (2 * m_order[i]);
			}
			for(; i < 16; i++) {
				bestindices = 1 << (2 * m_order[i]);
			}
		}*/
		u8 bestindices[16];
		{
			int i = 0;
			for(; i < b0; i++) {
				bestindices[i] = 0;
			}
			for(; i < b0+b1; i++) {
				bestindices[i] = 2;
			}
			for(; i < b0+b1+b2; i++) {
				bestindices[i] = 3;
			}
			for(; i < 16; i++) {
				bestindices[i] = 1;
			}
		}
		
		// remap the indices
		u8 ordered[16];
		for( int i = 0; i < 16; ++i )
			ordered[m_order[i]] = bestindices[i];
		
		// save the block
		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
		
		// save the error
		m_besterror = besterror;
	}
}
void WeightedClusterFit::Compress4( void* block )
{
    int const count = m_colours->GetCount();
    Vec3 const one( 1.0f );
    Vec3 const zero( 0.0f );
    Vec3 const half( 0.5f );
    Vec3 const grid( 31.0f, 63.0f, 31.0f );
    Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );

    // declare variables
    Vec3 beststart( 0.0f );
    Vec3 bestend( 0.0f );
    float besterror = FLT_MAX;

    Vec3 x0(0.0f);
    float w0 = 0.0f;
    int b0 = 0, b1 = 0, b2 = 0;

    // check all possible clusters for this total order
    for( int c0 = 0; c0 < count; c0++)
    {
        Vec3 x1(0.0f);
        float w1 = 0.0f;

        for( int c1 = 0; c1 < count-c0; c1++)
        {
            Vec3 x2(0.0f);
            float w2 = 0.0f;

            for( int c2 = 0; c2 < count-c0-c1; c2++)
            {
                float w3 = m_wsum - w0 - w1 - w2;

                float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
                float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
                float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
                float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

                Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
                Vec3 const betax_sum = m_xsum - alphax_sum;

                Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
                Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;

                // clamp to the grid
                a = Min( one, Max( zero, a ) );
                b = Min( one, Max( zero, b ) );
                a = Floor( grid*a + half )*gridrcp;
                b = Floor( grid*b + half )*gridrcp;

                // compute the error
                Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );

                // apply the metric to the error term
                float error = Dot( e1, m_metricSqr );

                // keep the solution if it wins
                if( error < besterror )
                {
                    besterror = error;
                    beststart = a;
                    bestend = b;
                    b0 = c0;
                    b1 = c1;
                    b2 = c2;
                }

                x2 += m_weighted[c0+c1+c2];
                w2 += m_weights[c0+c1+c2];
            }

            x1 += m_weighted[c0+c1];
            w1 += m_weights[c0+c1];
        }

        x0 += m_weighted[c0];
        w0 += m_weights[c0];
    }

    // save the block if necessary
    if( besterror < m_besterror )
    {
        // compute indices from cluster sizes.
        u8 bestindices[16];
        {
            int i = 0;
            for(; i < b0; i++) {
                bestindices[i] = 0;
            }
            for(; i < b0+b1; i++) {
                bestindices[i] = 2;
            }
            for(; i < b0+b1+b2; i++) {
                bestindices[i] = 3;
            }
            for(; i < count; i++) {
                bestindices[i] = 1;
            }
        }

        // remap the indices
        u8 ordered[16];
        for( int i = 0; i < count; ++i )
            ordered[m_order[i]] = bestindices[i];

        m_colours->RemapIndices( ordered, bestindices );

        // save the block
        WriteColourBlock4( beststart, bestend, bestindices, block );

        // save the error
        m_besterror = besterror;
    }
}
void WeightedClusterFit::Compress4( void* block )
{
    int const count = m_colours->GetCount();
    Vec4 const one = VEC4_CONST(1.0f);
    Vec4 const zero = VEC4_CONST(0.0f);
    Vec4 const half = VEC4_CONST(0.5f);
    Vec4 const two = VEC4_CONST(2.0);
    Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
    Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
    Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );

    // declare variables
    Vec4 beststart = VEC4_CONST( 0.0f );
    Vec4 bestend = VEC4_CONST( 0.0f );
    Vec4 besterror = VEC4_CONST( FLT_MAX );

    Vec4 x0 = zero;
    int b0 = 0, b1 = 0, b2 = 0;

    // check all possible clusters for this total order
    for( int c0 = 0; c0 < count; c0++)
    {
        Vec4 x1 = zero;

        for( int c1 = 0; c1 < count-c0; c1++)
        {
            Vec4 x2 = zero;

            for( int c2 = 0; c2 < count-c0-c1; c2++)
            {
                Vec4 const x3 = m_xsum - x2 - x1 - x0;

                //Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
                //float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
                Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
                Vec4 const alpha2_sum = alphax_sum.SplatW();

                //Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
                //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
                Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
                Vec4 const beta2_sum = betax_sum.SplatW();

                //float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
                Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum

                // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
                Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );

                Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
                Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

                // clamp to the grid
                a = Min( one, Max( zero, a ) );
                b = Min( one, Max( zero, b ) );
                a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
                b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;

                // compute the error (we skip the constant xxsum)
                Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
                Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
                Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
                Vec4 e4 = MultiplyAdd( two, e3, e1 );

                // apply the metric to the error term
                Vec4 e5 = e4 * m_metricSqr;
                Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();

                // keep the solution if it wins
                if( CompareAnyLessThan( error, besterror ) )
                {
                    besterror = error;
                    beststart = a;
                    bestend = b;
                    b0 = c0;
                    b1 = c1;
                    b2 = c2;
                }

                x2 += m_weighted[c0+c1+c2];
            }

            x1 += m_weighted[c0+c1];
        }

        x0 += m_weighted[c0];
    }

    // save the block if necessary
    if( CompareAnyLessThan( besterror, m_besterror ) )
    {
        // compute indices from cluster sizes.
        u8 bestindices[16];
        {
            int i = 0;
            for(; i < b0; i++) {
                bestindices[i] = 0;
            }
            for(; i < b0+b1; i++) {
                bestindices[i] = 2;
            }
            for(; i < b0+b1+b2; i++) {
                bestindices[i] = 3;
            }
            for(; i < count; i++) {
                bestindices[i] = 1;
            }
        }

        // remap the indices
        u8 ordered[16];
        for( int i = 0; i < count; ++i )
            ordered[m_order[i]] = bestindices[i];

        m_colours->RemapIndices( ordered, bestindices );

        // save the block
        WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );

        // save the error
        m_besterror = besterror;
    }
}