// Radix-sorts `nb` floats, one byte per pass (pass 0 = LSB, pass 3 = MSB).
// The floats are never converted: their raw 32-bit patterns are read through a udword/ubyte
// pointer, and the last (MSB) pass is specialised so that values with the sign bit set end up
// first and in reversed bucket order.
// The input values are not moved. Each performed pass scatters indices from mIndices into
// mIndices2 and then swaps the two pointers, so the most recent ordering is always in mIndices.
// A null `input2` or `nb == 0` returns *this immediately, before the call counter is touched.
// NOTE(review): CHECK_RESIZE, CREATE_HISTOGRAMS and CHECK_PASS_VALIDITY are macros defined outside
// this excerpt. CurCount, PerformPass and UniqueVal are presumably declared by CHECK_PASS_VALIDITY,
// and mHistogram/mOffset are presumably class members when RADIX_LOCAL_RAM is not defined - confirm.
// NOTE(review): this excerpt stops right after the pass loop; the end of the function is not visible here.
RadixSort& RadixSort::Sort(const float* input2, udword nb)
{
    // Checkings
    if(!input2 || !nb)    return *this;

    // Stats
    mTotalCalls++;

    // View the floats as raw 32-bit words so their bytes can be used as radix keys.
    udword* input = (udword*)input2;

    // Resize lists if needed
    CHECK_RESIZE(nb);

#ifdef RADIX_LOCAL_RAM
    // Allocate histograms & offsets on the stack
    udword mHistogram[256*4];
    udword mOffset[256];
#endif

    // Create histograms (counters). Counters for all passes are created in one run.
    // Pros:    read input buffer once instead of four times
    // Cons:    mHistogram is 4Kb instead of 1Kb
    // Floating-point values are always supposed to be signed values, so there's only one code path there.
    // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
    // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
    // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
    // wouldn't work with mixed positive/negative values....
    { CREATE_HISTOGRAMS(float, input2); }

    // Compute #negative values involved if needed
    udword NbNegativeValues = 0;
    // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
    // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
    // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
    udword* h3= &mHistogram[768];
    for(udword i=128;i<256;i++)    NbNegativeValues += h3[i];    // 768 for last histogram, 128 for negative part

    // Radix sort, j is the pass number (0=LSB, 3=MSB)
    for(udword j=0;j<4;j++)
    {
        // Should we care about negative values?
        if(j!=3)
        {
            // Here we deal with positive values only
            CHECK_PASS_VALIDITY(j);

            if(PerformPass)
            {
                // Create offsets
                // Exclusive prefix sum of the bucket counts: mOffset[b] is the first output slot of bucket b.
                mOffset[0] = 0;
                for(udword i=1;i<256;i++)        mOffset[i] = mOffset[i-1] + CurCount[i-1];

                // Perform Radix Sort
                ubyte* InputBytes    = (ubyte*)input;
                udword* Indices        = mIndices;
                udword* IndicesEnd    = &mIndices[nb];
                // Point at byte j of element 0; InputBytes[id<<2] is then byte j of element id (4 bytes per element).
                // NOTE(review): with pass 0 documented as the LSB, this addressing relies on the low byte
                // being stored first (little-endian layout) - confirm for the target platforms.
                InputBytes += j;
                while(Indices!=IndicesEnd)
                {
                    // Walk the current ordering and append each index to its bucket, which keeps the pass stable.
                    udword id = *Indices++;
                    mIndices2[mOffset[InputBytes[id<<2]]++] = id;
                }

                // Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
                udword* Tmp    = mIndices;    mIndices = mIndices2; mIndices2 = Tmp;
            }
        }
        else
        {
            // This is a special case to correctly handle negative values
            CHECK_PASS_VALIDITY(j);

            if(PerformPass)
            {
                // Create biased offsets, in order for negative numbers to be sorted as well
                mOffset[0] = NbNegativeValues;                                                // First positive number takes place after the negative ones
                for(udword i=1;i<128;i++)        mOffset[i] = mOffset[i-1] + CurCount[i-1];    // 1 to 128 for positive numbers

                // We must reverse the sorting order for negative numbers!
                // Buckets 255 down to 128 are laid out from slot 0 upwards (bucket 255 first). The first two
                // statements compute each bucket's first slot; the third turns that into one-past-its-last slot,
                // because negative entries are written with a pre-decrement below.
                mOffset[255] = 0;
                for(udword i=0;i<127;i++)        mOffset[254-i] = mOffset[255-i] + CurCount[255-i];    // Fixing the wrong order for negative values
                for(udword i=128;i<256;i++)    mOffset[i] += CurCount[i];                            // Fixing the wrong place for negative values

                // Perform Radix Sort
                for(udword i=0;i<nb;i++)
                {
                    udword Radix = input[mIndices[i]]>>24;                                // Radix byte, same as above. AND is useless here (udword).
                    // ### cmp to be killed. Not good. Later.
                    if(Radix<128)        mIndices2[mOffset[Radix]++] = mIndices[i];        // Number is positive, same as above
                    else                mIndices2[--mOffset[Radix]] = mIndices[i];        // Number is negative, flip the sorting order
                }
                // Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
                udword* Tmp    = mIndices;    mIndices = mIndices2; mIndices2 = Tmp;
            }
            else
            {
                // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
                // UniqueVal>=128 means the single top byte shared by every value has the sign bit set.
                if(UniqueVal>=128)
                {
                    for(udword i=0;i<nb;i++)    mIndices2[i] = mIndices[nb-i-1];

                    // Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap.
                    udword* Tmp    = mIndices;    mIndices = mIndices2; mIndices2 = Tmp;
                }
            }
        }
    }
Example #2
0
// Radix-sorts `nb` floats, one byte per pass (pass 0 = LSB, pass 3 = MSB).
// The floats are never converted: their raw 32-bit patterns are read through a uint32/byte
// pointer, and the last (MSB) pass is specialised so that values with the sign bit set end up
// first and in reversed bucket order.
// The input values are not moved. Each performed pass scatters ranks into m_Ranks2 through the
// per-bucket write cursors in Link[], then swaps m_Ranks and m_Ranks2, so the most recent
// ordering is always in m_Ranks.
// A null `input2`, `nb == 0`, or an `nb` with bit 31 set returns *this immediately, before the
// call counter is touched.
// NOTE(review): CheckResize, CREATE_HISTOGRAMS, CHECK_PASS_VALIDITY, INVALID_RANKS, VALIDATE_RANKS,
// BYTES_INC and H3_OFFSET are defined outside this excerpt. CurCount, PerformPass and UniqueVal are
// presumably declared by CHECK_PASS_VALIDITY - confirm against the macro definitions.
// NOTE(review): this excerpt stops inside the pass loop; the end of the function is not visible here.
RadixSort& RadixSort::Sort(const float* input2, uint32 nb)
{
	// Checkings
	// NOTE(review): bit 31 of the count is presumably reserved as the "ranks invalid" flag - confirm.
	if(!input2 || !nb || nb&0x80000000)	return *this;

	// Stats
	mTotalCalls++;

	// View the floats as raw 32-bit words so their bytes can be used as radix keys.
	const uint32* input = (const uint32*)input2;

	// Resize lists if needed
	CheckResize(nb);

	// Allocate histograms & offsets on the stack
	uint32 Histogram[256*4];
	uint32* Link[256];

	// Create histograms (counters). Counters for all passes are created in one run.
	// Pros:	read input buffer once instead of four times
	// Cons:	mHistogram is 4Kb instead of 1Kb
	// Floating-point values are always supposed to be signed values, so there's only one code path there.
	// Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
	// is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
	// generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
	// wouldn't work with mixed positive/negative values....
	{ CREATE_HISTOGRAMS(float, input2); }

	// Radix sort, j is the pass number (0=LSB, 3=MSB)
	for(uint32 j=0;j<4;j++)
	{
		// Should we care about negative values?
		if(j!=3)
		{
			// Here we deal with positive values only
			CHECK_PASS_VALIDITY(j);

			if(PerformPass)
			{
				// Create offsets
				// Link[b] is the write cursor for bucket b: buckets are laid out back to back in m_Ranks2.
				Link[0] = m_Ranks2;
				for(uint32 i=1;i<256;i++)		Link[i] = Link[i-1] + CurCount[i-1];

				// Perform Radix Sort
				const unsigned char* InputBytes = (const unsigned char*)input;
				// Point at this pass's byte of element 0; InputBytes[id<<2] is that byte of element id.
				InputBytes += BYTES_INC;
				if(INVALID_RANKS)
				{
					// No usable previous ordering: take the elements in their natural order 0..nb-1.
					for(uint32 i=0;i<nb;i++)	*Link[InputBytes[i<<2]]++ = i;
					VALIDATE_RANKS;
				}
				else
				{
					// Walk the current ordering and append each rank to its bucket, which keeps the pass stable.
					const uint32* Indices		= m_Ranks;
					const uint32* IndicesEnd	= &m_Ranks[nb];
					while(Indices!=IndicesEnd)
					{
						const uint32 id = *Indices++;
						*Link[InputBytes[id<<2]]++ = id;
					}
				}

				// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
				uint32* Tmp = m_Ranks;
				m_Ranks = m_Ranks2;
				m_Ranks2 = Tmp;
			}
		}
		else
		{
			// This is a special case to correctly handle negative values
			CHECK_PASS_VALIDITY(j);

			if(PerformPass)
			{
#ifdef KYLE_HUBERT_VERSION
				// From Kyle Hubert:

				// Negative buckets are laid out from the start of m_Ranks2, bucket 255 first. Each Link[b]
				// for b>=128 is one-past-the-end of its bucket, because negatives are written with a
				// pre-decrement below.
				Link[255] = m_Ranks2 + CurCount[255];
				for(uint32 i=254;i>127;i--)	Link[i] = Link[i+1] + CurCount[i];
				// Link[128] already includes CurCount[128], so it is the end of the whole negative region
				// and therefore the first slot for positive values. Adding CurCount[128] a second time
				// shifted every positive rank by the number of values whose top byte is 0x80 (e.g. -0.0f),
				// leaving a gap and writing beyond the nb slots being filled.
				Link[0] = Link[128];
				for(uint32 i=1;i<128;i++)	Link[i] = Link[i-1] + CurCount[i-1];
#else
				// Compute #negative values involved if needed
				uint32 NbNegativeValues = 0;
				// An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
				// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
				// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
				// ### is that ok on Apple ?!
				const uint32* h3 = &Histogram[H3_OFFSET];
				for(uint32 i=128;i<256;i++)	NbNegativeValues += h3[i];	// 768 for last histogram, 128 for negative part

				// Create biased offsets, in order for negative numbers to be sorted as well
				Link[0] = &m_Ranks2[NbNegativeValues];										// First positive number takes place after the negative ones
				for(uint32 i=1;i<128;i++)		Link[i] = Link[i-1] + CurCount[i-1];		// Buckets 1 to 127 hold the remaining positive numbers

				// We must reverse the sorting order for negative numbers!
				// First compute where each negative bucket starts (bucket 255 first), then move every cursor
				// to one-past-the-end of its bucket for the pre-decrement writes below.
				Link[255] = m_Ranks2;
				for(uint32 i=0;i<127;i++)	Link[254-i] = Link[255-i] + CurCount[255-i];	// Fixing the wrong order for negative values
				for(uint32 i=128;i<256;i++)	Link[i] += CurCount[i];							// Fixing the wrong place for negative values
#endif
				// Perform Radix Sort
				if(INVALID_RANKS)
				{
					for(uint32 i=0;i<nb;i++)
					{
						const uint32 Radix = input[i]>>24;						// Radix byte, same as above. AND is useless here (uint32).
						// ### cmp to be killed. Not good. Later.
						if(Radix<128)		*Link[Radix]++ = i;		// Number is positive, same as above
						else				*(--Link[Radix]) = i;	// Number is negative, flip the sorting order
					}
					VALIDATE_RANKS;
				}
				else
				{
					for(uint32 i=0;i<nb;i++)
					{
						const uint32 Radix = input[m_Ranks[i]]>>24;						// Radix byte, same as above. AND is useless here (uint32).
						// ### cmp to be killed. Not good. Later.
						if(Radix<128)		*Link[Radix]++ = m_Ranks[i];		// Number is positive, same as above
						else				*(--Link[Radix]) = m_Ranks[i];	// Number is negative, flip the sorting order
					}
				}
				// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
				uint32* Tmp = m_Ranks;
				m_Ranks = m_Ranks2;
				m_Ranks2 = Tmp;
			}
			else
			{
				// The pass is useless, yet we still have to reverse the order of current list if all values are negative.
				// UniqueVal>=128 means the single top byte shared by every value has the sign bit set.
				if(UniqueVal>=128)
				{
					if(INVALID_RANKS)
					{
						// ###Possible?
						for(uint32 i=0;i<nb;i++)	m_Ranks2[i] = nb-i-1;
						VALIDATE_RANKS;
					}
					else
					{
						for(uint32 i=0;i<nb;i++)	m_Ranks2[i] = m_Ranks[nb-i-1];
					}

					// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
					uint32* Tmp = m_Ranks;
					m_Ranks = m_Ranks2;
					m_Ranks2 = Tmp;
				}
			}
		}
// Radix-sorts `nb` 32-bit integers, one byte per pass (pass 0 = LSB, pass 3 = MSB).
// `signedvalues` selects whether the words are compared as signed (sdword) or unsigned (udword).
// The input values are not moved: each performed pass scatters indices from mIndices into
// mIndices2 and swaps the two pointers, so the latest ordering is always in mIndices.
// A null `input` or `nb == 0` returns *this immediately, before the call counter is touched.
// NOTE(review): CHECK_RESIZE, CREATE_HISTOGRAMS and CHECK_PASS_VALIDITY are macros defined elsewhere;
// CurCount and PerformPass are presumably declared by CHECK_PASS_VALIDITY - confirm.
RadixSort& RadixSort::Sort(const udword* input, udword nb, bool signedvalues)
{
    // Guard: nothing to do without a buffer or without elements.
    if(!(input && nb))
    {
        return *this;
    }

    // Bookkeeping: one more call.
    ++mTotalCalls;

    // Make sure the index lists can hold nb entries.
    CHECK_RESIZE(nb);

#ifdef RADIX_LOCAL_RAM
    // Histograms and offsets live on the stack in this configuration.
    udword mHistogram[256*4];
    udword mOffset[256];
#endif

    // Build the four per-byte histograms in a single sweep over the input (4Kb of counters, but the
    // buffer is read once instead of four times). The element type matters for the ordering check
    // done inside the macro, hence one expansion per signedness.
    if(signedvalues)
    {
        CREATE_HISTOGRAMS(sdword, input);
    }
    else
    {
        CREATE_HISTOGRAMS(udword, input);
    }

    // Count the negative values: they are exactly the entries in the upper 128 buckets of the
    // most-significant-byte histogram (which starts at counter 768).
    udword NbNegativeValues = 0;
    if(signedvalues)
    {
        const udword* msbCounters = &mHistogram[768];
        for(udword bucket=128; bucket<256; ++bucket)
        {
            NbNegativeValues += msbCounters[bucket];
        }
    }

    // One pass per byte; j is the pass number (0=LSB, 3=MSB).
    for(udword j=0;j<4;j++)
    {
        CHECK_PASS_VALIDITY(j);

        // A pass can be dropped entirely (even the last, sign-carrying one, e.g. when every value
        // is negative with the same top byte); the current ordering is already right in that case.
        if(!PerformPass)
        {
            continue;
        }

        if(j==3 && signedvalues)
        {
            // Signed MSB pass: negatives are in the right relative order but must come first.
            // Buckets 128..255 are laid out from slot 0 ...
            mOffset[128] = 0;
            for(udword bucket=129; bucket<256; ++bucket)
            {
                mOffset[bucket] = mOffset[bucket-1] + CurCount[bucket-1];
            }

            // ... and buckets 0..127 follow right after all the negative values.
            mOffset[0] = NbNegativeValues;
            for(udword bucket=1; bucket<128; ++bucket)
            {
                mOffset[bucket] = mOffset[bucket-1] + CurCount[bucket-1];
            }
        }
        else
        {
            // Plain pass: offsets are the running sum of the bucket counts.
            mOffset[0] = 0;
            for(udword bucket=1; bucket<256; ++bucket)
            {
                mOffset[bucket] = mOffset[bucket-1] + CurCount[bucket-1];
            }
        }

        // Scatter: byte j of element id sits at keyBytes[id*4]; walking the current ordering
        // front to back keeps the pass stable.
        const ubyte* keyBytes = reinterpret_cast<const ubyte*>(input) + j;
        for(udword n=0; n<nb; ++n)
        {
            const udword id = mIndices[n];
            mIndices2[mOffset[keyBytes[id<<2]]++] = id;
        }

        // Exchange the lists so that mIndices holds the ordering just produced.
        udword* const previous = mIndices;
        mIndices = mIndices2;
        mIndices2 = previous;
    }

    return *this;
}
Example #4
0
// Radix-sorts `nb` 32-bit integers, one byte per pass (pass 0 = LSB, pass 3 = MSB).
// `hint` selects the interpretation: RADIX_UNSIGNED treats the words as uint32, anything else
// as int32.
// The input values are not moved: each performed pass scatters ranks into m_Ranks2 through the
// per-bucket write cursors in Link[], then swaps m_Ranks and m_Ranks2, so the latest ordering is
// always in m_Ranks.
// A null `input`, `nb == 0`, or an `nb` with bit 31 set returns *this immediately, before the
// call counter is touched.
// NOTE(review): CheckResize, CREATE_HISTOGRAMS, CHECK_PASS_VALIDITY, INVALID_RANKS, VALIDATE_RANKS,
// BYTES_INC and H3_OFFSET are defined elsewhere; CurCount and PerformPass are presumably declared
// by CHECK_PASS_VALIDITY - confirm.
RadixSort& RadixSort::Sort(const uint32* input, uint32 nb, eRadixHint hint)
{
	// Guard: need a buffer, at least one element, and a count that leaves bit 31 clear.
	if(!(input && nb) || (nb & 0x80000000))
	{
		return *this;
	}

	// Bookkeeping: one more call.
	++mTotalCalls;

	// Make sure the rank lists can hold nb entries.
	CheckResize(nb);

	// Counters for the four passes and one write cursor per bucket, all on the stack.
	uint32 Histogram[256*4];
	uint32* Link[256];

	// Build the four per-byte histograms in a single sweep over the input (4Kb of counters, but the
	// buffer is read once instead of four times). The element type matters for the ordering check
	// done inside the macro, hence one expansion per signedness.
	if(hint==RADIX_UNSIGNED)
	{
		CREATE_HISTOGRAMS(uint32, input);
	}
	else
	{
		CREATE_HISTOGRAMS(int32, input);
	}

	// One pass per byte; j is the pass number (0=LSB, 3=MSB).
	for(uint32 j=0;j<4;j++)
	{
		CHECK_PASS_VALIDITY(j);

		// A pass can be dropped entirely (even the last, sign-carrying one, e.g. when every value
		// is negative with the same top byte); the current ordering is already right in that case.
		if(!PerformPass)
		{
			continue;
		}

		if(j==3 && hint!=RADIX_UNSIGNED)
		{
			// Signed MSB pass: negatives are in the right relative order but must come first.

#ifdef KYLE_HUBERT_VERSION
			// Kyle Hubert's variant: chain buckets 128..255 from the start of the output, then let
			// bucket 0 begin where bucket 255 ends. No separate count of negatives is needed.
			Link[128] = m_Ranks2;
			for(uint32 bucket=129; bucket<256; ++bucket)
			{
				Link[bucket] = Link[bucket-1] + CurCount[bucket-1];
			}

			Link[0] = Link[255] + CurCount[255];
			for(uint32 bucket=1; bucket<128; ++bucket)
			{
				Link[bucket] = Link[bucket-1] + CurCount[bucket-1];
			}
#else
			// Count the negative values: they are the entries in the upper 128 buckets of the
			// most-significant-byte histogram.
			uint32 NbNegativeValues = 0;
			if(hint==RADIX_SIGNED)
			{
				const uint32* msbCounters = &Histogram[H3_OFFSET];
				for(uint32 bucket=128; bucket<256; ++bucket)
				{
					NbNegativeValues += msbCounters[bucket];
				}
			}

			// Buckets 128..255 are laid out from the start of the output ...
			Link[128] = m_Ranks2;
			for(uint32 bucket=129; bucket<256; ++bucket)
			{
				Link[bucket] = Link[bucket-1] + CurCount[bucket-1];
			}

			// ... and buckets 0..127 follow right after all the negative values.
			Link[0] = m_Ranks2 + NbNegativeValues;
			for(uint32 bucket=1; bucket<128; ++bucket)
			{
				Link[bucket] = Link[bucket-1] + CurCount[bucket-1];
			}
#endif
		}
		else
		{
			// Plain pass: buckets are laid out back to back from the start of the output.
			Link[0] = m_Ranks2;
			for(uint32 bucket=1; bucket<256; ++bucket)
			{
				Link[bucket] = Link[bucket-1] + CurCount[bucket-1];
			}
		}

		// Scatter: this pass's byte of element id sits at keyBytes[id*4].
		const unsigned char* keyBytes = reinterpret_cast<const unsigned char*>(input) + BYTES_INC;
		if(INVALID_RANKS)
		{
			// No usable previous ordering: take the elements in their natural order 0..nb-1.
			for(uint32 n=0; n<nb; ++n)
			{
				uint32*& cursor = Link[keyBytes[n<<2]];
				*cursor = n;
				++cursor;
			}
			VALIDATE_RANKS;
		}
		else
		{
			// Walk the current ordering front to back, which keeps the pass stable.
			for(uint32 n=0; n<nb; ++n)
			{
				const uint32 id = m_Ranks[n];
				uint32*& cursor = Link[keyBytes[id<<2]];
				*cursor = id;
				++cursor;
			}
		}

		// Exchange the lists so that m_Ranks holds the ordering just produced.
		uint32* const previous = m_Ranks;
		m_Ranks = m_Ranks2;
		m_Ranks2 = previous;
	}

	return *this;
}