void Stereo_Buffer::mix_stereo_no_center( blip_sample_t* out_, blargg_long count )
{
	blip_sample_t* BLIP_RESTRICT out = out_;
	int const bass = BLIP_READER_BASS( bufs [1] );
	BLIP_READER_BEGIN( left, bufs [1] );
	BLIP_READER_BEGIN( right, bufs [2] );
	
	for ( ; count; --count )
	{
		blargg_long l = BLIP_READER_READ( left );
		if ( (BOOST::int16_t) l != l )
			l = 0x7FFF - (l >> 24);
		
		blargg_long r = BLIP_READER_READ( right );
		if ( (BOOST::int16_t) r != r )
			r = 0x7FFF - (r >> 24);
		
		BLIP_READER_NEXT( left, bass );
		BLIP_READER_NEXT( right, bass );
		
		out [0] = l;
		out [1] = r;
		out += 2;
	}
	
	BLIP_READER_END( right, bufs [2] );
	BLIP_READER_END( left, bufs [1] );
}
void Stereo_Mixer::mix_stereo( blip_sample_t* out_, int count )
{
	blip_sample_t* BLIP_RESTRICT out = out_ + count * stereo;

	// do left + center and right + center separately to reduce register load
	Tracked_Blip_Buffer* const* buf = &bufs [2];
	while ( true ) // loop runs twice
	{
		--buf;
		--out;

		int const bass = BLIP_READER_BASS( *bufs [2] );
		BLIP_READER_BEGIN( side,   **buf );
		BLIP_READER_BEGIN( center, *bufs [2] );

		BLIP_READER_ADJ_( side,   samples_read );
		BLIP_READER_ADJ_( center, samples_read );

		int offset = -count;
		do
		{
			blargg_long s = BLIP_READER_READ_RAW( center ) + BLIP_READER_READ_RAW( side );
			s >>= blip_sample_bits - 16;
			BLIP_READER_NEXT_IDX_( side,   bass, offset );
			BLIP_READER_NEXT_IDX_( center, bass, offset );
			BLIP_CLAMP( s, s );

			++offset; // before write since out is decremented to slightly before end
			out [offset * stereo] = (blip_sample_t) s;
		}
		while ( offset );

		BLIP_READER_END( side,   **buf );

		if ( buf != bufs )
			continue;

		// only end center once
		BLIP_READER_END( center, *bufs [2] );
		break;
	}
}
void Stereo_Buffer::mix_mono( blip_sample_t* out_, blargg_long count )
{
	blip_sample_t* BLIP_RESTRICT out = out_;
	int const bass = BLIP_READER_BASS( bufs [0] );
	BLIP_READER_BEGIN( center, bufs [0] );
	
	for ( ; count; --count )
	{
		blargg_long s = BLIP_READER_READ( center );
		if ( (BOOST::int16_t) s != s )
			s = 0x7FFF - (s >> 24);
		
		BLIP_READER_NEXT( center, bass );
		out [0] = s;
		out [1] = s;
		out += 2;
	}
	
	BLIP_READER_END( center, bufs [0] );
}
void Stereo_Mixer::mix_mono( blip_sample_t* out_, int count )
{
	int const bass = BLIP_READER_BASS( *bufs [2] );
	BLIP_READER_BEGIN( center, *bufs [2] );
	BLIP_READER_ADJ_( center, samples_read );

	typedef blip_sample_t stereo_blip_sample_t [stereo];
	stereo_blip_sample_t* BLIP_RESTRICT out = (stereo_blip_sample_t*) out_ + count;
	int offset = -count;
	do
	{
		blargg_long s = BLIP_READER_READ( center );
		BLIP_READER_NEXT_IDX_( center, bass, offset );
		BLIP_CLAMP( s, s );

		out [offset] [0] = (blip_sample_t) s;
		out [offset] [1] = (blip_sample_t) s;
	}
	while ( ++offset );

	BLIP_READER_END( center, *bufs [2] );
}
void Effects_Buffer::mix_effects( blip_sample_t* out_, int pair_count )
{
    typedef fixed_t stereo_fixed_t [stereo];

    // add channels with echo, do echo, add channels without echo, then convert to 16-bit and output
    int echo_phase = 1;
    do
    {
        // mix any modified buffers
        {
            buf_t* buf = bufs;
            int bufs_remain = bufs_size;
            do
            {
                if ( buf->non_silent() && ( buf->echo == (bool)echo_phase ) )
                {
                    stereo_fixed_t* BLIP_RESTRICT out = (stereo_fixed_t*) &echo [echo_pos];
                    int const bass = BLIP_READER_BASS( *buf );
                    BLIP_READER_BEGIN( in, *buf );
                    BLIP_READER_ADJ_( in, mixer.samples_read );
                    fixed_t const vol_0 = buf->vol [0];
                    fixed_t const vol_1 = buf->vol [1];

                    int count = unsigned (echo_size - echo_pos) / stereo;
                    int remain = pair_count;
                    if ( count > remain )
                        count = remain;
                    do
                    {
                        remain -= count;
                        BLIP_READER_ADJ_( in, count );

                        out += count;
                        int offset = -count;
                        do
                        {
                            fixed_t s = BLIP_READER_READ( in );
                            BLIP_READER_NEXT_IDX_( in, bass, offset );

                            out [offset] [0] += s * vol_0;
                            out [offset] [1] += s * vol_1;
                        }
                        while ( ++offset );

                        out = (stereo_fixed_t*) echo.begin();
                        count = remain;
                    }
                    while ( remain );

                    BLIP_READER_END( in, *buf );
                }
                buf++;
            }
            while ( --bufs_remain );
        }

        // add echo
        if ( echo_phase && !no_echo )
        {
            fixed_t const feedback = s.feedback;
            fixed_t const treble   = s.treble;

            int i = 1;
            do
            {
                fixed_t low_pass = s.low_pass [i];

                fixed_t* echo_end = &echo [echo_size + i];
                fixed_t const* BLIP_RESTRICT in_pos = &echo [echo_pos + i];
                blargg_long out_offset = echo_pos + i + s.delay [i];
                if ( out_offset >= echo_size )
                    out_offset -= echo_size;
                assert( out_offset < echo_size );
                fixed_t* BLIP_RESTRICT out_pos = &echo [out_offset];

                // break into up to three chunks to avoid having to handle wrap-around
                // in middle of core loop
                int remain = pair_count;
                do
                {
                    fixed_t const* pos = in_pos;
                    if ( pos < out_pos )
                        pos = out_pos;
                    int count = blargg_ulong ((char*) echo_end - (char const*) pos) /
                                unsigned (stereo * sizeof (fixed_t));
                    if ( count > remain )
                        count = remain;
                    remain -= count;

                    in_pos  += count * stereo;
                    out_pos += count * stereo;
                    int offset = -count;
                    do
                    {
                        low_pass += FROM_FIXED( in_pos [offset * stereo] - low_pass ) * treble;
                        out_pos [offset * stereo] = FROM_FIXED( low_pass ) * feedback;
                    }
                    while ( ++offset );

                    if (  in_pos >= echo_end )  in_pos -= echo_size;
                    if ( out_pos >= echo_end ) out_pos -= echo_size;
                }
                while ( remain );

                s.low_pass [i] = low_pass;
            }
            while ( --i >= 0 );
        }
    }
    while ( --echo_phase >= 0 );

    // clamp to 16 bits
    {
        stereo_fixed_t const* BLIP_RESTRICT in = (stereo_fixed_t*) &echo [echo_pos];
        typedef blip_sample_t stereo_blip_sample_t [stereo];
        stereo_blip_sample_t* BLIP_RESTRICT out = (stereo_blip_sample_t*) out_;
        int count = unsigned (echo_size - echo_pos) / (unsigned) stereo;
        int remain = pair_count;
        if ( count > remain )
            count = remain;
        do
        {
            remain -= count;
            in  += count;
            out += count;
            int offset = -count;
            do
            {
                fixed_t in_0 = FROM_FIXED( in [offset] [0] );
                fixed_t in_1 = FROM_FIXED( in [offset] [1] );

                BLIP_CLAMP( in_0, in_0 );
                out [offset] [0] = (blip_sample_t) in_0;

                BLIP_CLAMP( in_1, in_1 );
                out [offset] [1] = (blip_sample_t) in_1;
            }
            while ( ++offset );

            in = (stereo_fixed_t*) echo.begin();
            count = remain;
        }
        while ( remain );
    }
}