// Mix samples from the various audio channels into a single sample queue. // This single sample queue is where __AudioMix should read from. If the sample queue is full, we should // just sleep the main emulator thread a little. void __AudioUpdate() { // Audio throttle doesn't really work on the PSP since the mixing intervals are so closely tied // to the CPU. Much better to throttle the frame rate on frame display and just throw away audio // if the buffer somehow gets full. s32 mixBuffer[hwBlockSize * 2]; memset(mixBuffer, 0, sizeof(mixBuffer)); for (u32 i = 0; i < PSP_AUDIO_CHANNEL_MAX + 1; i++) { if (!chans[i].reserved) continue; __AudioWakeThreads(chans[i], hwBlockSize); if (!chans[i].sampleQueue.size()) { // ERROR_LOG(HLE, "No queued samples, skipping channel %i", i); continue; } for (int s = 0; s < hwBlockSize; s++) { if (chans[i].sampleQueue.size() >= 2) { s16 sampleL = chans[i].sampleQueue.pop_front(); s16 sampleR = chans[i].sampleQueue.pop_front(); mixBuffer[s * 2 + 0] += sampleL; mixBuffer[s * 2 + 1] += sampleR; } else { ERROR_LOG(HLE, "Channel %i buffer underrun at %i of %i", i, s, hwBlockSize); break; } } } if (g_Config.bEnableSound) { section.lock(); if (outAudioQueue.room() >= hwBlockSize * 2) { // Push the mixed samples onto the output audio queue. for (int i = 0; i < hwBlockSize; i++) { s16 sampleL = clamp_s16(mixBuffer[i * 2 + 0]); s16 sampleR = clamp_s16(mixBuffer[i * 2 + 1]); outAudioQueue.push((s16)sampleL); outAudioQueue.push((s16)sampleR); } } else { // This happens quite a lot. There's still something slightly off // about the amount of audio we produce. DEBUG_LOG(HLE, "Audio outbuffer overrun! room = %i / %i", outAudioQueue.room(), (u32)outAudioQueue.capacity()); } section.unlock(); } }
// Executed from sound stream thread unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) { unsigned int currentSample = 0; // Cache access in non-volatile variable // This is the only function changing the read value, so it's safe to // cache it locally although it's written here. // The writing pointer will be modified outside, but it will only increase, // so we will just ignore new written data while interpolating. // Without this cache, the compiler wouldn't be allowed to optimize the // interpolation loop. u32 indexR = Common::AtomicLoad(m_indexR); u32 indexW = Common::AtomicLoad(m_indexW); // We force on the audio resampler if the output sample rate doesn't match the input. for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) { u32 indexR2 = indexR + 2; //next sample s16 l1 = m_buffer[indexR & INDEX_MASK]; //current s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current samples[currentSample] = l1; samples[currentSample + 1] = r1; indexR += 2; } aid_sample_rate_ = sample_rate; int realSamples = currentSample; if (currentSample < numSamples * 2) underrunCount_++; // Padding with the last value to reduce clicking short s[2]; s[0] = clamp_s16(m_buffer[(indexR - 1) & INDEX_MASK]); s[1] = clamp_s16(m_buffer[(indexR - 2) & INDEX_MASK]); for (; currentSample < numSamples * 2; currentSample += 2) { samples[currentSample] = s[0]; samples[currentSample + 1] = s[1]; } // Flush cached variable Common::AtomicStore(m_indexR, indexR); //if (realSamples != numSamples * 2) { // ILOG("Underrun! %i / %i", realSamples / 2, numSamples); //} lastBufSize_ = (m_indexW - m_indexR) & INDEX_MASK; return realSamples / 2; }
// Narrows `size` 32-bit samples from `in` into 16-bit samples at `out`.
// With SSE available, groups of eight are narrowed using the signed-saturating
// pack instruction; any remainder (or everything, without SSE) goes through
// clamp_s16 one sample at a time.
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) {
#ifdef _M_SSE
	// Size will always be 16-byte aligned as the hwBlockSize is.
	// Unaligned loads/stores are used, so the pointers themselves need no alignment.
	for (; size >= 8; size -= 8, in += 8, out += 8) {
		const __m128i low4 = _mm_loadu_si128((__m128i *)in);
		const __m128i high4 = _mm_loadu_si128((__m128i *)(in + 4));
		_mm_storeu_si128((__m128i *)out, _mm_packs_epi32(low4, high4));
	}
#endif
	// Scalar path for whatever is left.
	const s32 *const end = in + size;
	while (in != end)
		*out++ = clamp_s16(*in++);
}
// Mix samples from the various audio channels into a single sample queue. // This single sample queue is where __AudioMix should read from. If the sample queue is full, we should // just sleep the main emulator thread a little. void __AudioUpdate() { // Audio throttle doesn't really work on the PSP since the mixing intervals are so closely tied // to the CPU. Much better to throttle the frame rate on frame display and just throw away audio // if the buffer somehow gets full. bool firstChannel = true; for (u32 i = 0; i < PSP_AUDIO_CHANNEL_MAX + 1; i++) { if (!chans[i].reserved) continue; __AudioWakeThreads(chans[i], 0, hwBlockSize); if (!chans[i].sampleQueue.size()) { continue; } if (hwBlockSize * 2 > chans[i].sampleQueue.size()) { ERROR_LOG(SCEAUDIO, "Channel %i buffer underrun at %i of %i", i, (int)chans[i].sampleQueue.size() / 2, hwBlockSize); } const s16 *buf1 = 0, *buf2 = 0; size_t sz1, sz2; chans[i].sampleQueue.popPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2); if (firstChannel) { for (size_t s = 0; s < sz1; s++) mixBuffer[s] = buf1[s]; if (buf2) { for (size_t s = 0; s < sz2; s++) mixBuffer[s + sz1] = buf2[s]; } firstChannel = false; } else { for (size_t s = 0; s < sz1; s++) mixBuffer[s] += buf1[s]; if (buf2) { for (size_t s = 0; s < sz2; s++) mixBuffer[s + sz1] += buf2[s]; } } } if (firstChannel) { memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32)); } if (g_Config.bEnableSound) { lock_guard guard(section); if (outAudioQueue.room() >= hwBlockSize * 2) { s16 *buf1 = 0, *buf2 = 0; size_t sz1, sz2; outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2); for (size_t s = 0; s < sz1; s++) buf1[s] = clamp_s16(mixBuffer[s]); if (buf2) { for (size_t s = 0; s < sz2; s++) buf2[s] = clamp_s16(mixBuffer[s + sz1]); } } else { // This happens quite a lot. There's still something slightly off // about the amount of audio we produce. } } }
// Executed from sound stream thread unsigned int StereoResampler::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) { if (!samples) return 0; unsigned int currentSample = 0; // Cache access in non-volatile variable // This is the only function changing the read value, so it's safe to // cache it locally although it's written here. // The writing pointer will be modified outside, but it will only increase, // so we will just ignore new written data while interpolating. // Without this cache, the compiler wouldn't be allowed to optimize the // interpolation loop. u32 indexR = Common::AtomicLoad(m_indexR); u32 indexW = Common::AtomicLoad(m_indexW); const int INDEX_MASK = (m_bufsize * 2 - 1); // We force on the audio resampler if the output sample rate doesn't match the input. if (!g_Config.bAudioResampler && sample_rate == (int)m_input_sample_rate) { for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) { s16 l1 = m_buffer[indexR & INDEX_MASK]; //current s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current samples[currentSample] = l1; samples[currentSample + 1] = r1; indexR += 2; } sample_rate_ = (float)sample_rate; } else { // Drift prevention mechanism float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2); m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG; float offset = (m_numLeftI - m_lowwatermark) * CONTROL_FACTOR; if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT; if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT; sample_rate_ = (float)(m_input_sample_rate + offset); const u32 ratio = (u32)(65536.0 * sample_rate_ / (double)sample_rate); // TODO: consider a higher-quality resampling algorithm. // TODO: Add a fast path for 1:1. 
for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) { u32 indexR2 = indexR + 2; //next sample s16 l1 = m_buffer[indexR & INDEX_MASK]; //current s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16; int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16; samples[currentSample] = sampleL; samples[currentSample + 1] = sampleR; m_frac += ratio; indexR += 2 * (u16)(m_frac >> 16); m_frac &= 0xffff; } } int realSamples = currentSample; if (currentSample < numSamples * 2) underrunCount_++; // Padding with the last value to reduce clicking short s[2]; s[0] = clamp_s16(m_buffer[(indexR - 1) & INDEX_MASK]); s[1] = clamp_s16(m_buffer[(indexR - 2) & INDEX_MASK]); for (; currentSample < numSamples * 2; currentSample += 2) { samples[currentSample] = s[0]; samples[currentSample + 1] = s[1]; } // Flush cached variable Common::AtomicStore(m_indexR, indexR); //if (realSamples != numSamples * 2) { // ILOG("Underrun! %i / %i", realSamples / 2, numSamples); //} lastBufSize_ = (m_indexW - m_indexR) & INDEX_MASK; return realSamples / 2; }
// Mix samples from the various audio channels into a single sample queue. // This single sample queue is where __AudioMix should read from. If the sample queue is full, we should // just sleep the main emulator thread a little. void __AudioUpdate() { // Audio throttle doesn't really work on the PSP since the mixing intervals are so closely tied // to the CPU. Much better to throttle the frame rate on frame display and just throw away audio // if the buffer somehow gets full. bool firstChannel = true; for (u32 i = 0; i < PSP_AUDIO_CHANNEL_MAX + 1; i++) { if (!chans[i].reserved) continue; __AudioWakeThreads(chans[i], 0, hwBlockSize); if (!chans[i].sampleQueue.size()) { continue; } if (hwBlockSize * 2 > (int)chans[i].sampleQueue.size()) { ERROR_LOG(SCEAUDIO, "Channel %i buffer underrun at %i of %i", i, (int)chans[i].sampleQueue.size() / 2, hwBlockSize); } const s16 *buf1 = 0, *buf2 = 0; size_t sz1, sz2; chans[i].sampleQueue.popPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2); if (firstChannel) { for (size_t s = 0; s < sz1; s++) mixBuffer[s] = buf1[s]; if (buf2) { for (size_t s = 0; s < sz2; s++) mixBuffer[s + sz1] = buf2[s]; } firstChannel = false; } else { // Surprisingly hard to SIMD efficiently on SSE2 due to lack of 16-to-32-bit sign extension. NEON should be straight-forward though, and SSE4.1 can do it nicely. // Actually, the cmple/pack trick should work fine... for (size_t s = 0; s < sz1; s++) mixBuffer[s] += buf1[s]; if (buf2) { for (size_t s = 0; s < sz2; s++) mixBuffer[s + sz1] += buf2[s]; } } } if (firstChannel) { // Nothing was written above, let's memset. 
memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32)); } if (g_Config.bEnableSound) { resampler.PushSamples(mixBuffer, hwBlockSize); #ifndef MOBILE_DEVICE if (!m_logAudio) { if (g_Config.bDumpAudio) { std::string audio_file_name = GetSysDirectory(DIRECTORY_AUDIO) + "audiodump.wav"; // Create the path just in case it doesn't exist File::CreateDir(GetSysDirectory(DIRECTORY_AUDIO)); File::CreateEmptyFile(audio_file_name); __StartLogAudio(audio_file_name); } } else { if (g_Config.bDumpAudio) { for (int i = 0; i < hwBlockSize * 2; i++) { clampedMixBuffer[i] = clamp_s16(mixBuffer[i]); } g_wave_writer.AddStereoSamples(clampedMixBuffer, hwBlockSize); } else { __StopLogAudio(); } } #endif } }