REFERENCE_TIME CTimeStretchFilter::DrainBuffers(IMediaSample* pSample, REFERENCE_TIME rtNewStart)
{
  Log("TS - DrainBuffers - rtNewStart: %6.3f", rtNewStart / 10000000.0);

  uint unprocessedSamplesBefore = numUnprocessedSamples();
  uint zeros = flushEx() - 32; // Magic 32 to keep the SoundTouch's output in sync
  uint unprocessedSamplesAfter = numUnprocessedSamples();

  UINT32 outFramesAfter = numSamples();
  UINT32 totalSamples = zeros + unprocessedSamplesBefore;
  UINT32 totalProcessedSamples = totalSamples - unprocessedSamplesAfter;

  Log("TS - DrainBuffers - unprocessedSamplesBefore: %u zeros: %u unprocessedSamplesAfter: %u outFramesAfter: %u duration %6.3f",
    unprocessedSamplesBefore, zeros, unprocessedSamplesAfter, outFramesAfter,
    (double)unprocessedSamplesBefore * (double)UNITS / (double)m_pOutputFormat->Format.nSamplesPerSec);

  REFERENCE_TIME rtAHwTime = 0;
  REFERENCE_TIME rtRCTime = 0;
  REFERENCE_TIME estimatedExtraSampleDuration = (((int)zeros - (int)unprocessedSamplesAfter) * UNITS) / m_pOutputFormat->Format.nSamplesPerSec;

  double bias = m_pClock->GetBias();
  double adjustment = m_pClock->Adjustment();

  m_pClock->GetHWTime(&rtRCTime, &rtAHwTime);
  double AVMult = m_pClock->SuggestedAudioMultiplier(rtAHwTime, rtRCTime, bias, adjustment);
  setTempoInternal(AVMult, 1.0);

  CreateOutput(totalProcessedSamples, outFramesAfter, bias, adjustment, AVMult, true);

  // Empty SoundTouch's buffers
  clear();

  pSample->SetDiscontinuity(false);

  return estimatedExtraSampleDuration;
}
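// Checks an incoming sample for a dynamic format change. When the media type
// attached to the sample differs from the current input format, SoundTouch's
// buffers are drained with the old format, the new format is negotiated and
// S_FALSE is returned so the caller can pick up the new media type; the extra
// duration produced by the drain is reported through rtDrained. Returns S_OK
// when no format change is pending.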
HRESULT CTimeStretchFilter::CheckSample(IMediaSample* pSample, REFERENCE_TIME* rtDrained)
{
  if (!pSample)
    return S_OK;

  AM_MEDIA_TYPE* pmt = NULL;
  bool bFormatChanged = false;

  HRESULT hr = S_OK;

  if (SUCCEEDED(pSample->GetMediaType(&pmt)) && pmt)
    bFormatChanged = !FormatsEqual((WAVEFORMATEXTENSIBLE*)pmt->pbFormat, m_pInputFormat);

  if (bFormatChanged)
  {
    uint unprocessedSamplesBefore = numUnprocessedSamples();
    uint zeros = flushEx();
    uint unprocessedSamplesAfter = numUnprocessedSamples();

    UINT32 outFramesAfter = numSamples();
    UINT32 totalSamples = zeros + unprocessedSamplesBefore;
    UINT32 totalProcessedSamples = totalSamples - unprocessedSamplesAfter;

    //double bias = (double)totalProcessedSamples / (double)outFramesAfter;

    REFERENCE_TIME estimatedSampleDuration = totalProcessedSamples * UNITS / m_pOutputFormat->Format.nSamplesPerSec;

    // Report the extra duration produced by the drain to the caller so that the
    // outgoing timestamps can be kept continuous (assumption: same semantics as
    // the value returned by DrainBuffers)
    if (rtDrained)
      *rtDrained = (((int)zeros - (int)unprocessedSamplesAfter) * UNITS) / m_pOutputFormat->Format.nSamplesPerSec;

    double bias = m_pClock->GetBias();
    double adjustment = m_pClock->Adjustment();
    double AVMult = m_pClock->SuggestedAudioMultiplier(estimatedSampleDuration, bias, adjustment);
    setTempoInternal(AVMult, 1.0);

    CreateOutput(totalProcessedSamples, outFramesAfter, bias, adjustment, AVMult, true);

    // Empty SoundTouch's buffers
    clear();

    // Apply format change
    ChannelOrder chOrder;
    hr = NegotiateFormat((WAVEFORMATEXTENSIBLE*)pmt->pbFormat, 1, &chOrder);
    pSample->SetDiscontinuity(false);

    if (FAILED(hr))
    {
      DeleteMediaType(pmt);
      Log("CTimeStretchFilter::CheckSample failed to change format: 0x%08x", hr);
      return hr;
    }
    else
    {
      m_chOrder = chOrder;
      return S_FALSE; // format changed
    }
  }

  return S_OK;
}
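// Builds the per-channel CSoundTouchEx streams for a new input/output format.
// Channels are taken from the WAVEFORMATEXTENSIBLE channel mask: speaker pairs
// listed in PairedSpeakers are stretched as stereo streams, all remaining
// channels (e.g. center or LFE) as mono streams. Only the PCM and IEEE float
// sub-formats are accepted.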
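// Illustration (assuming PairedSpeakers lists the usual left/right pairs such
// as FL/FR, BL/BR and SL/SR): a 5.1 layout with the channel mask
// SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER |
// SPEAKER_LOW_FREQUENCY | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT
// would be split into two stereo streams (FL/FR and BL/BR) plus two mono
// streams (FC and LFE), each driven by its own CSoundTouchEx instance.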
HRESULT CTimeStretchFilter::SetFormat(const WAVEFORMATEXTENSIBLE* pwfe)
{
  std::vector<CSoundTouchEx*>* newStreams = NULL;

  if (pwfe)
  {
    // First verify format is supported
    if (pwfe->SubFormat != KSDATAFORMAT_SUBTYPE_PCM &&
        pwfe->SubFormat != KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
      return VFW_E_TYPE_NOT_ACCEPTED;

    DWORD dwChannelMask = pwfe->dwChannelMask;

    newStreams = new std::vector<CSoundTouchEx*>;
    if (!newStreams)
      return E_OUTOFMEMORY;

    map<DWORD, int> inSpeakerOffset;
    map<DWORD, int> outSpeakerOffset;
    int currOffset = 0;

    // Each bit position in dwChannelMask corresponds to a speaker position -
    // try every bit position from 0 to 31
    for (DWORD dwSpeaker = 1; dwSpeaker != 0; dwSpeaker <<= 1)
    {
      if (dwChannelMask & dwSpeaker)
      {
        inSpeakerOffset[dwSpeaker] = currOffset;
        currOffset += pwfe->Format.wBitsPerSample / 8;
      }
    }

    ASSERT(inSpeakerOffset.size() == pwfe->Format.nChannels);

    // PCM output, 1-to-1 mapping of input to output
    outSpeakerOffset.insert(inSpeakerOffset.begin(), inSpeakerOffset.end());

    // TODO: First create the base downmixing coefficients
    // for syncing mono channels like LFE and Center

    // Now start adding channels

    // First try all speaker pairs
    for (SpeakerPair* pPair = PairedSpeakers; pPair->dwLeft; pPair++)
    {
      if ((pPair->PairMask() & dwChannelMask) == pPair->PairMask())
      {
        CSoundTouchEx* pStream = new CSoundTouchEx();
        pStream->setChannels(2);
        pStream->SetInputChannels(inSpeakerOffset[pPair->dwLeft], inSpeakerOffset[pPair->dwRight]);
        pStream->SetInputFormat(pwfe->Format.nBlockAlign, pwfe->Format.wBitsPerSample / 8);
        pStream->SetOutputChannels(outSpeakerOffset[pPair->dwLeft], outSpeakerOffset[pPair->dwRight]);
        pStream->SetOutputFormat(pwfe->Format.nBlockAlign, pwfe->Format.wBitsPerSample / 8);
        pStream->SetupFormats();
        newStreams->push_back(pStream);
        dwChannelMask &= ~pPair->PairMask(); // mark channels as processed
      }
    }

    // Then add all remaining channels as mono streams -
    // try every bit position from 0 to 31
    for (DWORD dwSpeaker = 1; dwSpeaker != 0; dwSpeaker <<= 1)
    {
      if (dwChannelMask & dwSpeaker)
      {
        CSoundTouchEx* pStream = new CSoundTouchEx();
        // TODO: make this a mixing stream, so that the channel can be synchronized
        // to the mix of the main channels (normally Front Left/Right if available)
        pStream->setChannels(1);
        pStream->SetInputChannels(inSpeakerOffset[dwSpeaker]);
        pStream->SetInputFormat(pwfe->Format.nBlockAlign, pwfe->Format.wBitsPerSample / 8);
        pStream->SetOutputChannels(outSpeakerOffset[dwSpeaker]);
        pStream->SetOutputFormat(pwfe->Format.nBlockAlign, pwfe->Format.wBitsPerSample / 8);
        pStream->SetupFormats();
        newStreams->push_back(pStream);

        // The following is only necessary if we skip some channels - currently we don't
        //dwChannelMask &= ~dwSpeaker; // mark channel as processed
      }
    }
  }

  // delete old ones
  std::vector<CSoundTouchEx*>* oldStreams = m_Streams;
  m_Streams = newStreams;

  if (oldStreams)
  {
    for (int i = 0; i < oldStreams->size(); i++)
      SAFE_DELETE(oldStreams->at(i));
    SAFE_DELETE(oldStreams);
  }

  if (m_Streams)
  {
    setTempoInternal(m_fNewTempo, m_fNewAdjustment);
    setSampleRate(pwfe->Format.nSamplesPerSec);
  }

  return S_OK;
}
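// Worker thread: waits for the next sample or command from the audio sink.
// Flush commands reset the local timestamps and the clock, pause/resume are
// ignored, and data samples are pushed through SoundTouch with the tempo set
// to the multiplier suggested by the clock. Output timestamps are scaled by
// that multiplier so the stretched audio stays continuous; for example
// (illustrative numbers only), a 20 ms input sample processed with
// AVMult = 1.02 is stamped with a duration of 20 ms / 1.02, roughly 19.6 ms.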
// Processing
DWORD CTimeStretchFilter::ThreadProc()
{
  Log("CTimeStretchFilter::timestretch thread - starting up - thread ID: %d", m_ThreadId);

  SetThreadName(0, "TimeStretchFilter");

  AudioSinkCommand command;
  CComPtr<IMediaSample> sample;

  while (true)
  {
    m_csResources.Unlock();
    HRESULT hr = GetNextSampleOrCommand(&command, &sample.p, INFINITE, &m_hSampleEvents, &m_dwSampleWaitObjects);
    m_csResources.Lock();

    if (hr == MPAR_S_THREAD_STOPPING)
    {
      Log("CTimeStretchFilter::timestretch thread - closing down - thread ID: %d", m_ThreadId);
      SetEvent(m_hCurrentSampleReleased);
      CloseThread();
      m_csResources.Unlock();
      return 0;
    }
    else
    {
      if (command == ASC_Flush)
      {
        Log("CTimeStretchFilter::timestretch thread - flushing");

        m_rtInSampleTime = m_rtNextIncomingSampleTime = 0;
        m_rtLastOuputStart = m_rtLastOuputEnd = -1;

        if (m_pNextOutSample)
          m_pNextOutSample.Release();

        flush();
        m_pClock->Flush();

        sample.Release();
        SetEvent(m_hCurrentSampleReleased);
      }
      else if (command == ASC_Pause || command == ASC_Resume)
        continue;
      else if (sample)
      {
        BYTE* pMediaBuffer = NULL;
        long size = sample->GetActualDataLength();

        if (sample->IsDiscontinuity() == S_OK)
        {
          sample->SetDiscontinuity(false);
          m_bDiscontinuity = true;
        }

        REFERENCE_TIME rtDrained = 0;
        if (CheckSample(sample, &rtDrained) == S_FALSE)
        {
          DeleteMediaType(m_pMediaType);
          sample->GetMediaType(&m_pMediaType);
        }

        CheckStreamContinuity(sample, rtDrained);
        m_nSampleNum++;

        hr = sample->GetPointer(&pMediaBuffer);

        if ((hr == S_OK) && m_pMemAllocator)
        {
          REFERENCE_TIME rtStart = 0;
          REFERENCE_TIME rtAdjustedStart = 0;
          REFERENCE_TIME rtEnd = 0;
          REFERENCE_TIME rtAdjustedEnd = 0;
          REFERENCE_TIME rtAHwTime = 0;
          REFERENCE_TIME rtRCTime = 0;

          m_pClock->GetHWTime(&rtRCTime, &rtAHwTime);

          sample->GetTime(&rtStart, &rtEnd);
          REFERENCE_TIME sampleDuration = rtEnd - rtStart;

          uint unprocessedSamplesBefore = numUnprocessedSamples();
          uint unprocessedSamplesAfter = 0;

          UINT32 nFrames = size / m_pOutputFormat->Format.nBlockAlign;

          double bias = m_pClock->GetBias();
          double adjustment = m_pClock->Adjustment();
          double AVMult = m_pClock->SuggestedAudioMultiplier(rtAHwTime, rtRCTime, bias, adjustment);
          setTempoInternal(AVMult, 1.0);

          if (m_rtLastOuputEnd == -1)
            m_rtLastOuputEnd = rtStart / AVMult - 1;

          m_rtLastOuputStart = m_rtLastOuputEnd + 1;

          // Process the sample
          putSamplesInternal((const short*)pMediaBuffer, size / m_pOutputFormat->Format.nBlockAlign);
          unprocessedSamplesAfter = numUnprocessedSamples();

          UINT32 nInFrames = (size / m_pOutputFormat->Format.nBlockAlign) - unprocessedSamplesAfter + unprocessedSamplesBefore;
          UINT32 nOutFrames = numSamples();

          // TODO: SoundTouch can provide fewer samples than asked for (but never more),
          // so a cumulative error is possible. This should not matter over the course of
          // a long TV stint, but could be solved for correctness.
          // m_rtLastOuputEnd += (nOutFrames + unprocessedSamplesAfter - unprocessedSamplesBefore) * UNITS / m_pOutputFormat->Format.nSamplesPerSec;

          //rtStart = m_rtInSampleTime;
          rtEnd = rtStart + sampleDuration;

          rtAdjustedStart = m_rtLastOuputEnd + 1;
          rtAdjustedEnd = rtAdjustedStart + sampleDuration / AVMult;
          m_rtLastOuputEnd += sampleDuration / AVMult;

          CreateOutput(nInFrames, nOutFrames, bias, adjustment, AVMult, false);

          m_pClock->AddSample(rtStart, rtAdjustedStart, rtEnd, rtAdjustedEnd);
        }
      }
    }
  }
}