/// Sets up a Speex decoder using speex_wb_mode and, when RESAMPLING_SPEEX
/// is defined, a single-channel 16000 -> 44100 resampler for its output.
///
/// @throws MediaException if the decoder state cannot be created, or if
///         resampler creation reports an error. A constructor that throws
///         never has its destructor run, so anything already acquired is
///         released here before the exception leaves.
AudioDecoderSpeex::AudioDecoderSpeex()
    :
    _speex_dec_state(speex_decoder_init(&speex_wb_mode))
{
    if (!_speex_dec_state) {
        throw MediaException(_("AudioDecoderSpeex: state initialization failed."));
    }

    speex_bits_init(&_speex_bits);

    // Ask the decoder how many samples one decoded frame contains.
    speex_decoder_ctl(_speex_dec_state, SPEEX_GET_FRAME_SIZE, &_speex_framesize);

#ifdef RESAMPLING_SPEEX
    int err = 0;
    _resampler = speex_resampler_init(1, 16000, 44100,
            SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);

    if (err != RESAMPLER_ERR_SUCCESS) {
        // Undo the decoder setup above; otherwise the bit buffer and the
        // decoder state would leak, since the destructor will not run.
        speex_bits_destroy(&_speex_bits);
        speex_decoder_destroy(_speex_dec_state);
        throw MediaException(_("AudioDecoderSpeex: initialization failed."));
    }

    spx_uint32_t num = 0, den = 0;

    speex_resampler_get_ratio (_resampler, &num, &den);
    assert(num && den);

    // Scale the decoder frame size by den/num to get the size of one
    // resampled frame.
    boost::rational<boost::uint32_t> numsamples(den, num);

    numsamples *= _speex_framesize * 2 /* convert to stereo */;

    _target_frame_size = boost::rational_cast<boost::uint32_t>(numsamples);
#endif
}
// Resamples input data to an output buffer, according to |mBufferSampleRate| and // the playbackRate/detune. // The number of frames consumed/produced depends on the amount of space // remaining in both the input and output buffer, and the playback rate (that // is, the ratio between the output samplerate and the input samplerate). void CopyFromInputBufferWithResampling(AudioBlock* aOutput, uint32_t aChannels, uint32_t* aOffsetWithinBlock, uint32_t aAvailableInOutput, StreamTime* aCurrentPosition, uint32_t aBufferMax) { if (*aOffsetWithinBlock == 0) { aOutput->AllocateChannels(aChannels); } SpeexResamplerState* resampler = mResampler; MOZ_ASSERT(aChannels > 0); if (mBufferPosition < aBufferMax) { uint32_t availableInInputBuffer = aBufferMax - mBufferPosition; uint32_t ratioNum, ratioDen; speex_resampler_get_ratio(resampler, &ratioNum, &ratioDen); // Limit the number of input samples copied and possibly // format-converted for resampling by estimating how many will be used. // This may be a little small if still filling the resampler with // initial data, but we'll get called again and it will work out. uint32_t inputLimit = aAvailableInOutput * ratioNum / ratioDen + 10; if (!BegunResampling()) { // First time the resampler is used. uint32_t inputLatency = speex_resampler_get_input_latency(resampler); inputLimit += inputLatency; // If starting after mStart, then play from the beginning of the // buffer, but correct for input latency. If starting before mStart, // then align the resampler so that the time corresponding to the // first input sample is mStart. int64_t skipFracNum = static_cast<int64_t>(inputLatency) * ratioDen; double leadTicks = mStart - *aCurrentPosition; if (leadTicks > 0.0) { // Round to nearest output subsample supported by the resampler at // these rates. 
int64_t leadSubsamples = leadTicks * ratioNum + 0.5; MOZ_ASSERT(leadSubsamples <= skipFracNum, "mBeginProcessing is wrong?"); skipFracNum -= leadSubsamples; } speex_resampler_set_skip_frac_num(resampler, std::min<int64_t>(skipFracNum, UINT32_MAX)); mBeginProcessing = -STREAM_TIME_MAX; } inputLimit = std::min(inputLimit, availableInInputBuffer); MOZ_ASSERT(mBuffer.mVolume == 1.0f); for (uint32_t i = 0; true; ) { uint32_t inSamples = inputLimit; uint32_t outSamples = aAvailableInOutput; float* outputData = aOutput->ChannelFloatsForWrite(i) + *aOffsetWithinBlock; if (mBuffer.mBufferFormat == AUDIO_FORMAT_FLOAT32) { const float* inputData = mBuffer.ChannelData<float>()[i] + mBufferPosition; WebAudioUtils::SpeexResamplerProcess(resampler, i, inputData, &inSamples, outputData, &outSamples); } else { MOZ_ASSERT(mBuffer.mBufferFormat == AUDIO_FORMAT_S16); const int16_t* inputData = mBuffer.ChannelData<int16_t>()[i] + mBufferPosition; WebAudioUtils::SpeexResamplerProcess(resampler, i, inputData, &inSamples, outputData, &outSamples); } if (++i == aChannels) { mBufferPosition += inSamples; MOZ_ASSERT(mBufferPosition <= mBufferEnd || mLoop); *aOffsetWithinBlock += outSamples; *aCurrentPosition += outSamples; if (inSamples == availableInInputBuffer && !mLoop) { // We'll feed in enough zeros to empty out the resampler's memory. // This handles the output latency as well as capturing the low // pass effects of the resample filter. mRemainingResamplerTail = 2 * speex_resampler_get_input_latency(resampler) - 1; } return; } } } else { for (uint32_t i = 0; true; ) { uint32_t inSamples = mRemainingResamplerTail; uint32_t outSamples = aAvailableInOutput; float* outputData = aOutput->ChannelFloatsForWrite(i) + *aOffsetWithinBlock; // AudioDataValue* for aIn selects the function that does not try to // copy and format-convert input data. 
WebAudioUtils::SpeexResamplerProcess(resampler, i, static_cast<AudioDataValue*>(nullptr), &inSamples, outputData, &outSamples); if (++i == aChannels) { MOZ_ASSERT(inSamples <= mRemainingResamplerTail); mRemainingResamplerTail -= inSamples; *aOffsetWithinBlock += outSamples; *aCurrentPosition += outSamples; break; } } } }