// Returns the media time corresponding to the last audio frame written to
// the output, or TimeUnit::Zero() on arithmetic overflow.
TimeUnit
AudioSink::GetEndTime() const
{
  // Snapshot the written-frame counter under the monitor; the arithmetic
  // below does not need the lock held.
  int64_t framesWritten;
  {
    MonitorAutoLock mon(mMonitor);
    framesWritten = mWritten;
  }
  const TimeUnit end = mStartTime + FramesToTimeUnit(framesWritten, mOutputRate);
  if (!end.IsValid()) {
    NS_WARNING("Int overflow calculating audio end time");
    return TimeUnit::Zero();
  }
  return end;
}
// Returns the media time corresponding to the last audio frame written to
// the output, clamped to the original end time; TimeUnit::Zero() on overflow.
TimeUnit
AudioSink::GetEndTime() const
{
  // Only the read of mWritten requires the monitor.
  int64_t framesWritten;
  {
    MonitorAutoLock mon(mMonitor);
    framesWritten = mWritten;
  }
  const TimeUnit end = mStartTime + FramesToTimeUnit(framesWritten, mOutputRate);
  if (!end.IsValid()) {
    NS_WARNING("Int overflow calculating audio end time");
    return TimeUnit::Zero();
  }
  // As we may be resampling, rounding errors may occur. Ensure we never get
  // past the original end time.
  return std::min(mLastEndTime, end);
}
// Wraps an interleaved sample buffer in a new AudioData, reusing the offset
// and start time of aReference. Returns nullptr for an empty buffer, and
// nullptr (with mErrored set) when the duration computation overflows.
already_AddRefed<AudioData>
AudioSink::CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
                                 AudioData* aReference)
{
  // The buffer holds interleaved samples; derive the per-channel frame count.
  const uint32_t frameCount = aBuffer.Length() / mOutputChannels;
  if (frameCount == 0) {
    return nullptr;
  }
  auto frameDuration = FramesToTimeUnit(frameCount, mOutputRate);
  if (!frameDuration.IsValid()) {
    NS_WARNING("Int overflow in AudioSink");
    mErrored = true;
    return nullptr;
  }
  RefPtr<AudioData> audio = new AudioData(aReference->mOffset,
                                          aReference->mTime,
                                          frameDuration,
                                          frameCount,
                                          Move(aBuffer),
                                          mOutputChannels,
                                          mOutputRate);
  return audio.forget();
}
// Decodes a single Vorbis packet, draining every PCM block libvorbis makes
// available and resolving with the resulting AudioData samples. Rejects with
// a decode/overflow/OOM error on any failure.
RefPtr<MediaDataDecoder::DecodePromise>
VorbisDataDecoder::ProcessDecode(MediaRawData* aSample)
{
  MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn());
  const unsigned char* aData = aSample->Data();
  size_t aLength = aSample->Size();
  int64_t aOffset = aSample->mOffset;
  // The three Vorbis header packets must already have been consumed during
  // initialization before audio packets arrive.
  MOZ_ASSERT(mPacketCount >= 3);
  if (!mLastFrameTime ||
      mLastFrameTime.ref() != aSample->mTime.ToMicroseconds()) {
    // We are starting a new block.
    mFrames = 0;
    mLastFrameTime = Some(aSample->mTime.ToMicroseconds());
  }
  ogg_packet pkt = InitVorbisPacket(
    aData, aLength, false, aSample->mEOS,
    aSample->mTimecode.ToMicroseconds(), mPacketCount++);
  int err = vorbis_synthesis(&mVorbisBlock, &pkt);
  if (err) {
    return DecodePromise::CreateAndReject(
      MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                  RESULT_DETAIL("vorbis_synthesis:%d", err)),
      __func__);
  }
  // Hand the synthesized block to the DSP state so PCM can be read out.
  err = vorbis_synthesis_blockin(&mVorbisDsp, &mVorbisBlock);
  if (err) {
    return DecodePromise::CreateAndReject(
      MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                  RESULT_DETAIL("vorbis_synthesis_blockin:%d", err)),
      __func__);
  }
  VorbisPCMValue** pcm = 0;
  int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
  if (frames == 0) {
    // A packet may legitimately produce no output (e.g. the first packet).
    return DecodePromise::CreateAndResolve(DecodedData(), __func__);
  }
  DecodedData results;
  // Drain all available PCM; vorbis_synthesis_pcmout may deliver the data
  // in several chunks.
  while (frames > 0) {
    uint32_t channels = mVorbisDsp.vi->channels;
    uint32_t rate = mVorbisDsp.vi->rate;
    AlignedAudioBuffer buffer(frames*channels);
    if (!buffer) {
      return DecodePromise::CreateAndReject(
        MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
    }
    // Interleave the planar PCM returned by libvorbis, converting each
    // sample to the output representation.
    for (uint32_t j = 0; j < channels; ++j) {
      VorbisPCMValue* channel = pcm[j];
      for (uint32_t i = 0; i < uint32_t(frames); ++i) {
        buffer[i*channels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
      }
    }
    auto duration = FramesToTimeUnit(frames, rate);
    if (!duration.IsValid()) {
      return DecodePromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                    RESULT_DETAIL("Overflow converting audio duration")),
        __func__);
    }
    // mFrames accumulates the frames already emitted for this input time,
    // used to offset each chunk's presentation time from aSample->mTime.
    auto total_duration = FramesToTimeUnit(mFrames, rate);
    if (!total_duration.IsValid()) {
      return DecodePromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                    RESULT_DETAIL("Overflow converting audio total_duration")),
        __func__);
    }
    auto time = total_duration + aSample->mTime;
    if (!time.IsValid()) {
      return DecodePromise::CreateAndReject(
        MediaResult(
          NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
          RESULT_DETAIL("Overflow adding total_duration and aSample->mTime")),
        __func__);
    };
    // Lazily create the channel-layout converter on first output.
    if (!mAudioConverter) {
      AudioConfig in(
        AudioConfig::ChannelLayout(channels, VorbisLayout(channels)), rate);
      AudioConfig out(channels, rate);
      if (!in.IsValid() || !out.IsValid()) {
        return DecodePromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      RESULT_DETAIL("Invalid channel layout:%u", channels)),
          __func__);
      }
      mAudioConverter = MakeUnique<AudioConverter>(in, out);
    }
    MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
    AudioSampleBuffer data(Move(buffer));
    data = mAudioConverter->Process(Move(data));
    results.AppendElement(new AudioData(aOffset, time, duration, frames,
                                        data.Forget(), channels, rate));
    mFrames += frames;
    // Tell libvorbis how many frames we consumed so it can recycle them.
    err = vorbis_synthesis_read(&mVorbisDsp, frames);
    if (err) {
      return DecodePromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                    RESULT_DETAIL("vorbis_synthesis_read:%d", err)),
        __func__);
    }
    frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
  }
  return DecodePromise::CreateAndResolve(Move(results), __func__);
}
// Decodes one compressed sample, emitting an AudioData via mCallback for
// each frame FFmpeg produces. Returns NS_OK or a decode/overflow/OOM error.
MediaResult
FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample)
{
  AVPacket packet;
  mLib->av_init_packet(&packet);
  packet.data = const_cast<uint8_t*>(aSample->Data());
  packet.size = aSample->Size();
  if (!PrepareFrame()) {
    return MediaResult(
      NS_ERROR_OUT_OF_MEMORY,
      RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame"));
  }
  int64_t samplePosition = aSample->mOffset;
  media::TimeUnit pts = media::TimeUnit::FromMicroseconds(aSample->mTime);
  // One input packet may contain several frames; keep decoding until the
  // packet is fully consumed.
  while (packet.size > 0) {
    int decoded;
    int bytesConsumed =
      mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
    if (bytesConsumed < 0) {
      NS_WARNING("FFmpeg audio decoder error.");
      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                         RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed));
    }
    if (decoded) {
      // Only inspect mFrame once FFmpeg reports a decoded frame; before that
      // mFrame->format may be stale or uninitialized and must not be used to
      // reject the stream.
      if (mFrame->format != AV_SAMPLE_FMT_FLT &&
          mFrame->format != AV_SAMPLE_FMT_FLTP &&
          mFrame->format != AV_SAMPLE_FMT_S16 &&
          mFrame->format != AV_SAMPLE_FMT_S16P &&
          mFrame->format != AV_SAMPLE_FMT_S32 &&
          mFrame->format != AV_SAMPLE_FMT_S32P) {
        return MediaResult(
          NS_ERROR_DOM_MEDIA_DECODE_ERR,
          RESULT_DETAIL("FFmpeg audio decoder outputs unsupported audio format"));
      }
      uint32_t numChannels = mCodecContext->channels;
      AudioConfig::ChannelLayout layout(numChannels);
      if (!layout.IsValid()) {
        return MediaResult(
          NS_ERROR_DOM_MEDIA_FATAL_ERR,
          RESULT_DETAIL("Unsupported channel layout:%u", numChannels));
      }
      uint32_t samplingRate = mCodecContext->sample_rate;
      AlignedAudioBuffer audio =
        CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
      if (!audio) {
        return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
      }
      media::TimeUnit duration =
        FramesToTimeUnit(mFrame->nb_samples, samplingRate);
      if (!duration.IsValid()) {
        return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                           RESULT_DETAIL("Invalid sample duration"));
      }
      RefPtr<AudioData> data = new AudioData(samplePosition,
                                             pts.ToMicroseconds(),
                                             duration.ToMicroseconds(),
                                             mFrame->nb_samples,
                                             Move(audio),
                                             numChannels,
                                             samplingRate);
      mCallback->Output(data);
      // Advance the presentation time for the next frame in this packet.
      pts += duration;
      if (!pts.IsValid()) {
        return MediaResult(
          NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
          RESULT_DETAIL("Invalid count of accumulated audio samples"));
      }
    }
    packet.data += bytesConsumed;
    packet.size -= bytesConsumed;
    samplePosition += bytesConsumed;
  }
  return NS_OK;
}
// Pulls one decoded sample out of the WMF transform, converts the PCM16
// payload to AudioDataValue, and returns it via aOutData. Returns
// MF_E_TRANSFORM_NEED_MORE_INPUT when the MFT wants more data, S_OK with
// aOutData possibly still null (stripped preroll) otherwise.
HRESULT
WMFAudioMFTManager::Output(int64_t aStreamOffset, nsRefPtr<MediaData>& aOutData)
{
  aOutData = nullptr;
  RefPtr<IMFSample> sample;
  HRESULT hr;
  int typeChangeCount = 0;
  while (true) {
    hr = mDecoder->Output(&sample);
    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
      return hr;
    }
    if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
      hr = UpdateOutputType();
      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
      // Catch infinite loops, but some decoders perform at least 2 stream
      // changes on consecutive calls, so be permissive.
      // 100 is arbitrarily > 2.
      NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE);
      ++typeChangeCount;
      continue;
    }
    break;
  }
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
  RefPtr<IMFMediaBuffer> buffer;
  hr = sample->ConvertToContiguousBuffer(byRef(buffer));
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
  BYTE* data = nullptr;
  // Note: *data will be owned by the IMFMediaBuffer, we don't need to free it.
  DWORD maxLength = 0, currentLength = 0;
  // Fix: these address-of operators were previously mangled into HTML-entity
  // mojibake ("&curren;", "&times;") and did not compile.
  hr = buffer->Lock(&data, &maxLength, &currentLength);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
  // Sometimes when starting decoding, the AAC decoder gives us samples
  // with a negative timestamp. AAC does usually have preroll (or encoder
  // delay) encoded into its bitstream, but the amount encoded to the stream
  // is variable, and it not signalled in-bitstream. There is sometimes
  // signalling in the MP4 container what the preroll amount, but it's
  // inconsistent. It looks like WMF's AAC encoder may take this into
  // account, so strip off samples with a negative timestamp to get us
  // to a 0-timestamp start. This seems to maintain A/V sync, so we can run
  // with this until someone complains...
  // We calculate the timestamp and the duration based on the number of audio
  // frames we've already played. We don't trust the timestamp stored on the
  // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?
  // If this sample block comes after a discontinuity (i.e. a gap or seek)
  // reset the frame counters, and capture the timestamp. Future timestamps
  // will be offset from this block's timestamp.
  UINT32 discontinuity = false;
  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
  if (mMustRecaptureAudioPosition || discontinuity) {
    // Update the output type, in case this segment has a different
    // rate. This also triggers on the first sample, which can have a
    // different rate than is advertised in the container, and sometimes we
    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.
    hr = UpdateOutputType();
    if (FAILED(hr)) {
      buffer->Unlock();
      return hr;
    }
    mAudioFrameSum = 0;
    LONGLONG timestampHns = 0;
    hr = sample->GetSampleTime(&timestampHns);
    if (FAILED(hr)) {
      buffer->Unlock();
      return hr;
    }
    // Sample times are in 100ns (hectonanosecond) units; convert to usecs.
    mAudioTimeOffset = media::TimeUnit::FromMicroseconds(timestampHns / 10);
    mMustRecaptureAudioPosition = false;
  }
  // We can assume PCM 16 output.
  int32_t numSamples = currentLength / 2;
  int32_t numFrames = numSamples / mAudioChannels;
  MOZ_ASSERT(numFrames >= 0);
  MOZ_ASSERT(numSamples >= 0);
  if (numFrames == 0) {
    // All data from this chunk stripped, loop back and try to output the next
    // frame, if possible.
    // Fix: must unlock the buffer before the early return; Lock/Unlock on an
    // IMFMediaBuffer must be paired.
    buffer->Unlock();
    return S_OK;
  }
  nsAutoArrayPtr<AudioDataValue> audioData(new AudioDataValue[numSamples]);
  int16_t* pcm = (int16_t*)data;
  for (int32_t i = 0; i < numSamples; ++i) {
    audioData[i] = AudioSampleToFloat(pcm[i]);
  }
  buffer->Unlock();
  media::TimeUnit timestamp =
    mAudioTimeOffset + FramesToTimeUnit(mAudioFrameSum, mAudioRate);
  NS_ENSURE_TRUE(timestamp.IsValid(), E_FAIL);
  mAudioFrameSum += numFrames;
  media::TimeUnit duration = FramesToTimeUnit(numFrames, mAudioRate);
  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);
  aOutData = new AudioData(aStreamOffset,
                           timestamp.ToMicroseconds(),
                           duration.ToMicroseconds(),
                           numFrames,
                           audioData.forget(),
                           mAudioChannels,
                           mAudioRate);
#ifdef LOG_SAMPLE_DECODE
  LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
      timestamp.ToMicroseconds(), duration.ToMicroseconds(), currentLength);
#endif
  return S_OK;
}
// Decodes one compressed sample, pushing each produced frame to mCallback.
// Errors are reported through mCallback->Error() rather than a return value.
void
FFmpegAudioDecoder<LIBAV_VER>::DecodePacket(MediaRawData* aSample)
{
  AVPacket packet;
  av_init_packet(&packet);
  packet.data = const_cast<uint8_t*>(aSample->Data());
  packet.size = aSample->Size();
  if (!PrepareFrame()) {
    NS_WARNING("FFmpeg audio decoder failed to allocate frame.");
    mCallback->Error();
    return;
  }
  int64_t samplePosition = aSample->mOffset;
  media::TimeUnit pts = media::TimeUnit::FromMicroseconds(aSample->mTime);
  // One packet may hold several frames; loop until it is fully consumed.
  while (packet.size > 0) {
    int decoded;
    int bytesConsumed =
      avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
    if (bytesConsumed < 0) {
      NS_WARNING("FFmpeg audio decoder error.");
      mCallback->Error();
      return;
    }
    if (decoded) {
      uint32_t numChannels = mCodecContext->channels;
      uint32_t samplingRate = mCodecContext->sample_rate;
      // NOTE(review): the result of CopyAndPackAudio is not null-checked
      // before being handed to AudioData — confirm the helper cannot fail
      // (the later AlignedAudioBuffer variant of this function does check).
      nsAutoArrayPtr<AudioDataValue> audio(
        CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples));
      media::TimeUnit duration =
        FramesToTimeUnit(mFrame->nb_samples, samplingRate);
      if (!duration.IsValid()) {
        NS_WARNING("Invalid count of accumulated audio samples");
        mCallback->Error();
        return;
      }
      nsRefPtr<AudioData> data = new AudioData(samplePosition,
                                               pts.ToMicroseconds(),
                                               duration.ToMicroseconds(),
                                               mFrame->nb_samples,
                                               audio.forget(),
                                               numChannels,
                                               samplingRate);
      mCallback->Output(data);
      // Advance the presentation time for any further frames in this packet.
      pts += duration;
      if (!pts.IsValid()) {
        NS_WARNING("Invalid count of accumulated audio samples");
        mCallback->Error();
        return;
      }
    }
    packet.data += bytesConsumed;
    packet.size -= bytesConsumed;
    samplePosition += bytesConsumed;
  }
  // Ask for more input only once the task queue has drained.
  if (mTaskQueue->IsEmpty()) {
    mCallback->InputExhausted();
  }
}
// Decodes one compressed sample on the task queue, pushing each produced
// frame to mCallback. Errors are reported via mCallback->Error().
void
FFmpegAudioDecoder<LIBAV_VER>::DecodePacket(MediaRawData* aSample)
{
  MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn());
  AVPacket packet;
  mLib->av_init_packet(&packet);
  packet.data = const_cast<uint8_t*>(aSample->Data());
  packet.size = aSample->Size();
  if (!PrepareFrame()) {
    NS_WARNING("FFmpeg audio decoder failed to allocate frame.");
    mCallback->Error();
    return;
  }
  int64_t samplePosition = aSample->mOffset;
  media::TimeUnit pts = media::TimeUnit::FromMicroseconds(aSample->mTime);
  // One packet may hold several frames; loop until it is fully consumed.
  while (packet.size > 0) {
    int decoded;
    int bytesConsumed =
      mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
    if (bytesConsumed < 0) {
      NS_WARNING("FFmpeg audio decoder error.");
      mCallback->Error();
      return;
    }
    if (decoded) {
      uint32_t numChannels = mCodecContext->channels;
      // Reject channel configurations we cannot represent.
      AudioConfig::ChannelLayout layout(numChannels);
      if (!layout.IsValid()) {
        mCallback->Error();
        return;
      }
      uint32_t samplingRate = mCodecContext->sample_rate;
      AlignedAudioBuffer audio =
        CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
      media::TimeUnit duration =
        FramesToTimeUnit(mFrame->nb_samples, samplingRate);
      // Guard both the buffer allocation and the duration conversion.
      if (!audio || !duration.IsValid()) {
        NS_WARNING("Invalid count of accumulated audio samples");
        mCallback->Error();
        return;
      }
      RefPtr<AudioData> data = new AudioData(samplePosition,
                                             pts.ToMicroseconds(),
                                             duration.ToMicroseconds(),
                                             mFrame->nb_samples,
                                             Move(audio),
                                             numChannels,
                                             samplingRate);
      mCallback->Output(data);
      // Advance the presentation time for any further frames in this packet.
      pts += duration;
      if (!pts.IsValid()) {
        NS_WARNING("Invalid count of accumulated audio samples");
        mCallback->Error();
        return;
      }
    }
    packet.data += bytesConsumed;
    packet.size -= bytesConsumed;
    samplePosition += bytesConsumed;
  }
  // Ask for more input only once the task queue has drained.
  if (mTaskQueue->IsEmpty()) {
    mCallback->InputExhausted();
  }
}
// Decodes one block of WAVE data (A-law, mu-law, or 8/16/24-bit PCM) into
// interleaved AudioDataValue samples. Resolves with a single AudioData, or
// rejects on read failure / allocation failure / duration overflow.
RefPtr<MediaDataDecoder::DecodePromise>
WaveDataDecoder::ProcessDecode(MediaRawData* aSample)
{
  size_t aLength = aSample->Size();
  BufferReader aReader(aSample->Data(), aLength);
  int64_t aOffset = aSample->mOffset;
  // Bytes -> frames: aLength * 8 bits, divided by bits-per-sample and
  // channel count.
  int32_t frames = aLength * 8 / mInfo.mBitDepth / mInfo.mChannels;
  AlignedAudioBuffer buffer(frames * mInfo.mChannels);
  if (!buffer) {
    return DecodePromise::CreateAndReject(
      MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
  }
  for (int i = 0; i < frames; ++i) {
    for (unsigned int j = 0; j < mInfo.mChannels; ++j) {
      if (mInfo.mProfile == 6) {                              //ALAW Data
        auto res = aReader.ReadU8();
        if (res.isErr()) {
          return DecodePromise::CreateAndReject(
            MediaResult(res.unwrapErr(), __func__), __func__);
        }
        int16_t decoded = DecodeALawSample(res.unwrap());
        buffer[i * mInfo.mChannels + j] =
          IntegerToAudioSample<AudioDataValue>(decoded);
      } else if (mInfo.mProfile == 7) {                       //ULAW Data
        auto res = aReader.ReadU8();
        if (res.isErr()) {
          return DecodePromise::CreateAndReject(
            MediaResult(res.unwrapErr(), __func__), __func__);
        }
        int16_t decoded = DecodeULawSample(res.unwrap());
        buffer[i * mInfo.mChannels + j] =
          IntegerToAudioSample<AudioDataValue>(decoded);
      } else {                                                //PCM Data
        if (mInfo.mBitDepth == 8) {
          auto res = aReader.ReadU8();
          if (res.isErr()) {
            return DecodePromise::CreateAndReject(
              MediaResult(res.unwrapErr(), __func__), __func__);
          }
          buffer[i * mInfo.mChannels + j] =
            UInt8bitToAudioSample<AudioDataValue>(res.unwrap());
        } else if (mInfo.mBitDepth == 16) {
          auto res = aReader.ReadLE16();
          if (res.isErr()) {
            return DecodePromise::CreateAndReject(
              MediaResult(res.unwrapErr(), __func__), __func__);
          }
          buffer[i * mInfo.mChannels + j] =
            IntegerToAudioSample<AudioDataValue>(res.unwrap());
        } else if (mInfo.mBitDepth == 24) {
          auto res = aReader.ReadLE24();
          if (res.isErr()) {
            return DecodePromise::CreateAndReject(
              MediaResult(res.unwrapErr(), __func__), __func__);
          }
          buffer[i * mInfo.mChannels + j] =
            Int24bitToAudioSample<AudioDataValue>(res.unwrap());
        }
      }
    }
  }
  auto duration = FramesToTimeUnit(frames, mInfo.mRate);
  // Fix: every other decoder in this file validates the converted duration;
  // reject on overflow instead of propagating an invalid TimeUnit.
  if (!duration.IsValid()) {
    return DecodePromise::CreateAndReject(
      MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                  RESULT_DETAIL("Overflow converting audio duration")),
      __func__);
  }
  return DecodePromise::CreateAndResolve(
    DecodedData{ new AudioData(aOffset, aSample->mTime, duration, frames,
                               Move(buffer), mInfo.mChannels, mInfo.mRate) },
    __func__);
}
// Decodes one compressed sample through the AudioConverter, accumulating all
// produced PCM into a single AudioData pushed to mCallback. Returns NS_OK
// (possibly without output) or NS_ERROR_FAILURE.
nsresult
AppleATDecoder::DecodeSample(MediaRawData* aSample)
{
  // Array containing the queued decoded audio frames, about to be output.
  nsTArray<AudioDataValue> outputData;
  UInt32 channels = mOutputFormat.mChannelsPerFrame;
  // Pick a multiple of the frame size close to a power of two
  // for efficient allocation.
  const uint32_t MAX_AUDIO_FRAMES = 128;
  const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels;
  // Descriptions for _decompressed_ audio packets. ignored.
  nsAutoArrayPtr<AudioStreamPacketDescription>
    packets(new AudioStreamPacketDescription[MAX_AUDIO_FRAMES]);
  // This API insists on having packets spoon-fed to it from a callback.
  // This structure exists only to pass our state.
  PassthroughUserData userData =
    { channels, (UInt32)aSample->Size(), aSample->Data() };
  // Decompressed audio buffer
  nsAutoArrayPtr<AudioDataValue> decoded(new AudioDataValue[maxDecodedSamples]);
  // Pull decoded PCM out of the converter in MAX_AUDIO_FRAMES-sized chunks
  // until the callback reports the input packet is exhausted.
  do {
    AudioBufferList decBuffer;
    decBuffer.mNumberBuffers = 1;
    decBuffer.mBuffers[0].mNumberChannels = channels;
    decBuffer.mBuffers[0].mDataByteSize =
      maxDecodedSamples * sizeof(AudioDataValue);
    decBuffer.mBuffers[0].mData = decoded.get();
    // in: the max number of packets we can handle from the decoder.
    // out: the number of packets the decoder is actually returning.
    UInt32 numFrames = MAX_AUDIO_FRAMES;
    OSStatus rv = AudioConverterFillComplexBuffer(mConverter,
                                                  _PassthroughInputDataCallback,
                                                  &userData,
                                                  &numFrames /* in/out */,
                                                  &decBuffer,
                                                  packets.get());
    // kNoMoreDataErr is our callback's "input consumed" signal, not an error.
    if (rv && rv != kNoMoreDataErr) {
      LOG("Error decoding audio stream: %d\n", rv);
      return NS_ERROR_FAILURE;
    }
    if (numFrames) {
      outputData.AppendElements(decoded.get(), numFrames * channels);
    }
    if (rv == kNoMoreDataErr) {
      break;
    }
  } while (true);
  if (outputData.IsEmpty()) {
    return NS_OK;
  }
  size_t numFrames = outputData.Length() / channels;
  int rate = mOutputFormat.mSampleRate;
  media::TimeUnit duration = FramesToTimeUnit(numFrames, rate);
  if (!duration.IsValid()) {
    NS_WARNING("Invalid count of accumulated audio samples");
    return NS_ERROR_FAILURE;
  }
#ifdef LOG_SAMPLE_DECODE
  LOG("pushed audio at time %lfs; duration %lfs\n",
      (double)aSample->mTime / USECS_PER_S, duration.ToSeconds());
#endif
  // Copy the accumulated samples into a buffer the AudioData can own.
  nsAutoArrayPtr<AudioDataValue> data(new AudioDataValue[outputData.Length()]);
  PodCopy(data.get(), &outputData[0], outputData.Length());
  RefPtr<AudioData> audio = new AudioData(aSample->mOffset,
                                          aSample->mTime,
                                          duration.ToMicroseconds(),
                                          numFrames,
                                          data.forget(),
                                          channels,
                                          rate);
  mCallback->Output(audio);
  return NS_OK;
}
// Decodes one compressed sample through the AudioConverter, optionally
// remaps the channel layout, and pushes a single AudioData to mCallback.
// Returns NS_OK (possibly without output) or a MediaResult error.
MediaResult
AppleATDecoder::DecodeSample(MediaRawData* aSample)
{
  MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn());
  // Array containing the queued decoded audio frames, about to be output.
  nsTArray<AudioDataValue> outputData;
  UInt32 channels = mOutputFormat.mChannelsPerFrame;
  // Pick a multiple of the frame size close to a power of two
  // for efficient allocation.
  const uint32_t MAX_AUDIO_FRAMES = 128;
  const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels;
  // Descriptions for _decompressed_ audio packets. ignored.
  auto packets = MakeUnique<AudioStreamPacketDescription[]>(MAX_AUDIO_FRAMES);
  // This API insists on having packets spoon-fed to it from a callback.
  // This structure exists only to pass our state.
  PassthroughUserData userData =
    { channels, (UInt32)aSample->Size(), aSample->Data() };
  // Decompressed audio buffer
  AlignedAudioBuffer decoded(maxDecodedSamples);
  if (!decoded) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // Pull decoded PCM out of the converter in MAX_AUDIO_FRAMES-sized chunks
  // until the callback reports the input packet is exhausted.
  do {
    AudioBufferList decBuffer;
    decBuffer.mNumberBuffers = 1;
    decBuffer.mBuffers[0].mNumberChannels = channels;
    decBuffer.mBuffers[0].mDataByteSize =
      maxDecodedSamples * sizeof(AudioDataValue);
    decBuffer.mBuffers[0].mData = decoded.get();
    // in: the max number of packets we can handle from the decoder.
    // out: the number of packets the decoder is actually returning.
    UInt32 numFrames = MAX_AUDIO_FRAMES;
    OSStatus rv = AudioConverterFillComplexBuffer(mConverter,
                                                  _PassthroughInputDataCallback,
                                                  &userData,
                                                  &numFrames /* in/out */,
                                                  &decBuffer,
                                                  packets.get());
    // kNoMoreDataErr is our callback's "input consumed" signal, not an error.
    if (rv && rv != kNoMoreDataErr) {
      LOG("Error decoding audio sample: %d\n", rv);
      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                         RESULT_DETAIL("Error decoding audio sample: %d @ %lld",
                                       rv, aSample->mTime));
    }
    if (numFrames) {
      outputData.AppendElements(decoded.get(), numFrames * channels);
    }
    if (rv == kNoMoreDataErr) {
      break;
    }
  } while (true);
  if (outputData.IsEmpty()) {
    return NS_OK;
  }
  size_t numFrames = outputData.Length() / channels;
  int rate = mOutputFormat.mSampleRate;
  media::TimeUnit duration = FramesToTimeUnit(numFrames, rate);
  if (!duration.IsValid()) {
    NS_WARNING("Invalid count of accumulated audio samples");
    return MediaResult(
      NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
      RESULT_DETAIL(
        "Invalid count of accumulated audio samples: num:%llu rate:%d",
        uint64_t(numFrames), rate));
  }
#ifdef LOG_SAMPLE_DECODE
  LOG("pushed audio at time %lfs; duration %lfs\n",
      (double)aSample->mTime / USECS_PER_S, duration.ToSeconds());
#endif
  AudioSampleBuffer data(outputData.Elements(), outputData.Length());
  if (!data.Data()) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // Lazily create a converter when the stream declares a channel layout
  // that needs remapping to our output configuration.
  if (mChannelLayout && !mAudioConverter) {
    AudioConfig in(*mChannelLayout.get(), rate);
    AudioConfig out(channels, rate);
    if (!in.IsValid() || !out.IsValid()) {
      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                         RESULT_DETAIL("Invalid audio config"));
    }
    mAudioConverter = MakeUnique<AudioConverter>(in, out);
  }
  if (mAudioConverter) {
    MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
    data = mAudioConverter->Process(Move(data));
  }
  RefPtr<AudioData> audio = new AudioData(aSample->mOffset,
                                          aSample->mTime,
                                          duration.ToMicroseconds(),
                                          numFrames,
                                          data.Forget(),
                                          channels,
                                          rate);
  mCallback->Output(audio);
  return NS_OK;
}
// Decodes the packet in aData/aSize (metadata taken from aSample), appending
// each produced frame to aResults. Sets *aGotFrame (when non-null) if at
// least one frame was output. Returns NS_OK or a decode/overflow/OOM error.
MediaResult
FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
                                        uint8_t* aData,
                                        int aSize,
                                        bool* aGotFrame,
                                        DecodedData& aResults)
{
  AVPacket packet;
  mLib->av_init_packet(&packet);
  packet.data = const_cast<uint8_t*>(aData);
  packet.size = aSize;
  if (aGotFrame) {
    *aGotFrame = false;
  }
  if (!PrepareFrame()) {
    return MediaResult(
      NS_ERROR_OUT_OF_MEMORY,
      RESULT_DETAIL("FFmpeg audio decoder failed to allocate frame"));
  }
  int64_t samplePosition = aSample->mOffset;
  media::TimeUnit pts = aSample->mTime;
  // One packet may hold several frames; loop until it is fully consumed.
  while (packet.size > 0) {
    int decoded;
    int bytesConsumed =
      mLib->avcodec_decode_audio4(mCodecContext, mFrame, &decoded, &packet);
    if (bytesConsumed < 0) {
      NS_WARNING("FFmpeg audio decoder error.");
      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                         RESULT_DETAIL("FFmpeg audio error:%d", bytesConsumed));
    }
    if (decoded) {
      // mFrame->format is only meaningful once a frame was decoded.
      if (mFrame->format != AV_SAMPLE_FMT_FLT &&
          mFrame->format != AV_SAMPLE_FMT_FLTP &&
          mFrame->format != AV_SAMPLE_FMT_S16 &&
          mFrame->format != AV_SAMPLE_FMT_S16P &&
          mFrame->format != AV_SAMPLE_FMT_S32 &&
          mFrame->format != AV_SAMPLE_FMT_S32P) {
        return MediaResult(
          NS_ERROR_DOM_MEDIA_DECODE_ERR,
          RESULT_DETAIL(
            "FFmpeg audio decoder outputs unsupported audio format"));
      }
      uint32_t numChannels = mCodecContext->channels;
      uint32_t samplingRate = mCodecContext->sample_rate;
      AlignedAudioBuffer audio =
        CopyAndPackAudio(mFrame, numChannels, mFrame->nb_samples);
      if (!audio) {
        return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
      }
      media::TimeUnit duration =
        FramesToTimeUnit(mFrame->nb_samples, samplingRate);
      if (!duration.IsValid()) {
        return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                           RESULT_DETAIL("Invalid sample duration"));
      }
      // Validate the advanced pts before committing the frame.
      media::TimeUnit newpts = pts + duration;
      if (!newpts.IsValid()) {
        return MediaResult(
          NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
          RESULT_DETAIL("Invalid count of accumulated audio samples"));
      }
      aResults.AppendElement(new AudioData(samplePosition,
                                           pts,
                                           duration,
                                           mFrame->nb_samples,
                                           Move(audio),
                                           numChannels,
                                           samplingRate,
                                           mCodecContext->channel_layout));
      pts = newpts;
      if (aGotFrame) {
        *aGotFrame = true;
      }
    }
    packet.data += bytesConsumed;
    packet.size -= bytesConsumed;
    samplePosition += bytesConsumed;
  }
  return NS_OK;
}