nsresult
VP8TrackEncoder::GetEncodedPartitions(EncodedFrameContainer& aData)
{
  vpx_codec_iter_t iter = nullptr;
  EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME;
  nsTArray<uint8_t> frameData;
  nsresult rv;
  const vpx_codec_cx_pkt_t* pkt = nullptr;
  while ((pkt = vpx_codec_get_cx_data(mVPXContext, &iter)) != nullptr) {
    switch (pkt->kind) {
      case VPX_CODEC_CX_FRAME_PKT: {
        // Copy the encoded data from libvpx to frameData
        frameData.AppendElements((uint8_t*)pkt->data.frame.buf,
                                 pkt->data.frame.sz);
        break;
      }
      default: {
        break;
      }
    }
    // End of frame
    if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
      if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
        frameType = EncodedFrame::VP8_I_FRAME;
      }
      break;
    }
  }

  if (!frameData.IsEmpty() &&
      (pkt->data.frame.pts == mEncodedTimestamp)) {
    // Copy the encoded data to aData.
    EncodedFrame* videoData = new EncodedFrame();
    videoData->SetFrameType(frameType);
    // Convert the timestamp and duration to Usecs.
    CheckedInt64 timestamp = FramesToUsecs(mEncodedTimestamp, mTrackRate);
    if (timestamp.isValid()) {
      videoData->SetTimeStamp(
        (uint64_t)FramesToUsecs(mEncodedTimestamp, mTrackRate).value());
    }
    CheckedInt64 duration = FramesToUsecs(pkt->data.frame.duration, mTrackRate);
    if (duration.isValid()) {
      videoData->SetDuration(
        (uint64_t)FramesToUsecs(pkt->data.frame.duration, mTrackRate).value());
    }
    rv = videoData->SwapInFrameData(frameData);
    NS_ENSURE_SUCCESS(rv, rv);
    VP8LOG("GetEncodedPartitions TimeStamp %lld Duration %lld\n",
           videoData->GetTimeStamp(), videoData->GetDuration());
    VP8LOG("frameType %d\n", videoData->GetFrameType());
    aData.AppendEncodedFrame(videoData);
  }

  return NS_OK;
}
bool
MediaPluginReader::DecodeAudioData()
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  // This is the approximate byte position in the stream.
  int64_t pos = mDecoder->GetResource()->Tell();

  // Read next frame
  MPAPI::AudioFrame frame;
  if (!mPlugin->ReadAudio(mPlugin, &frame, mAudioSeekTimeUs)) {
    return false;
  }
  mAudioSeekTimeUs = -1;

  // Ignore empty buffers which stagefright media read will sporadically return
  if (frame.mSize == 0) {
    return true;
  }

  nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frame.mSize / 2]);
  memcpy(buffer.get(), frame.mData, frame.mSize);

  uint32_t frames = frame.mSize / (2 * frame.mAudioChannels);
  CheckedInt64 duration = FramesToUsecs(frames, frame.mAudioSampleRate);
  if (!duration.isValid()) {
    return false;
  }

  mAudioQueue.Push(new AudioData(pos,
                                 frame.mTimeUs,
                                 duration.value(),
                                 frames,
                                 buffer.forget(),
                                 frame.mAudioChannels));
  return true;
}
int64_t
AudioSink::GetEndTime() const
{
  CheckedInt64 playedUsecs = FramesToUsecs(mWritten, mInfo.mRate) + mStartTime;
  if (!playedUsecs.isValid()) {
    NS_WARNING("Int overflow calculating audio end time");
    return -1;
  }
  return playedUsecs.value();
}
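// Context sketch (not part of the Gecko functions above): throughout these
// snippets, FramesToUsecs is assumed to be the VideoUtils-style helper that
// converts a frame count at a given sample rate into microseconds, returning
// a CheckedInt64 (mozilla::CheckedInt<int64_t>) so callers can detect
// overflow via isValid(), as GetEndTime() does above. A minimal sketch,
// assuming Mozilla's usual definition:
CheckedInt64
FramesToUsecs(int64_t aFrames, uint32_t aRate)
{
  // USECS_PER_S is 1000000; the multiply is what can overflow for very large
  // frame counts, which is why the result is a CheckedInt64.
  return (CheckedInt64(aFrames) * USECS_PER_S) / aRate;
}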
uint32_t
AudioSink::PushProcessedAudio(AudioData* aData)
{
  if (!aData || !aData->mFrames) {
    return 0;
  }
  mProcessedQueue.Push(aData);
  mProcessedQueueLength += FramesToUsecs(aData->mFrames, mOutputRate).value();
  return aData->mFrames;
}
nsresult
GonkAudioDecoderManager::CreateAudioData(int64_t aStreamOffset, AudioData** v)
{
  if (!(mAudioBuffer != nullptr && mAudioBuffer->data() != nullptr)) {
    GADM_LOG("Audio Buffer is not valid!");
    return NS_ERROR_UNEXPECTED;
  }

  int64_t timeUs;
  if (!mAudioBuffer->meta_data()->findInt64(kKeyTime, &timeUs)) {
    return NS_ERROR_UNEXPECTED;
  }

  if (mAudioBuffer->range_length() == 0) {
    // Some decoders may return spurious empty buffers that we just want to ignore
    // quoted from Android's AwesomePlayer.cpp
    ReleaseAudioBuffer();
    return NS_ERROR_NOT_AVAILABLE;
  }

  if (mLastDecodedTime > timeUs) {
    ReleaseAudioBuffer();
    GADM_LOG("Output decoded sample time is revert. time=%lld", timeUs);
    MOZ_ASSERT(false);
    return NS_ERROR_NOT_AVAILABLE;
  }
  mLastDecodedTime = timeUs;

  const uint8_t* data = static_cast<const uint8_t*>(mAudioBuffer->data());
  size_t dataOffset = mAudioBuffer->range_offset();
  size_t size = mAudioBuffer->range_length();

  nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[size / 2]);
  memcpy(buffer.get(), data + dataOffset, size);
  uint32_t frames = size / (2 * mAudioChannels);

  CheckedInt64 duration = FramesToUsecs(frames, mAudioRate);
  if (!duration.isValid()) {
    return NS_ERROR_UNEXPECTED;
  }
  nsRefPtr<AudioData> audioData = new AudioData(aStreamOffset,
                                                timeUs,
                                                duration.value(),
                                                frames,
                                                buffer.forget(),
                                                mAudioChannels,
                                                mAudioRate);
  ReleaseAudioBuffer();
  audioData.forget(v);
  return NS_OK;
}
int64_t
DecodedStream::GetEndTime(TrackType aType) const
{
  AssertOwnerThread();
  if (aType == TrackInfo::kAudioTrack && mInfo.HasAudio() && mData) {
    CheckedInt64 t = mStartTime.ref() +
      FramesToUsecs(mData->mAudioFramesWritten, mInfo.mAudio.mRate);
    if (t.isValid()) {
      return t.value();
    }
  } else if (aType == TrackInfo::kVideoTrack && mData) {
    return mData->mNextVideoTime;
  }
  return -1;
}
nsresult
GonkAudioDecoderManager::CreateAudioData(MediaBuffer* aBuffer, int64_t aStreamOffset)
{
  if (!(aBuffer != nullptr && aBuffer->data() != nullptr)) {
    GADM_LOG("Audio Buffer is not valid!");
    return NS_ERROR_UNEXPECTED;
  }

  int64_t timeUs;
  if (!aBuffer->meta_data()->findInt64(kKeyTime, &timeUs)) {
    return NS_ERROR_UNEXPECTED;
  }

  if (aBuffer->range_length() == 0) {
    // Some decoders may return spurious empty buffers that we just want to ignore
    // quoted from Android's AwesomePlayer.cpp
    return NS_ERROR_NOT_AVAILABLE;
  }

  if (mLastTime > timeUs) {
    GADM_LOG("Output decoded sample time is revert. time=%lld", timeUs);
    MOZ_ASSERT(false);
    return NS_ERROR_NOT_AVAILABLE;
  }
  mLastTime = timeUs;

  const uint8_t* data = static_cast<const uint8_t*>(aBuffer->data());
  size_t dataOffset = aBuffer->range_offset();
  size_t size = aBuffer->range_length();

  uint32_t frames = size / (2 * mAudioChannels);

  CheckedInt64 duration = FramesToUsecs(frames, mAudioRate);
  if (!duration.isValid()) {
    return NS_ERROR_UNEXPECTED;
  }

  typedef AudioCompactor::NativeCopy OmxCopy;
  mAudioCompactor.Push(aStreamOffset,
                       timeUs,
                       mAudioRate,
                       frames,
                       mAudioChannels,
                       OmxCopy(data + dataOffset,
                               size,
                               mAudioChannels));
  return NS_OK;
}
/**
 * Compares the elapsed time from the beginning of GetEncodedTrack and
 * the processed frame duration in mSourceSegment
 * in order to set the nextEncodeOperation for the next target frame.
 */
VP8TrackEncoder::EncodeOperation
VP8TrackEncoder::GetNextEncodeOperation(TimeDuration aTimeElapsed,
                                        TrackTicks aProcessedDuration)
{
  int64_t durationInUsec =
    FramesToUsecs(aProcessedDuration + mEncodedFrameDuration,
                  mTrackRate).value();
  if (aTimeElapsed.ToMicroseconds() > (durationInUsec * SKIP_FRAME_RATIO)) {
    // The encoder is too slow.
    // We should skip the next frame to consume the mSourceSegment.
    return SKIP_FRAME;
  } else if (aTimeElapsed.ToMicroseconds() > (durationInUsec * I_FRAME_RATIO)) {
    // The encoder is a little slow.
    // We force the encoder to encode an I-frame to accelerate.
    return ENCODE_I_FRAME;
  } else {
    return ENCODE_NORMAL_FRAME;
  }
}
nsresult
SeekTask::DropAudioUpToSeekTarget(MediaData* aSample)
{
  AssertOwnerThread();

  RefPtr<AudioData> audio(aSample->As<AudioData>());
  MOZ_ASSERT(audio && mSeekJob.Exists() && mSeekJob.mTarget.IsAccurate());

  CheckedInt64 sampleDuration = FramesToUsecs(audio->mFrames, mAudioRate);
  if (!sampleDuration.isValid()) {
    return NS_ERROR_FAILURE;
  }

  if (audio->mTime + sampleDuration.value() <=
      mSeekJob.mTarget.GetTime().ToMicroseconds()) {
    // Our seek target lies after the frames in this AudioData. Don't
    // push it onto the audio queue, and keep decoding forwards.
    return NS_OK;
  }

  if (audio->mTime > mSeekJob.mTarget.GetTime().ToMicroseconds()) {
    // The seek target doesn't lie in the audio block just after the last
    // audio frames we've seen which were before the seek target. This
    // could have been the first audio data we've seen after seek, i.e. the
    // seek terminated after the seek target in the audio stream. Just
    // abort the audio decode-to-target, the state machine will play
    // silence to cover the gap. Typically this happens in poorly muxed
    // files.
    DECODER_WARN("Audio not synced after seek, maybe a poorly muxed file?");
    mSeekedAudioData = audio;
    return NS_OK;
  }

  // The seek target lies somewhere in this AudioData's frames, strip off
  // any frames which lie before the seek target, so we'll begin playback
  // exactly at the seek target.
  NS_ASSERTION(mSeekJob.mTarget.GetTime().ToMicroseconds() >= audio->mTime,
               "Target must be at or after data start.");
  NS_ASSERTION(mSeekJob.mTarget.GetTime().ToMicroseconds() <
                 audio->mTime + sampleDuration.value(),
               "Data must end after target.");

  CheckedInt64 framesToPrune =
    UsecsToFrames(mSeekJob.mTarget.GetTime().ToMicroseconds() - audio->mTime,
                  mAudioRate);
  if (!framesToPrune.isValid()) {
    return NS_ERROR_FAILURE;
  }
  if (framesToPrune.value() > audio->mFrames) {
    // We've messed up somehow. Don't try to trim frames, the |frames|
    // variable below will overflow.
    DECODER_WARN("Can't prune more frames than we have!");
    return NS_ERROR_FAILURE;
  }

  uint32_t frames =
    audio->mFrames - static_cast<uint32_t>(framesToPrune.value());
  uint32_t channels = audio->mChannels;
  AlignedAudioBuffer audioData(frames * channels);
  if (!audioData) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  memcpy(audioData.get(),
         audio->mAudioData.get() + (framesToPrune.value() * channels),
         frames * channels * sizeof(AudioDataValue));
  CheckedInt64 duration = FramesToUsecs(frames, mAudioRate);
  if (!duration.isValid()) {
    return NS_ERROR_FAILURE;
  }

  RefPtr<AudioData> data(new AudioData(audio->mOffset,
                                       mSeekJob.mTarget.GetTime().ToMicroseconds(),
                                       duration.value(),
                                       frames,
                                       Move(audioData),
                                       channels,
                                       audio->mRate));
  MOZ_ASSERT(!mSeekedAudioData, "Should be the 1st sample after seeking");
  mSeekedAudioData = data;
  return NS_OK;
}
bool
WMFReader::DecodeAudioData()
{
  MOZ_ASSERT(OnTaskQueue());

  HRESULT hr;
  hr = mSourceReader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM,
                                 0, // control flags
                                 0, // read stream index
                                 nullptr,
                                 nullptr,
                                 nullptr);

  if (FAILED(hr)) {
    DECODER_LOG("WMFReader::DecodeAudioData() ReadSample failed with hr=0x%x", hr);
    // End the stream.
    return false;
  }

  DWORD flags = 0;
  LONGLONG timestampHns = 0;
  RefPtr<IMFSample> sample;
  hr = mSourceReaderCallback->Wait(&flags, &timestampHns, byRef(sample));
  if (FAILED(hr) ||
      (flags & MF_SOURCE_READERF_ERROR) ||
      (flags & MF_SOURCE_READERF_ENDOFSTREAM) ||
      (flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)) {
    DECODER_LOG("WMFReader::DecodeAudioData() ReadSample failed with hr=0x%x flags=0x%x",
                hr, flags);
    // End the stream.
    return false;
  }

  if (!sample) {
    // Not enough data? Try again...
    return true;
  }

  RefPtr<IMFMediaBuffer> buffer;
  hr = sample->ConvertToContiguousBuffer(byRef(buffer));
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  BYTE* data = nullptr; // Note: *data will be owned by the IMFMediaBuffer, we don't need to free it.
  DWORD maxLength = 0, currentLength = 0;
  hr = buffer->Lock(&data, &maxLength, &currentLength);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  uint32_t numFrames = currentLength / mAudioBytesPerSample / mAudioChannels;
  NS_ASSERTION(sizeof(AudioDataValue) == mAudioBytesPerSample,
               "Size calculation is wrong");
  nsAutoArrayPtr<AudioDataValue> pcmSamples(
    new AudioDataValue[numFrames * mAudioChannels]);
  memcpy(pcmSamples.get(), data, currentLength);
  buffer->Unlock();

  // We calculate the timestamp and the duration based on the number of audio
  // frames we've already played. We don't trust the timestamp stored on the
  // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?

  // If this sample block comes after a discontinuity (i.e. a gap or seek)
  // reset the frame counters, and capture the timestamp. Future timestamps
  // will be offset from this block's timestamp.
  UINT32 discontinuity = false;
  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
  if (mMustRecaptureAudioPosition || discontinuity) {
    mAudioFrameSum = 0;
    hr = HNsToFrames(timestampHns, mAudioRate, &mAudioFrameOffset);
    NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    mMustRecaptureAudioPosition = false;
  }

  int64_t timestamp;
  hr = FramesToUsecs(mAudioFrameOffset + mAudioFrameSum, mAudioRate, &timestamp);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  mAudioFrameSum += numFrames;

  int64_t duration;
  hr = FramesToUsecs(numFrames, mAudioRate, &duration);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  mAudioQueue.Push(new AudioData(mDecoder->GetResource()->Tell(),
                                 timestamp,
                                 duration,
                                 numFrames,
                                 pcmSamples.forget(),
                                 mAudioChannels,
                                 mAudioRate));

#ifdef LOG_SAMPLE_DECODE
  DECODER_LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
              timestamp, duration, currentLength);
#endif

  return true;
}
MediaResult
VorbisDataDecoder::DoDecode(MediaRawData* aSample)
{
  MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn());

  const unsigned char* aData = aSample->Data();
  size_t aLength = aSample->Size();
  int64_t aOffset = aSample->mOffset;
  uint64_t aTstampUsecs = aSample->mTime;
  int64_t aTotalFrames = 0;

  MOZ_ASSERT(mPacketCount >= 3);

  if (!mLastFrameTime || mLastFrameTime.ref() != aSample->mTime) {
    // We are starting a new block.
    mFrames = 0;
    mLastFrameTime = Some(aSample->mTime);
  }

  ogg_packet pkt = InitVorbisPacket(aData, aLength, false, aSample->mEOS,
                                    aSample->mTimecode, mPacketCount++);

  int err = vorbis_synthesis(&mVorbisBlock, &pkt);
  if (err) {
    return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                       RESULT_DETAIL("vorbis_synthesis:%d", err));
  }

  err = vorbis_synthesis_blockin(&mVorbisDsp, &mVorbisBlock);
  if (err) {
    return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                       RESULT_DETAIL("vorbis_synthesis_blockin:%d", err));
  }

  VorbisPCMValue** pcm = 0;
  int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
  if (frames == 0) {
    return NS_OK;
  }

  while (frames > 0) {
    uint32_t channels = mVorbisDsp.vi->channels;
    uint32_t rate = mVorbisDsp.vi->rate;
    AlignedAudioBuffer buffer(frames * channels);
    if (!buffer) {
      return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
    }
    for (uint32_t j = 0; j < channels; ++j) {
      VorbisPCMValue* channel = pcm[j];
      for (uint32_t i = 0; i < uint32_t(frames); ++i) {
        buffer[i * channels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
      }
    }

    CheckedInt64 duration = FramesToUsecs(frames, rate);
    if (!duration.isValid()) {
      return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                         RESULT_DETAIL("Overflow converting audio duration"));
    }
    CheckedInt64 total_duration = FramesToUsecs(mFrames, rate);
    if (!total_duration.isValid()) {
      return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                         RESULT_DETAIL("Overflow converting audio total_duration"));
    }

    CheckedInt64 time = total_duration + aTstampUsecs;
    if (!time.isValid()) {
      return MediaResult(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
                         RESULT_DETAIL("Overflow adding total_duration and aTstampUsecs"));
    }

    if (!mAudioConverter) {
      AudioConfig in(AudioConfig::ChannelLayout(channels, VorbisLayout(channels)),
                     rate);
      AudioConfig out(channels, rate);
      if (!in.IsValid() || !out.IsValid()) {
        return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("Invalid channel layout:%u", channels));
      }
      mAudioConverter = MakeUnique<AudioConverter>(in, out);
    }
    MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
    AudioSampleBuffer data(Move(buffer));
    data = mAudioConverter->Process(Move(data));

    aTotalFrames += frames;

    mCallback->Output(new AudioData(aOffset,
                                    time.value(),
                                    duration.value(),
                                    frames,
                                    data.Forget(),
                                    channels,
                                    rate));
    mFrames += frames;
    err = vorbis_synthesis_read(&mVorbisDsp, frames);
    if (err) {
      return MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
                         RESULT_DETAIL("vorbis_synthesis_read:%d", err));
    }

    frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
  }

  return NS_OK;
}
UniquePtr<AudioStream::Chunk>
AudioSink::PopFrames(uint32_t aFrames)
{
  class Chunk : public AudioStream::Chunk {
  public:
    Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData)
      : mBuffer(aBuffer), mFrames(aFrames), mData(aData) {}
    Chunk() : mFrames(0), mData(nullptr) {}
    const AudioDataValue* Data() const override { return mData; }
    uint32_t Frames() const override { return mFrames; }
    uint32_t Channels() const override { return mBuffer ? mBuffer->mChannels : 0; }
    uint32_t Rate() const override { return mBuffer ? mBuffer->mRate : 0; }
    AudioDataValue* GetWritable() const override { return mData; }
  private:
    const RefPtr<AudioData> mBuffer;
    const uint32_t mFrames;
    AudioDataValue* const mData;
  };

  bool needPopping = false;
  if (!mCurrentData) {
    // No data in the queue. Return an empty chunk.
    if (!mProcessedQueue.GetSize()) {
      return MakeUnique<Chunk>();
    }

    // We need to update our values prior to popping the processed queue in
    // order to prevent the pop event from firing too early (before
    // mProcessedQueueLength has been updated) and to prevent
    // HasUnplayedFrames from incorrectly returning true in the interval
    // between when mProcessedQueue is read and when mWritten is updated.
    needPopping = true;
    mCurrentData = mProcessedQueue.PeekFront();
    {
      MonitorAutoLock mon(mMonitor);
      mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->mAudioData.get(),
                                              mCurrentData->mChannels,
                                              mCurrentData->mFrames);
    }
    MOZ_ASSERT(mCurrentData->mFrames > 0);
    mProcessedQueueLength -=
      FramesToUsecs(mCurrentData->mFrames, mOutputRate).value();
  }

  auto framesToPop = std::min(aFrames, mCursor->Available());

  SINK_LOG_V("playing audio at time=%" PRId64 " offset=%u length=%u",
             mCurrentData->mTime.ToMicroseconds(),
             mCurrentData->mFrames - mCursor->Available(), framesToPop);

  UniquePtr<AudioStream::Chunk> chunk =
    MakeUnique<Chunk>(mCurrentData, framesToPop, mCursor->Ptr());

  {
    MonitorAutoLock mon(mMonitor);
    mWritten += framesToPop;
    mCursor->Advance(framesToPop);
  }

  // All frames are popped. Reset mCurrentData so we can pop new elements from
  // the audio queue in next calls to PopFrames().
  if (!mCursor->Available()) {
    mCurrentData = nullptr;
  }

  if (needPopping) {
    // We can now safely pop the audio packet from the processed queue.
    // This will fire the popped event, triggering a call to NotifyAudioNeeded.
    RefPtr<AudioData> releaseMe = mProcessedQueue.PopFront();
    CheckIsAudible(releaseMe);
  }

  return chunk;
}
void
AudioCallbackAdapter::Decoded(const nsTArray<int16_t>& aPCM,
                              uint64_t aTimeStamp,
                              uint32_t aChannels,
                              uint32_t aRate)
{
  MOZ_ASSERT(IsOnGMPThread());

  if (aRate == 0 || aChannels == 0) {
    NS_WARNING("Invalid rate or num channels returned on GMP audio samples");
    mCallback->Error();
    return;
  }

  size_t numFrames = aPCM.Length() / aChannels;
  MOZ_ASSERT((aPCM.Length() % aChannels) == 0);
  nsAutoArrayPtr<AudioDataValue> audioData(new AudioDataValue[aPCM.Length()]);

  for (size_t i = 0; i < aPCM.Length(); ++i) {
    audioData[i] = AudioSampleToFloat(aPCM[i]);
  }

  if (mMustRecaptureAudioPosition) {
    mAudioFrameSum = 0;
    auto timestamp = UsecsToFrames(aTimeStamp, aRate);
    if (!timestamp.isValid()) {
      NS_WARNING("Invalid timestamp");
      mCallback->Error();
      return;
    }
    mAudioFrameOffset = timestamp.value();
    MOZ_ASSERT(mAudioFrameOffset >= 0);
    mMustRecaptureAudioPosition = false;
  }

  auto timestamp = FramesToUsecs(mAudioFrameOffset + mAudioFrameSum, aRate);
  if (!timestamp.isValid()) {
    NS_WARNING("Invalid timestamp on audio samples");
    mCallback->Error();
    return;
  }
  mAudioFrameSum += numFrames;

  auto duration = FramesToUsecs(numFrames, aRate);
  if (!duration.isValid()) {
    NS_WARNING("Invalid duration on audio samples");
    mCallback->Error();
    return;
  }

  nsRefPtr<AudioData> audio(new AudioData(mLastStreamOffset,
                                          timestamp.value(),
                                          duration.value(),
                                          numFrames,
                                          audioData.forget(),
                                          aChannels,
                                          aRate));

#ifdef LOG_SAMPLE_DECODE
  LOG("Decoded audio sample! timestamp=%lld duration=%lld frames=%zu",
      timestamp.value(), duration.value(), numFrames);
#endif

  mCallback->Output(audio);
}
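// Context sketch (assumption, mirroring the FramesToUsecs sketch earlier):
// UsecsToFrames is taken to be the inverse conversion used when recapturing
// the audio position in Decoded(), turning a microsecond timestamp back into
// a frame count at the given rate, again as a CheckedInt64 so the caller can
// reject overflow before using the value.
CheckedInt64
UsecsToFrames(int64_t aUsecs, uint32_t aRate)
{
  // Multiply first to avoid losing sub-second precision, then divide by the
  // number of microseconds per second.
  return (CheckedInt64(aUsecs) * aRate) / USECS_PER_S;
}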
nsresult
AppleATDecoder::DecodeSample(mp4_demuxer::MP4Sample* aSample)
{
  // Array containing the queued decoded audio frames, about to be output.
  nsTArray<AudioDataValue> outputData;
  UInt32 channels = mOutputFormat.mChannelsPerFrame;
  // Pick a multiple of the frame size close to a power of two
  // for efficient allocation.
  const uint32_t MAX_AUDIO_FRAMES = 128;
  const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels;

  // Descriptions for _decompressed_ audio packets. ignored.
  nsAutoArrayPtr<AudioStreamPacketDescription>
    packets(new AudioStreamPacketDescription[MAX_AUDIO_FRAMES]);

  // This API insists on having packets spoon-fed to it from a callback.
  // This structure exists only to pass our state.
  PassthroughUserData userData =
    { channels, (UInt32)aSample->size, aSample->data };

  // Decompressed audio buffer
  nsAutoArrayPtr<AudioDataValue> decoded(new AudioDataValue[maxDecodedSamples]);

  do {
    AudioBufferList decBuffer;
    decBuffer.mNumberBuffers = 1;
    decBuffer.mBuffers[0].mNumberChannels = channels;
    decBuffer.mBuffers[0].mDataByteSize = maxDecodedSamples * sizeof(AudioDataValue);
    decBuffer.mBuffers[0].mData = decoded.get();

    // in: the max number of packets we can handle from the decoder.
    // out: the number of packets the decoder is actually returning.
    UInt32 numFrames = MAX_AUDIO_FRAMES;

    OSStatus rv = AudioConverterFillComplexBuffer(mConverter,
                                                  _PassthroughInputDataCallback,
                                                  &userData,
                                                  &numFrames /* in/out */,
                                                  &decBuffer,
                                                  packets.get());

    if (rv && rv != kNoMoreDataErr) {
      LOG("Error decoding audio stream: %d\n", rv);
      return NS_ERROR_FAILURE;
    }

    if (numFrames) {
      outputData.AppendElements(decoded.get(), numFrames * channels);
    }

    if (rv == kNoMoreDataErr) {
      break;
    }
  } while (true);

  if (outputData.IsEmpty()) {
    return NS_OK;
  }

  size_t numFrames = outputData.Length() / channels;
  int rate = mOutputFormat.mSampleRate;
  CheckedInt<Microseconds> duration = FramesToUsecs(numFrames, rate);
  if (!duration.isValid()) {
    NS_WARNING("Invalid count of accumulated audio samples");
    return NS_ERROR_FAILURE;
  }

#ifdef LOG_SAMPLE_DECODE
  LOG("pushed audio at time %lfs; duration %lfs\n",
      (double)aSample->composition_timestamp / USECS_PER_S,
      (double)duration.value() / USECS_PER_S);
#endif

  nsAutoArrayPtr<AudioDataValue> data(new AudioDataValue[outputData.Length()]);
  PodCopy(data.get(), &outputData[0], outputData.Length());
  nsRefPtr<AudioData> audio = new AudioData(aSample->byte_offset,
                                            aSample->composition_timestamp,
                                            duration.value(),
                                            numFrames,
                                            data.forget(),
                                            channels,
                                            rate);
  mCallback->Output(audio);
  return NS_OK;
}
void
AppleATDecoder::SampleCallback(uint32_t aNumBytes,
                               uint32_t aNumPackets,
                               const void* aData,
                               AudioStreamPacketDescription* aPackets)
{
  // Pick a multiple of the frame size close to a power of two
  // for efficient allocation.
  const uint32_t MAX_AUDIO_FRAMES = 128;
  const uint32_t decodedSize =
    MAX_AUDIO_FRAMES * mConfig.channel_count * sizeof(AudioDataValue);

  // Descriptions for _decompressed_ audio packets. ignored.
  nsAutoArrayPtr<AudioStreamPacketDescription>
    packets(new AudioStreamPacketDescription[MAX_AUDIO_FRAMES]);

  // This API insists on having packets spoon-fed to it from a callback.
  // This structure exists only to pass our state and the result of the
  // parser on to the callback above.
  PassthroughUserData userData =
    { this, aNumPackets, aNumBytes, aData, aPackets, false };

  do {
    // Decompressed audio buffer
    nsAutoArrayPtr<uint8_t> decoded(new uint8_t[decodedSize]);

    AudioBufferList decBuffer;
    decBuffer.mNumberBuffers = 1;
    decBuffer.mBuffers[0].mNumberChannels = mOutputFormat.mChannelsPerFrame;
    decBuffer.mBuffers[0].mDataByteSize = decodedSize;
    decBuffer.mBuffers[0].mData = decoded.get();

    // in: the max number of packets we can handle from the decoder.
    // out: the number of packets the decoder is actually returning.
    UInt32 numFrames = MAX_AUDIO_FRAMES;

    OSStatus rv = AudioConverterFillComplexBuffer(mConverter,
                                                  _PassthroughInputDataCallback,
                                                  &userData,
                                                  &numFrames /* in/out */,
                                                  &decBuffer,
                                                  packets.get());

    if (rv && rv != kNeedMoreData) {
      LOG("Error decoding audio stream: %#x\n", rv);
      mCallback->Error();
      break;
    }
    LOG("%d frames decoded", numFrames);

    // If we decoded zero frames then AudioConverterFillComplexBuffer is out
    // of data to provide. We drained its internal buffer completely on the
    // last pass.
    if (numFrames == 0 && rv == kNeedMoreData) {
      LOG("FillComplexBuffer out of data exactly\n");
      mCallback->InputExhausted();
      break;
    }

    const int rate = mOutputFormat.mSampleRate;
    const int channels = mOutputFormat.mChannelsPerFrame;

    int64_t time = mCurrentAudioTimestamp;
    int64_t duration = FramesToUsecs(numFrames, rate).value();

    LOG("pushed audio at time %lfs; duration %lfs\n",
        (double)time / USECS_PER_S, (double)duration / USECS_PER_S);

    AudioData* audio = new AudioData(mSamplePosition,
                                     time, duration, numFrames,
                                     reinterpret_cast<AudioDataValue*>(decoded.forget()),
                                     channels, rate);
    mCallback->Output(audio);
    mHaveOutput = true;

    if (rv == kNeedMoreData) {
      // No error; we just need more data.
      LOG("FillComplexBuffer out of data\n");
      mCallback->InputExhausted();
      break;
    }
  } while (true);
}
/*
 * This callback is called when |AudioFileStreamParseBytes| has enough data to
 * extract one or more MP3 packets.
 */
void
AppleMP3Reader::AudioSampleCallback(UInt32 aNumBytes,
                                    UInt32 aNumPackets,
                                    const void* aData,
                                    AudioStreamPacketDescription* aPackets)
{
  LOGD("got %u bytes, %u packets\n", aNumBytes, aNumPackets);

  // 1 frame per packet * num channels * 32-bit float
  uint32_t decodedSize = MAX_AUDIO_FRAMES * mAudioChannels * sizeof(AudioDataValue);

  // descriptions for _decompressed_ audio packets. ignored.
  nsAutoArrayPtr<AudioStreamPacketDescription>
    packets(new AudioStreamPacketDescription[MAX_AUDIO_FRAMES]);

  // This API insists on having MP3 packets spoon-fed to it from a callback.
  // This structure exists only to pass our state and the result of the parser
  // on to the callback above.
  PassthroughUserData userData =
    { this, aNumPackets, aNumBytes, aData, aPackets, false };

  do {
    // Decompressed audio buffer
    nsAutoArrayPtr<uint8_t> decoded(new uint8_t[decodedSize]);

    AudioBufferList decBuffer;
    decBuffer.mNumberBuffers = 1;
    decBuffer.mBuffers[0].mNumberChannels = mAudioChannels;
    decBuffer.mBuffers[0].mDataByteSize = decodedSize;
    decBuffer.mBuffers[0].mData = decoded.get();

    // in: the max number of packets we can handle from the decoder.
    // out: the number of packets the decoder is actually returning.
    UInt32 numFrames = MAX_AUDIO_FRAMES;

    OSStatus rv = AudioConverterFillComplexBuffer(mAudioConverter,
                                                  PassthroughInputDataCallback,
                                                  &userData,
                                                  &numFrames /* in/out */,
                                                  &decBuffer,
                                                  packets.get());

    if (rv && rv != kNeedMoreData) {
      LOGE("Error decoding audio stream: %x\n", rv);
      break;
    }

    // If we decoded zero frames then AudioConverterFillComplexBuffer is out
    // of data to provide. We drained its internal buffer completely on the
    // last pass.
    if (numFrames == 0 && rv == kNeedMoreData) {
      LOGD("FillComplexBuffer out of data exactly\n");
      break;
    }

    int64_t time = FramesToUsecs(mCurrentAudioFrame, mAudioSampleRate).value();
    int64_t duration = FramesToUsecs(numFrames, mAudioSampleRate).value();

    LOGD("pushed audio at time %lfs; duration %lfs\n",
         (double)time / USECS_PER_S, (double)duration / USECS_PER_S);

    AudioData* audio = new AudioData(mDecoder->GetResource()->Tell(),
                                     time, duration, numFrames,
                                     reinterpret_cast<AudioDataValue*>(decoded.forget()),
                                     mAudioChannels, mAudioSampleRate);
    mAudioQueue.Push(audio);

    mCurrentAudioFrame += numFrames;

    if (rv == kNeedMoreData) {
      // No error; we just need more data.
      LOGD("FillComplexBuffer out of data\n");
      break;
    }
  } while (true);
}
HRESULT
WMFAudioMFTManager::Output(int64_t aStreamOffset,
                           nsAutoPtr<MediaData>& aOutData)
{
  aOutData = nullptr;
  RefPtr<IMFSample> sample;
  HRESULT hr;
  while (true) {
    hr = mDecoder->Output(&sample);
    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
      return hr;
    }
    if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
      hr = UpdateOutputType();
      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
      continue;
    }
    break;
  }

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  RefPtr<IMFMediaBuffer> buffer;
  hr = sample->ConvertToContiguousBuffer(byRef(buffer));
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  BYTE* data = nullptr; // Note: *data will be owned by the IMFMediaBuffer, we don't need to free it.
  DWORD maxLength = 0, currentLength = 0;
  hr = buffer->Lock(&data, &maxLength, &currentLength);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  // Sometimes when starting decoding, the AAC decoder gives us samples
  // with a negative timestamp. AAC does usually have preroll (or encoder
  // delay) encoded into its bitstream, but the amount encoded to the stream
  // is variable, and it's not signalled in-bitstream. There is sometimes
  // signalling in the MP4 container as to the preroll amount, but it's
  // inconsistent. It looks like WMF's AAC encoder may take this into
  // account, so strip off samples with a negative timestamp to get us
  // to a 0-timestamp start. This seems to maintain A/V sync, so we can run
  // with this until someone complains...

  // We calculate the timestamp and the duration based on the number of audio
  // frames we've already played. We don't trust the timestamp stored on the
  // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?

  // If this sample block comes after a discontinuity (i.e. a gap or seek)
  // reset the frame counters, and capture the timestamp. Future timestamps
  // will be offset from this block's timestamp.
  UINT32 discontinuity = false;
  int32_t numFramesToStrip = 0;
  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
  if (mMustRecaptureAudioPosition || discontinuity) {
    // Update the output type, in case this segment has a different
    // rate. This also triggers on the first sample, which can have a
    // different rate than is advertised in the container, and sometimes we
    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.
    hr = UpdateOutputType();
    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

    mAudioFrameSum = 0;
    LONGLONG timestampHns = 0;
    hr = sample->GetSampleTime(&timestampHns);
    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    hr = HNsToFrames(timestampHns, mAudioRate, &mAudioFrameOffset);
    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    if (mAudioFrameOffset < 0) {
      // First sample has a negative timestamp. Strip off the samples until
      // we reach positive territory.
      numFramesToStrip = -mAudioFrameOffset;
      mAudioFrameOffset = 0;
    }
    mMustRecaptureAudioPosition = false;
  }
  MOZ_ASSERT(numFramesToStrip >= 0);
  int32_t numSamples = currentLength / mAudioBytesPerSample;
  int32_t numFrames = numSamples / mAudioChannels;
  int32_t offset = std::min<int32_t>(numFramesToStrip, numFrames);
  numFrames -= offset;
  numSamples -= offset * mAudioChannels;
  MOZ_ASSERT(numFrames >= 0);
  MOZ_ASSERT(numSamples >= 0);
  if (numFrames == 0) {
    // All data from this chunk stripped, loop back and try to output the next
    // frame, if possible.
    return S_OK;
  }

  nsAutoArrayPtr<AudioDataValue> audioData(new AudioDataValue[numSamples]);

  // Just assume PCM output for now...
  MOZ_ASSERT(mAudioBytesPerSample == 2);
  int16_t* pcm = ((int16_t*)data) + (offset * mAudioChannels);
  MOZ_ASSERT(pcm >= (int16_t*)data);
  MOZ_ASSERT(pcm <= (int16_t*)(data + currentLength));
  MOZ_ASSERT(pcm + numSamples <= (int16_t*)(data + currentLength));
  for (int32_t i = 0; i < numSamples; ++i) {
    audioData[i] = AudioSampleToFloat(pcm[i]);
  }

  buffer->Unlock();

  int64_t timestamp;
  hr = FramesToUsecs(mAudioFrameOffset + mAudioFrameSum, mAudioRate, &timestamp);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  mAudioFrameSum += numFrames;

  int64_t duration;
  hr = FramesToUsecs(numFrames, mAudioRate, &duration);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  aOutData = new AudioData(aStreamOffset,
                           timestamp,
                           duration,
                           numFrames,
                           audioData.forget(),
                           mAudioChannels,
                           mAudioRate);

#ifdef LOG_SAMPLE_DECODE
  LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
      timestamp, duration, currentLength);
#endif

  return S_OK;
}
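// Context sketch (assumption): the WMF paths above use HRESULT-returning
// conversion wrappers rather than CheckedInt64 directly. HNsToFrames converts
// an IMFSample timestamp expressed in 100-nanosecond units into a frame
// count, and the three-argument FramesToUsecs overload reports overflow
// through the HRESULT. Hypothetical sketches of both, built on CheckedInt:
HRESULT
HNsToFrames(int64_t aHNs, uint32_t aRate, int64_t* aOutFrames)
{
  MOZ_ASSERT(aOutFrames);
  // 100-nanosecond units: 10,000,000 per second.
  const int64_t HNS_PER_S = USECS_PER_S * 10;
  CheckedInt<int64_t> i = aHNs;
  i *= aRate;
  i /= HNS_PER_S;
  NS_ENSURE_TRUE(i.isValid(), E_FAIL);
  *aOutFrames = i.value();
  return S_OK;
}

HRESULT
FramesToUsecs(int64_t aSamples, uint32_t aRate, int64_t* aOutUsecs)
{
  MOZ_ASSERT(aOutUsecs);
  // Reuse the CheckedInt64 helper and surface overflow as a failed HRESULT.
  CheckedInt64 i = FramesToUsecs(aSamples, aRate);
  NS_ENSURE_TRUE(i.isValid(), E_FAIL);
  *aOutUsecs = i.value();
  return S_OK;
}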
int
VorbisDataDecoder::DoDecode(MediaRawData* aSample)
{
  const unsigned char* aData = aSample->Data();
  size_t aLength = aSample->Size();
  int64_t aOffset = aSample->mOffset;
  uint64_t aTstampUsecs = aSample->mTime;
  int64_t aTotalFrames = 0;

  MOZ_ASSERT(mPacketCount >= 3);

  if (!mLastFrameTime || mLastFrameTime.ref() != aSample->mTime) {
    // We are starting a new block.
    mFrames = 0;
    mLastFrameTime = Some(aSample->mTime);
  }

  ogg_packet pkt = InitVorbisPacket(aData, aLength, false, false, -1, mPacketCount++);
  bool first_packet = mPacketCount == 4;

  if (vorbis_synthesis(&mVorbisBlock, &pkt) != 0) {
    return -1;
  }

  if (vorbis_synthesis_blockin(&mVorbisDsp, &mVorbisBlock) != 0) {
    return -1;
  }

  VorbisPCMValue** pcm = 0;
  int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
  // If the first packet of audio in the media produces no data, we
  // still need to produce an AudioData for it so that the correct media
  // start time is calculated. Otherwise we'd end up with a media start
  // time derived from the timecode of the first packet that produced
  // data.
  if (frames == 0 && first_packet) {
    mCallback->Output(new AudioData(aOffset, aTstampUsecs, 0, 0, nullptr,
                                    mVorbisDsp.vi->channels,
                                    mVorbisDsp.vi->rate));
  }
  while (frames > 0) {
    uint32_t channels = mVorbisDsp.vi->channels;
    auto buffer = MakeUnique<AudioDataValue[]>(frames * channels);
    for (uint32_t j = 0; j < channels; ++j) {
      VorbisPCMValue* channel = pcm[j];
      for (uint32_t i = 0; i < uint32_t(frames); ++i) {
        buffer[i * channels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
      }
    }

    CheckedInt64 duration = FramesToUsecs(frames, mVorbisDsp.vi->rate);
    if (!duration.isValid()) {
      NS_WARNING("Int overflow converting WebM audio duration");
      return -1;
    }
    CheckedInt64 total_duration = FramesToUsecs(mFrames, mVorbisDsp.vi->rate);
    if (!total_duration.isValid()) {
      NS_WARNING("Int overflow converting WebM audio total_duration");
      return -1;
    }

    CheckedInt64 time = total_duration + aTstampUsecs;
    if (!time.isValid()) {
      NS_WARNING("Int overflow adding total_duration and aTstampUsecs");
      return -1;
    }

    aTotalFrames += frames;
    mCallback->Output(new AudioData(aOffset,
                                    time.value(),
                                    duration.value(),
                                    frames,
                                    Move(buffer),
                                    mVorbisDsp.vi->channels,
                                    mVorbisDsp.vi->rate));
    mFrames += frames;
    if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
      return -1;
    }

    frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
  }

  return aTotalFrames > 0 ? 1 : 0;
}
nsresult
MediaDecoderReader::DecodeToTarget(int64_t aTarget)
{
  DECODER_LOG(PR_LOG_DEBUG, ("MediaDecoderReader::DecodeToTarget(%lld) Begin", aTarget));

  // Decode forward to the target frame. Start with video, if we have it.
  if (HasVideo()) {
    bool eof = false;
    int64_t startTime = -1;
    nsAutoPtr<VideoData> video;
    while (HasVideo() && !eof) {
      while (VideoQueue().GetSize() == 0 && !eof) {
        bool skip = false;
        eof = !DecodeVideoFrame(skip, 0);
        {
          ReentrantMonitorAutoEnter decoderMon(mDecoder->GetReentrantMonitor());
          if (mDecoder->IsShutdown()) {
            return NS_ERROR_FAILURE;
          }
        }
      }
      if (VideoQueue().GetSize() == 0) {
        // Hit end of file, we want to display the last frame of the video.
        if (video) {
          VideoQueue().PushFront(video.forget());
        }
        break;
      }
      video = VideoQueue().PeekFront();
      // If the frame end time is less than the seek target, we won't want
      // to display this frame after the seek, so discard it.
      if (video && video->GetEndTime() <= aTarget) {
        if (startTime == -1) {
          startTime = video->mTime;
        }
        VideoQueue().PopFront();
      } else {
        video.forget();
        break;
      }
    }
    {
      ReentrantMonitorAutoEnter decoderMon(mDecoder->GetReentrantMonitor());
      if (mDecoder->IsShutdown()) {
        return NS_ERROR_FAILURE;
      }
    }
    DECODER_LOG(PR_LOG_DEBUG, ("First video frame after decode is %lld", startTime));
  }

  if (HasAudio()) {
    // Decode audio forward to the seek target.
    bool eof = false;
    while (HasAudio() && !eof) {
      while (!eof && AudioQueue().GetSize() == 0) {
        eof = !DecodeAudioData();
        {
          ReentrantMonitorAutoEnter decoderMon(mDecoder->GetReentrantMonitor());
          if (mDecoder->IsShutdown()) {
            return NS_ERROR_FAILURE;
          }
        }
      }
      const AudioData* audio = AudioQueue().PeekFront();
      if (!audio) {
        break;
      }
      CheckedInt64 startFrame = UsecsToFrames(audio->mTime, mInfo.mAudio.mRate);
      CheckedInt64 targetFrame = UsecsToFrames(aTarget, mInfo.mAudio.mRate);
      if (!startFrame.isValid() || !targetFrame.isValid()) {
        return NS_ERROR_FAILURE;
      }
      if (startFrame.value() + audio->mFrames <= targetFrame.value()) {
        // Our seek target lies after the frames in this AudioData. Pop it
        // off the queue, and keep decoding forwards.
        delete AudioQueue().PopFront();
        audio = nullptr;
        continue;
      }
      if (startFrame.value() > targetFrame.value()) {
        // The seek target doesn't lie in the audio block just after the last
        // audio frames we've seen which were before the seek target. This
        // could have been the first audio data we've seen after seek, i.e. the
        // seek terminated after the seek target in the audio stream. Just
        // abort the audio decode-to-target, the state machine will play
        // silence to cover the gap. Typically this happens in poorly muxed
        // files.
        NS_WARNING("Audio not synced after seek, maybe a poorly muxed file?");
        break;
      }

      // The seek target lies somewhere in this AudioData's frames, strip off
      // any frames which lie before the seek target, so we'll begin playback
      // exactly at the seek target.
      NS_ASSERTION(targetFrame.value() >= startFrame.value(),
                   "Target must be at or after data start.");
      NS_ASSERTION(targetFrame.value() < startFrame.value() + audio->mFrames,
                   "Data must end after target.");

      int64_t framesToPrune = targetFrame.value() - startFrame.value();
      if (framesToPrune > audio->mFrames) {
        // We've messed up somehow. Don't try to trim frames, the |frames|
        // variable below will overflow.
        NS_WARNING("Can't prune more frames than we have!");
        break;
      }
      uint32_t frames = audio->mFrames - static_cast<uint32_t>(framesToPrune);
      uint32_t channels = audio->mChannels;
      nsAutoArrayPtr<AudioDataValue> audioData(new AudioDataValue[frames * channels]);
      memcpy(audioData.get(),
             audio->mAudioData.get() + (framesToPrune * channels),
             frames * channels * sizeof(AudioDataValue));
      CheckedInt64 duration = FramesToUsecs(frames, mInfo.mAudio.mRate);
      if (!duration.isValid()) {
        return NS_ERROR_FAILURE;
      }
      nsAutoPtr<AudioData> data(new AudioData(audio->mOffset,
                                              aTarget,
                                              duration.value(),
                                              frames,
                                              audioData.forget(),
                                              channels));
      delete AudioQueue().PopFront();
      AudioQueue().PushFront(data.forget());
      break;
    }
  }

  DECODER_LOG(PR_LOG_DEBUG, ("MediaDecoderReader::DecodeToTarget(%lld) End", aTarget));

  return NS_OK;
}