Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
           Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
  : mRange(aBox.Range())
  , mMaxRoundingError(35000)
{
  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    if (box.IsType("traf")) {
      ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
    }
  }
  if (IsValid()) {
    if (mIndex.Length()) {
      // Ensure the samples are contiguous with no gaps.
      nsTArray<Sample*> ctsOrder;
      for (auto& sample : mIndex) {
        ctsOrder.AppendElement(&sample);
      }
      ctsOrder.Sort(CtsComparator());

      for (size_t i = 1; i < ctsOrder.Length(); i++) {
        ctsOrder[i - 1]->mCompositionRange.end =
          ctsOrder[i]->mCompositionRange.start;
      }

      // In MP4, the duration of a sample is defined as the delta between two
      // decode timestamps. The operation above has updated the duration of
      // each sample, as a sample's duration is
      // mCompositionRange.end - mCompositionRange.start.
      // MSE's TrackBuffersManager expects dts to increase by the sample's
      // duration, so we rewrite the dts accordingly.
      int64_t presentationDuration =
        ctsOrder.LastElement()->mCompositionRange.end -
        ctsOrder[0]->mCompositionRange.start;
      int64_t endDecodeTime =
        aMdhd.ToMicroseconds((int64_t)*aDecodeTime - aEdts.mMediaStart) +
        aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
      int64_t decodeDuration = endDecodeTime - mIndex[0].mDecodeTime;
      double adjust = (double)decodeDuration / presentationDuration;
      int64_t dtsOffset = mIndex[0].mDecodeTime;
      int64_t compositionDuration = 0;
      // Adjust the dts, ensuring that the new adjusted dts will never be
      // greater than decodeTime (the next moof's decode start time).
      for (auto& sample : mIndex) {
        sample.mDecodeTime = dtsOffset + int64_t(compositionDuration * adjust);
        compositionDuration += sample.mCompositionRange.Length();
      }
      mTimeRange = Interval<Microseconds>(
        ctsOrder[0]->mCompositionRange.start,
        ctsOrder.LastElement()->mCompositionRange.end);
    }
    ProcessCenc();
  }
}
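// A minimal, standalone sketch (not part of the parser) of the dts rewrite
// performed by the constructor above: the decode window
// [dtsOffset, endDecodeTime) is redistributed across the samples in
// proportion to their composition durations, so the last rewritten dts never
// exceeds the next moof's decode start time. All names here are illustrative,
// and a divide-by-zero guard is added that the version above lacks.
#include <cstdint>
#include <vector>

struct SketchSample {
  int64_t mDecodeTime;
  int64_t mCompositionLength; // stands in for mCompositionRange.Length()
};

static void RescaleDts(std::vector<SketchSample>& aSamples,
                       int64_t aDtsOffset, int64_t aEndDecodeTime,
                       int64_t aPresentationDuration)
{
  // Scale factor mapping accumulated presentation time onto the available
  // decode window.
  double adjust =
    aPresentationDuration
      ? double(aEndDecodeTime - aDtsOffset) / aPresentationDuration
      : 0.0;
  int64_t compositionDuration = 0;
  for (auto& sample : aSamples) {
    sample.mDecodeTime = aDtsOffset + int64_t(compositionDuration * adjust);
    compositionDuration += sample.mCompositionLength;
  }
}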
bool
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
                uint64_t* aDecodeTime, bool aIsAudio)
{
  if (!aTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
      !aEdts.IsValid()) {
    LOG(Moof, "Invalid dependencies: aTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
        aTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), aEdts.IsValid());
    return false;
  }

  BoxReader reader(aBox);
  if (!reader->CanReadType<uint32_t>()) {
    LOG(Moof, "Incomplete Box (missing flags)");
    return false;
  }
  // Fullbox header: version in the high byte, flags in the low 24 bits.
  uint32_t flags = reader->ReadU32();
  uint8_t version = flags >> 24;

  if (!reader->CanReadType<uint32_t>()) {
    LOG(Moof, "Incomplete Box (missing sampleCount)");
    return false;
  }
  uint32_t sampleCount = reader->ReadU32();
  if (sampleCount == 0) {
    return true;
  }

  size_t need = ((flags & 1) ? sizeof(uint32_t) : 0) +
                ((flags & 4) ? sizeof(uint32_t) : 0);
  uint16_t flag[] = { 0x100, 0x200, 0x400, 0x800, 0 };
  for (size_t i = 0; flag[i]; i++) {
    if (flags & flag[i]) {
      need += sizeof(uint32_t) * sampleCount;
    }
  }
  if (reader->Remaining() < need) {
    LOG(Moof, "Incomplete Box (have:%lld need:%lld)",
        (long long)reader->Remaining(), (long long)need);
    return false;
  }

  uint64_t offset = aTfhd.mBaseDataOffset + (flags & 1 ? reader->ReadU32() : 0);
  uint32_t firstSampleFlags =
    flags & 4 ? reader->ReadU32() : aTfhd.mDefaultSampleFlags;
  uint64_t decodeTime = *aDecodeTime;
  nsTArray<Interval<Microseconds>> timeRanges;

  if (!mIndex.SetCapacity(sampleCount, fallible)) {
    LOG(Moof, "Out of Memory");
    return false;
  }

  for (size_t i = 0; i < sampleCount; i++) {
    uint32_t sampleDuration =
      flags & 0x100 ? reader->ReadU32() : aTfhd.mDefaultSampleDuration;
    uint32_t sampleSize =
      flags & 0x200 ? reader->ReadU32() : aTfhd.mDefaultSampleSize;
    uint32_t sampleFlags =
      flags & 0x400 ? reader->ReadU32()
                    : i ? aTfhd.mDefaultSampleFlags : firstSampleFlags;
    int32_t ctsOffset = 0;
    if (flags & 0x800) {
      ctsOffset = reader->Read32();
    }

    Sample sample;
    sample.mByteRange = MediaByteRange(offset, offset + sampleSize);
    offset += sampleSize;

    sample.mDecodeTime =
      aMdhd.ToMicroseconds((int64_t)decodeTime - aEdts.mMediaStart) +
      aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
    sample.mCompositionRange = Interval<Microseconds>(
      aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset -
                           aEdts.mMediaStart) +
        aMvhd.ToMicroseconds(aEdts.mEmptyOffset),
      aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration -
                           aEdts.mMediaStart) +
        aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
    decodeTime += sampleDuration;

    // Sometimes audio streams don't properly mark their samples as keyframes,
    // because every audio sample is a keyframe.
    sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio;

    // FIXME: Make this infallible after bug 968520 is done.
    MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible));

    mMdatRange = mMdatRange.Extents(sample.mByteRange);
  }
  mMaxRoundingError += aMdhd.ToMicroseconds(sampleCount);

  // Ensure the samples are contiguous with no gaps.
  nsTArray<Sample*> ctsOrder;
  for (size_t i = 0; i < mIndex.Length(); i++) {
    ctsOrder.AppendElement(&mIndex[i]);
  }
  ctsOrder.Sort(CtsComparator());

  for (size_t i = 0; i + 1 < ctsOrder.Length(); i++) {
    ctsOrder[i]->mCompositionRange.end = ctsOrder[i + 1]->mCompositionRange.start;
  }

  mTimeRange = Interval<Microseconds>(
    ctsOrder[0]->mCompositionRange.start,
    ctsOrder.LastElement()->mCompositionRange.end);
  *aDecodeTime = decodeTime;

  return true;
}
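// For reference, the trun flag bits consulted above (per ISO/IEC 14496-12):
//   0x000001  data-offset-present                       (one uint32 total)
//   0x000004  first-sample-flags-present                (one uint32 total)
//   0x000100  sample-duration-present                   (one uint32 per sample)
//   0x000200  sample-size-present                       (one uint32 per sample)
//   0x000400  sample-flags-present                      (one uint32 per sample)
//   0x000800  sample-composition-time-offsets-present   (one uint32 per sample)
// A standalone sketch of the up-front size check built from those bits; the
// function name is illustrative, not part of the parser.
#include <cstddef>
#include <cstdint>

static size_t TrunBytesNeeded(uint32_t aFlags, uint32_t aSampleCount)
{
  // Per-box optional fields.
  size_t need = ((aFlags & 0x001) ? sizeof(uint32_t) : 0) +
                ((aFlags & 0x004) ? sizeof(uint32_t) : 0);
  // Per-sample optional fields.
  for (uint32_t perSample : { 0x100u, 0x200u, 0x400u, 0x800u }) {
    if (aFlags & perSample) {
      need += sizeof(uint32_t) * aSampleCount;
    }
  }
  return need;
}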
Result<Ok, nsresult>
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
                uint64_t* aDecodeTime, bool aIsAudio)
{
  if (!aTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
      !aEdts.IsValid()) {
    LOG(Moof, "Invalid dependencies: aTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
        aTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), aEdts.IsValid());
    return Err(NS_ERROR_FAILURE);
  }

  BoxReader reader(aBox);
  if (!reader->CanReadType<uint32_t>()) {
    LOG(Moof, "Incomplete Box (missing flags)");
    return Err(NS_ERROR_FAILURE);
  }
  // Fullbox header: version in the high byte, flags in the low 24 bits.
  uint32_t flags;
  MOZ_TRY_VAR(flags, reader->ReadU32());
  uint8_t version = flags >> 24;

  if (!reader->CanReadType<uint32_t>()) {
    LOG(Moof, "Incomplete Box (missing sampleCount)");
    return Err(NS_ERROR_FAILURE);
  }
  uint32_t sampleCount;
  MOZ_TRY_VAR(sampleCount, reader->ReadU32());
  if (sampleCount == 0) {
    return Ok();
  }

  uint64_t offset = aTfhd.mBaseDataOffset;
  if (flags & 0x01) {
    uint32_t tmp;
    MOZ_TRY_VAR(tmp, reader->ReadU32());
    offset += tmp;
  }
  uint32_t firstSampleFlags = aTfhd.mDefaultSampleFlags;
  if (flags & 0x04) {
    MOZ_TRY_VAR(firstSampleFlags, reader->ReadU32());
  }
  uint64_t decodeTime = *aDecodeTime;
  nsTArray<MP4Interval<Microseconds>> timeRanges;

  if (!mIndex.SetCapacity(sampleCount, fallible)) {
    LOG(Moof, "Out of Memory");
    return Err(NS_ERROR_FAILURE);
  }

  for (size_t i = 0; i < sampleCount; i++) {
    uint32_t sampleDuration = aTfhd.mDefaultSampleDuration;
    if (flags & 0x100) {
      MOZ_TRY_VAR(sampleDuration, reader->ReadU32());
    }
    uint32_t sampleSize = aTfhd.mDefaultSampleSize;
    if (flags & 0x200) {
      MOZ_TRY_VAR(sampleSize, reader->ReadU32());
    }
    uint32_t sampleFlags = i ? aTfhd.mDefaultSampleFlags : firstSampleFlags;
    if (flags & 0x400) {
      MOZ_TRY_VAR(sampleFlags, reader->ReadU32());
    }
    int32_t ctsOffset = 0;
    if (flags & 0x800) {
      MOZ_TRY_VAR(ctsOffset, reader->Read32());
    }

    if (sampleSize) {
      Sample sample;
      sample.mByteRange = MediaByteRange(offset, offset + sampleSize);
      offset += sampleSize;

      Microseconds decodeOffset, emptyOffset, startCts, endCts;
      MOZ_TRY_VAR(decodeOffset,
                  aMdhd.ToMicroseconds((int64_t)decodeTime - aEdts.mMediaStart));
      MOZ_TRY_VAR(emptyOffset, aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
      sample.mDecodeTime = decodeOffset + emptyOffset;
      MOZ_TRY_VAR(startCts,
                  aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset -
                                       aEdts.mMediaStart));
      MOZ_TRY_VAR(endCts,
                  aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset +
                                       sampleDuration - aEdts.mMediaStart));
      sample.mCompositionRange =
        MP4Interval<Microseconds>(startCts + emptyOffset, endCts + emptyOffset);

      // Sometimes audio streams don't properly mark their samples as
      // keyframes, because every audio sample is a keyframe.
      sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio;

      // FIXME: Make this infallible after bug 968520 is done.
      MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible));

      mMdatRange = mMdatRange.Span(sample.mByteRange);
    }
    decodeTime += sampleDuration;
  }
  Microseconds roundTime;
  MOZ_TRY_VAR(roundTime, aMdhd.ToMicroseconds(sampleCount));
  mMaxRoundingError += roundTime;

  *aDecodeTime = decodeTime;

  return Ok();
}
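// A self-contained sketch of the MOZ_TRY_VAR control flow used above, with a
// toy Result type standing in for mozilla::Result (illustrative only; the
// real macro lives in mfbt and also handles move-only types). On error, the
// macro returns early from the enclosing function; on success it assigns the
// unwrapped value.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <optional>

enum class Status { Ok, Failure };

template <typename T>
struct ToyResult {
  std::optional<T> value; // engaged on success
  Status error;           // meaningful when !value
  bool isErr() const { return !value.has_value(); }
};

#define TOY_TRY_VAR(lhs, expr)           \
  do {                                   \
    auto res_ = (expr);                  \
    if (res_.isErr()) return res_.error; \
    (lhs) = *res_.value;                 \
  } while (0)

static ToyResult<uint32_t> ReadU32(const uint8_t* aBuf, size_t aLen,
                                   size_t& aPos)
{
  if (aPos + 4 > aLen) {
    return { std::nullopt, Status::Failure }; // incomplete box
  }
  uint32_t v = uint32_t(aBuf[aPos]) << 24 | uint32_t(aBuf[aPos + 1]) << 16 |
               uint32_t(aBuf[aPos + 2]) << 8 | uint32_t(aBuf[aPos + 3]);
  aPos += 4;
  return { v, Status::Ok };
}

static Status ParseHeader(const uint8_t* aBuf, size_t aLen)
{
  size_t pos = 0;
  uint32_t flags, sampleCount;
  TOY_TRY_VAR(flags, ReadU32(aBuf, aLen, pos));       // bails on short read
  TOY_TRY_VAR(sampleCount, ReadU32(aBuf, aLen, pos)); // bails on short read
  std::printf("flags=%#x samples=%u\n", flags, sampleCount);
  return Status::Ok;
}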
Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
           Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
  : mRange(aBox.Range())
  , mMaxRoundingError(35000)
{
  nsTArray<Box> psshBoxes;
  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    if (box.IsType("traf")) {
      ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
    }
    if (box.IsType("pssh")) {
      psshBoxes.AppendElement(box);
    }
  }

  // The EME spec requires that PSSH boxes which are contiguous in the
  // file are dispatched to the media element in a single "encrypted" event.
  // So append contiguous boxes here.
  for (size_t i = 0; i < psshBoxes.Length(); ++i) {
    Box box = psshBoxes[i];
    if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) {
      mPsshes.AppendElement();
    }
    nsTArray<uint8_t>& pssh = mPsshes.LastElement();
    pssh.AppendElements(box.Header());
    pssh.AppendElements(box.Read());
  }

  if (IsValid()) {
    if (mIndex.Length()) {
      // Ensure the samples are contiguous with no gaps.
      nsTArray<Sample*> ctsOrder;
      for (auto& sample : mIndex) {
        ctsOrder.AppendElement(&sample);
      }
      ctsOrder.Sort(CtsComparator());

      for (size_t i = 1; i < ctsOrder.Length(); i++) {
        ctsOrder[i - 1]->mCompositionRange.end =
          ctsOrder[i]->mCompositionRange.start;
      }

      // In MP4, the duration of a sample is defined as the delta between two
      // decode timestamps. The operation above has updated the duration of
      // each sample, as a sample's duration is
      // mCompositionRange.end - mCompositionRange.start.
      // MSE's TrackBuffersManager expects dts to increase by the sample's
      // duration, so we rewrite the dts accordingly.
      int64_t presentationDuration =
        ctsOrder.LastElement()->mCompositionRange.end -
        ctsOrder[0]->mCompositionRange.start;
      auto decodeOffset =
        aMdhd.ToMicroseconds((int64_t)*aDecodeTime - aEdts.mMediaStart);
      auto offsetOffset = aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
      int64_t endDecodeTime = decodeOffset.isOk() && offsetOffset.isOk()
                                ? decodeOffset.unwrap() + offsetOffset.unwrap()
                                : 0;
      int64_t decodeDuration = endDecodeTime - mIndex[0].mDecodeTime;
      double adjust = presentationDuration
                        ? (double)decodeDuration / presentationDuration
                        : 0;
      int64_t dtsOffset = mIndex[0].mDecodeTime;
      int64_t compositionDuration = 0;
      // Adjust the dts, ensuring that the new adjusted dts will never be
      // greater than decodeTime (the next moof's decode start time).
      for (auto& sample : mIndex) {
        sample.mDecodeTime = dtsOffset + int64_t(compositionDuration * adjust);
        compositionDuration += sample.mCompositionRange.Length();
      }
      mTimeRange = MP4Interval<Microseconds>(
        ctsOrder[0]->mCompositionRange.start,
        ctsOrder.LastElement()->mCompositionRange.end);
    }
    ProcessCenc();
  }
}
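// A minimal sketch of the pssh coalescing rule above: boxes whose byte ranges
// touch end-to-start are merged into one blob, matching the EME requirement
// that contiguous pssh boxes produce a single "encrypted" event. All names
// here are illustrative, not part of the parser.
#include <cstddef>
#include <cstdint>
#include <vector>

struct SketchBox {
  uint64_t offset;            // file offset of this box
  uint64_t nextOffset;        // offset just past this box
  std::vector<uint8_t> bytes; // header + payload
};

static std::vector<std::vector<uint8_t>>
CoalescePssh(const std::vector<SketchBox>& aBoxes)
{
  std::vector<std::vector<uint8_t>> blobs;
  for (size_t i = 0; i < aBoxes.size(); ++i) {
    // Start a new blob unless this box begins exactly where the previous one
    // ended.
    if (i == 0 || aBoxes[i].offset != aBoxes[i - 1].nextOffset) {
      blobs.emplace_back();
    }
    blobs.back().insert(blobs.back().end(), aBoxes[i].bytes.begin(),
                        aBoxes[i].bytes.end());
  }
  return blobs;
}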