void MoofParser::ParseEncrypted(Box& aBox) {
  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    // Some MP4 files have been found to have multiple sinf boxes in the same
    // enc* box. This does not match spec anyway, so just choose the first
    // one that parses properly.
    if (box.IsType("sinf")) {
      mSinf = Sinf(box);

      if (mSinf.IsValid()) {
        break;
      }
    }
  }
}
void MoofParser::ParseTrak(Box& aBox) {
  Tkhd tkhd;
  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    if (box.IsType("tkhd")) {
      tkhd = Tkhd(box);
    } else if (box.IsType("mdia")) {
      if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
        ParseMdia(box, tkhd);
      }
    } else if (box.IsType("edts")) {
      mEdts = Edts(box);
    }
  }
}
Result<Ok, nsresult> Edts::Parse(Box& aBox) {
  Box child = aBox.FirstChild();
  if (!child.IsType("elst")) {
    return Err(NS_ERROR_FAILURE);
  }

  BoxReader reader(child);
  uint32_t flags;
  MOZ_TRY_VAR(flags, reader->ReadU32());
  uint8_t version = flags >> 24;
  bool emptyEntry = false;
  uint32_t entryCount;
  MOZ_TRY_VAR(entryCount, reader->ReadU32());
  for (uint32_t i = 0; i < entryCount; i++) {
    uint64_t segment_duration;
    int64_t media_time;
    if (version == 1) {
      MOZ_TRY_VAR(segment_duration, reader->ReadU64());
      MOZ_TRY_VAR(media_time, reader->Read64());
    } else {
      uint32_t tmp;
      MOZ_TRY_VAR(tmp, reader->ReadU32());
      segment_duration = tmp;
      int32_t tmp2;
      MOZ_TRY_VAR(tmp2, reader->Read32());
      media_time = tmp2;
    }
    if (media_time == -1 && i) {
      LOG(Edts, "Multiple empty edit, not handled");
    } else if (media_time == -1) {
      mEmptyOffset = segment_duration;
      emptyEntry = true;
    } else if (i > 1 || (i > 0 && !emptyEntry)) {
      LOG(Edts,
          "More than one edit entry, not handled. A/V sync will be wrong");
      break;
    } else {
      mMediaStart = media_time;
    }
    MOZ_TRY(reader->ReadU32());  // media_rate_integer and media_rate_fraction
  }
  return Ok();
}
Edts::Edts(Box& aBox)
  : mMediaStart(0)
{
  Box child = aBox.FirstChild();
  if (!child.IsType("elst")) {
    return;
  }

  BoxReader reader(child);
  if (!reader->CanReadType<uint32_t>()) {
    LOG(Edts, "Incomplete Box (missing flags)");
    return;
  }
  uint32_t flags = reader->ReadU32();
  uint8_t version = flags >> 24;
  size_t need =
    sizeof(uint32_t) + 2 * (version ? sizeof(int64_t) : sizeof(uint32_t));
  if (reader->Remaining() < need) {
    LOG(Edts, "Incomplete Box (have:%lld need:%lld)",
        (long long)reader->Remaining(), (long long)need);
    return;
  }
  uint32_t entryCount = reader->ReadU32();
  NS_ASSERTION(entryCount == 1, "Can't handle videos with multiple edits");
  if (entryCount != 1) {
    reader->DiscardRemaining();
    return;
  }

  uint64_t segment_duration;
  if (version == 1) {
    segment_duration = reader->ReadU64();
    mMediaStart = reader->Read64();
  } else {
    segment_duration = reader->ReadU32();
    mMediaStart = reader->Read32();
  }
  reader->DiscardRemaining();
}
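// Illustrative sketch (not part of MoofParser): a standalone reading of the
// same `elst` payload the two Edts parsers above consume. Field widths depend
// on the version byte packed into the top of the version/flags word, and a
// media_time of -1 marks an "empty edit" that delays the presentation.
// The helper names here (ReadBE32/ReadBE64, ElstEntry, ParseElstPayload)
// are hypothetical and not part of the Gecko code.
#include <cstdint>
#include <vector>

static uint32_t ReadBE32(const uint8_t* p) {
  return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
         (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}
static uint64_t ReadBE64(const uint8_t* p) {
  return (uint64_t(ReadBE32(p)) << 32) | ReadBE32(p + 4);
}

struct ElstEntry {
  uint64_t segment_duration;
  int64_t media_time;  // -1 == empty edit
};

// Parses the payload of an `elst` box (everything after the box header).
// Returns false if the buffer is too short for the declared entry count.
static bool ParseElstPayload(const uint8_t* data, size_t size,
                             std::vector<ElstEntry>& out) {
  if (size < 8) {
    return false;
  }
  uint8_t version = data[0];              // top byte of version/flags
  uint32_t entryCount = ReadBE32(data + 4);
  size_t entrySize = (version == 1 ? 16 : 8) + 4;  // +4 for media_rate
  if (size < 8 + size_t(entryCount) * entrySize) {
    return false;
  }
  const uint8_t* p = data + 8;
  for (uint32_t i = 0; i < entryCount; i++) {
    ElstEntry e;
    if (version == 1) {
      e.segment_duration = ReadBE64(p);
      e.media_time = int64_t(ReadBE64(p + 8));
      p += 16;
    } else {
      e.segment_duration = ReadBE32(p);
      e.media_time = int32_t(ReadBE32(p + 4));
      p += 8;
    }
    p += 4;  // skip media_rate_integer + media_rate_fraction
    out.push_back(e);
  }
  return true;
}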
void Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd,
                     Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime,
                     bool aIsAudio) {
  MOZ_ASSERT(aDecodeTime);
  Tfhd tfhd(aTrex);
  Tfdt tfdt;

  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    if (box.IsType("tfhd")) {
      tfhd = Tfhd(box, aTrex);
    } else if (!aTrex.mTrackId || tfhd.mTrackId == aTrex.mTrackId) {
      if (box.IsType("tfdt")) {
        tfdt = Tfdt(box);
      } else if (box.IsType("sgpd")) {
        Sgpd sgpd(box);
        if (sgpd.IsValid() && sgpd.mGroupingType == "seig") {
          mFragmentSampleEncryptionInfoEntries.Clear();
          if (!mFragmentSampleEncryptionInfoEntries.AppendElements(
                  sgpd.mEntries, mozilla::fallible)) {
            LOG(Moof, "OOM");
            return;
          }
        }
      } else if (box.IsType("sbgp")) {
        Sbgp sbgp(box);
        if (sbgp.IsValid() && sbgp.mGroupingType == "seig") {
          mFragmentSampleToGroupEntries.Clear();
          if (!mFragmentSampleToGroupEntries.AppendElements(
                  sbgp.mEntries, mozilla::fallible)) {
            LOG(Moof, "OOM");
            return;
          }
        }
      } else if (box.IsType("saiz")) {
        if (!mSaizs.AppendElement(Saiz(box, aSinf.mDefaultEncryptionType),
                                  mozilla::fallible)) {
          LOG(Moof, "OOM");
          return;
        }
      } else if (box.IsType("saio")) {
        if (!mSaios.AppendElement(Saio(box, aSinf.mDefaultEncryptionType),
                                  mozilla::fallible)) {
          LOG(Moof, "OOM");
          return;
        }
      }
    }
  }
  if (aTrex.mTrackId && tfhd.mTrackId != aTrex.mTrackId) {
    return;
  }
  // Now search for TRUN boxes.
  uint64_t decodeTime =
      tfdt.IsValid() ? tfdt.mBaseMediaDecodeTime : *aDecodeTime;
  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    if (box.IsType("trun")) {
      if (ParseTrun(box, tfhd, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio)
              .isOk()) {
        mValid = true;
      } else {
        LOG(Moof, "ParseTrun failed");
        mValid = false;
        break;
      }
    }
  }
  *aDecodeTime = decodeTime;
}
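// Illustrative sketch (not part of MoofParser): how the running decode time
// threads through successive fragments. ParseTraf() above seeds decodeTime
// from the fragment's tfdt when one is present, otherwise from the end of the
// previous fragment, lets trun parsing advance it by each sample's duration,
// and writes the result back through aDecodeTime. FragmentInfo and
// WalkFragments are hypothetical stand-ins for the boxes and parsing loop.
#include <cstdint>
#include <vector>

struct FragmentInfo {
  bool hasTfdt;
  uint64_t baseMediaDecodeTime;  // from tfdt, if present
  uint64_t durationInTimescale;  // sum of trun sample durations
};

static uint64_t WalkFragments(const std::vector<FragmentInfo>& aFragments) {
  uint64_t decodeTime = 0;  // plays the role of *aDecodeTime in ParseTraf()
  for (const FragmentInfo& frag : aFragments) {
    // tfdt, when present, is authoritative for this fragment.
    if (frag.hasTfdt) {
      decodeTime = frag.baseMediaDecodeTime;
    }
    // trun parsing advances the running decode time sample by sample.
    decodeTime += frag.durationInTimescale;
  }
  return decodeTime;
}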
Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
           Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
  : mRange(aBox.Range())
  , mMaxRoundingError(35000)
{
  nsTArray<Box> psshBoxes;
  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
    if (box.IsType("traf")) {
      ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
    }
    if (box.IsType("pssh")) {
      psshBoxes.AppendElement(box);
    }
  }

  // The EME spec requires that PSSH boxes which are contiguous in the
  // file are dispatched to the media element in a single "encrypted" event.
  // So append contiguous boxes here.
  for (size_t i = 0; i < psshBoxes.Length(); ++i) {
    Box box = psshBoxes[i];
    if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) {
      mPsshes.AppendElement();
    }
    nsTArray<uint8_t>& pssh = mPsshes.LastElement();
    pssh.AppendElements(box.Header());
    pssh.AppendElements(box.Read());
  }

  if (IsValid()) {
    if (mIndex.Length()) {
      // Ensure the samples are contiguous with no gaps.
      nsTArray<Sample*> ctsOrder;
      for (auto& sample : mIndex) {
        ctsOrder.AppendElement(&sample);
      }
      ctsOrder.Sort(CtsComparator());

      for (size_t i = 1; i < ctsOrder.Length(); i++) {
        ctsOrder[i - 1]->mCompositionRange.end =
            ctsOrder[i]->mCompositionRange.start;
      }

      // In MP4, the duration of a sample is defined as the delta between two
      // decode timestamps. The operation above has updated the duration of
      // each sample, as a Sample's duration is
      // mCompositionRange.end - mCompositionRange.start. MSE's
      // TrackBuffersManager expects a dts that increases by the sample's
      // duration, so we rewrite the dts accordingly.
      int64_t presentationDuration =
          ctsOrder.LastElement()->mCompositionRange.end -
          ctsOrder[0]->mCompositionRange.start;
      auto decodeOffset =
          aMdhd.ToMicroseconds((int64_t)*aDecodeTime - aEdts.mMediaStart);
      auto offsetOffset = aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
      int64_t endDecodeTime =
          decodeOffset.isOk() && offsetOffset.isOk()
              ? decodeOffset.unwrap() + offsetOffset.unwrap()
              : 0;
      int64_t decodeDuration = endDecodeTime - mIndex[0].mDecodeTime;
      double adjust = !!presentationDuration
                          ? (double)decodeDuration / presentationDuration
                          : 0;
      int64_t dtsOffset = mIndex[0].mDecodeTime;
      int64_t compositionDuration = 0;
      // Adjust the dts, ensuring that the new adjusted dts will never be
      // greater than decodeTime (the next moof's decode start time).
      for (auto& sample : mIndex) {
        sample.mDecodeTime = dtsOffset + int64_t(compositionDuration * adjust);
        compositionDuration += sample.mCompositionRange.Length();
      }
      mTimeRange = MP4Interval<Microseconds>(
          ctsOrder[0]->mCompositionRange.start,
          ctsOrder.LastElement()->mCompositionRange.end);
    }
    ProcessCenc();
  }
}
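// Illustrative sketch (not part of MoofParser): the dts rescaling performed at
// the end of the constructor above, in isolation. Accumulated composition
// durations are scaled by decodeDuration / presentationDuration so the
// rewritten dts values stay monotonic and never run past the next moof's
// decode start time. ToySample, RescaleDts and the numbers in main() are
// hypothetical.
#include <cstdint>
#include <cstdio>
#include <vector>

struct ToySample {
  int64_t decodeTime;
  int64_t compositionDuration;
};

static void RescaleDts(std::vector<ToySample>& aSamples,
                       int64_t aEndDecodeTime) {
  if (aSamples.empty()) {
    return;
  }
  int64_t presentationDuration = 0;
  for (const ToySample& s : aSamples) {
    presentationDuration += s.compositionDuration;
  }
  int64_t dtsOffset = aSamples[0].decodeTime;
  int64_t decodeDuration = aEndDecodeTime - dtsOffset;
  double adjust = presentationDuration
                      ? double(decodeDuration) / presentationDuration
                      : 0;
  int64_t compositionDuration = 0;
  for (ToySample& s : aSamples) {
    s.decodeTime = dtsOffset + int64_t(compositionDuration * adjust);
    compositionDuration += s.compositionDuration;
  }
}

int main() {
  // Three samples of composition duration 40 squeezed into a decode window of
  // 90: adjust = 90 / 120 = 0.75, so each rewritten dts advances by 30.
  std::vector<ToySample> samples = {{1000, 40}, {0, 40}, {0, 40}};
  RescaleDts(samples, 1090);
  for (const ToySample& s : samples) {
    printf("dts=%lld\n", (long long)s.decodeTime);  // 1000, 1030, 1060
  }
}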