void MatroskaFileParser::lookForNextBlock() { #ifdef DEBUG fprintf(stderr, "looking for Block\n"); #endif // Read and skip over each Matroska header, until we get to a 'Cluster': EBMLId id; EBMLDataSize size; while (fCurrentParseState == LOOKING_FOR_BLOCK) { while (!parseEBMLIdAndSize(id, size)) {} #ifdef DEBUG fprintf(stderr, "MatroskaFileParser::lookForNextBlock(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val()); #endif switch (id.val()) { case MATROSKA_ID_SEGMENT: { // 'Segment' header: enter this break; } case MATROSKA_ID_CLUSTER: { // 'Cluster' header: enter this break; } case MATROSKA_ID_TIMECODE: { // 'Timecode' header: get this value unsigned timecode; if (parseEBMLVal_unsigned(size, timecode)) { fClusterTimecode = timecode; #ifdef DEBUG fprintf(stderr, "\tCluster timecode: %d (== %f seconds)\n", fClusterTimecode, fClusterTimecode*(fOurFile.fTimecodeScale/1000000000.0)); #endif } break; } case MATROSKA_ID_BLOCK_GROUP: { // 'Block Group' header: enter this break; } case MATROSKA_ID_SIMPLEBLOCK: case MATROSKA_ID_BLOCK: { // 'SimpleBlock' or 'Block' header: enter this (and we're done) fBlockSize = (unsigned)size.val(); fCurrentParseState = PARSING_BLOCK; break; } case MATROSKA_ID_BLOCK_DURATION: { // 'Block Duration' header: get this value (but we currently don't do anything with it) unsigned blockDuration; if (parseEBMLVal_unsigned(size, blockDuration)) { #ifdef DEBUG fprintf(stderr, "\tblock duration: %d (== %f ms)\n", blockDuration, (float)(blockDuration*fOurFile.fTimecodeScale/1000000.0)); #endif } break; } default: { // skip over this header skipHeader(size); #ifdef DEBUG fprintf(stderr, "\tskipped %lld bytes\n", size.val()); #endif break; } } setParseState(); } }
// Parses an unsigned integer value of at most 4 bytes (as given by "size") into "result".
// Returns False - leaving "result" untouched - if "size" is larger than 4, or if the underlying
// "parseEBMLVal_unsigned64()" call fails; returns True otherwise.
Boolean MatroskaFileParser::parseEBMLVal_unsigned(EBMLDataSize& size, unsigned& result) {
  if (!(size.val() <= 4)) return False; // the value is too wide for us

  u_int64_t wideValue;
  if (parseEBMLVal_unsigned64(size, wideValue)) {
    result = (unsigned)wideValue;
    return True;
  }

  return False;
}
// Skips over the data of a header whose length is given by "size", advancing "fCurOffsetInFile" by the
// number of bytes that were actually skipped.
//
// Hack: To avoid tripping into a parser 'internal error' if we try to skip an excessively large distance,
// the distance is capped at "bankSize()-12".
// (Such large distances are likely caused by erroneous data. We might not be able to recover from this, but at
// least we won't generate a parser 'internal error'.)
void MatroskaFileParser::skipHeader(EBMLDataSize const& size) {
  // Apply the cap to the full-width size *before* narrowing it to "unsigned".  (Narrowing first would let a
  // size that exceeds the range of "unsigned" wrap around to a small number, and so escape the cap.)
  // NOTE(review): this assumes that "size.val()" yields an unsigned 64-bit quantity - confirm against "EBMLDataSize".
  u_int64_t const requestedSize = size.val();
  unsigned const maxSkipDistance = bankSize()-12;

  unsigned const numBytesToSkip
    = requestedSize > maxSkipDistance ? maxSkipDistance : (unsigned)requestedSize;

  skipBytes(numBytesToSkip);
  fCurOffsetInFile += numBytesToSkip;
}
// Parses an unsigned integer value of at most 8 bytes (as given by "size") into "result", treating the first
// byte read as the most significant.  "fCurOffsetInFile" is advanced by one for each byte read.
// Returns False if "size" is larger than 8 (in which case "result" is untouched), or if the pre-set limit
// "fLimitOffsetInFile" is passed while reading (in which case "result" holds a partial value).
Boolean MatroskaFileParser::parseEBMLVal_unsigned64(EBMLDataSize& size, u_int64_t& result) {
  u_int64_t const numValueBytes = size.val();
  if (numValueBytes > 8) return False; // the value is too wide for us

  result = 0;
  unsigned numBytesRemaining = (unsigned)numValueBytes;
  while (numBytesRemaining > 0) {
    // Stop if we've hit our pre-set limit (a limit of 0 means 'no limit'):
    if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) return False;

    u_int8_t const nextByte = get1Byte();
    ++fCurOffsetInFile;

    result = (result << 8) | nextByte;
    --numBytesRemaining;
  }

  return True;
}
// Parses a floating-point value into "result".  "size" must be 4 (the value is taken as a "float") or
// 8 (the value is taken as a "double", and then narrowed to "float").
// Returns False - leaving "result" untouched - for any other size, or if reading the underlying integer fails.
//
// The integer that was read is reinterpreted by copying its object representation byte-by-byte (through
// "unsigned char" pointers), rather than by dereferencing a cast pointer such as "*(float*)&x".  The latter
// violates the language's type-aliasing rules, and so may be miscompiled by an optimizing compiler.
// NOTE(review): as before, this relies on "unsigned" being at least as large as "float", and on "u_int64_t"
// being at least as large as "double" - confirm for any unusual target platform.
Boolean MatroskaFileParser::parseEBMLVal_float(EBMLDataSize& size, float& result) {
  switch (size.val()) {
    case 4: {
      unsigned resultAsUnsigned;
      if (!parseEBMLVal_unsigned(size, resultAsUnsigned)) return False;

      float valueAsFloat;
      unsigned char const* from = (unsigned char const*)&resultAsUnsigned;
      unsigned char* to = (unsigned char*)&valueAsFloat;
      for (unsigned i = 0; i < sizeof valueAsFloat; ++i) to[i] = from[i];

      result = valueAsFloat;
      return True;
    }
    case 8: {
      u_int64_t resultAsU64;
      if (!parseEBMLVal_unsigned64(size, resultAsU64)) return False;

      double valueAsDouble;
      unsigned char const* from = (unsigned char const*)&resultAsU64;
      unsigned char* to = (unsigned char*)&valueAsDouble;
      for (unsigned i = 0; i < sizeof valueAsDouble; ++i) to[i] = from[i];

      result = (float)valueAsDouble;
      return True;
    }
    default: {
      return False;
    }
  }
}
// Reads "size" bytes of binary data into a newly-allocated array, which is returned in "result".
// On success, True is returned, and the caller becomes responsible for "delete[]"ing "result".
// On failure, False is returned, with "result" set to NULL.  Failure occurs if:
//  - "size" cannot be represented as an "unsigned" without truncation, or
//  - the pre-set limit "fLimitOffsetInFile" is passed before all of the bytes have been read.
// "fCurOffsetInFile" is advanced by one for each byte read.
Boolean MatroskaFileParser::parseEBMLVal_binary(EBMLDataSize& size, u_int8_t*& result) {
  result = NULL; // so that "result" is well-defined on every failure path

  // Reject a length that would not survive the narrowing to "unsigned".  (Otherwise we would read only a
  // truncated number of bytes, and hand back a buffer that doesn't match the declared length.)
  // NOTE(review): this assumes that "size.val()" yields an unsigned 64-bit quantity - confirm against "EBMLDataSize".
  u_int64_t const declaredLength = size.val();
  unsigned const resultLength = (unsigned)declaredLength;
  if (resultLength != declaredLength) return False;

  result = new u_int8_t[resultLength];
  if (result == NULL) return False;

  u_int8_t* p = result;
  unsigned i;
  for (i = 0; i < resultLength; ++i) {
    if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) break; // We've hit our pre-set limit

    u_int8_t c = get1Byte();
    ++fCurOffsetInFile;

    *p++ = c;
  }
  if (i < resultLength) { // an error occurred
    delete[] result;
    result = NULL;
    return False;
  }

  return True;
}
// Reads "size" bytes of character data into a newly-allocated, '\0'-terminated array, which is returned in
// "result".
// On success, True is returned, and the caller becomes responsible for "delete[]"ing "result".
// On failure, False is returned, with "result" set to NULL.  Failure occurs if:
//  - "size" cannot be represented as an "unsigned" without truncation,
//  - "size" is so large that adding room for the trailing '\0' would overflow, or
//  - the pre-set limit "fLimitOffsetInFile" is passed before all of the bytes have been read.
// "fCurOffsetInFile" is advanced by one for each byte read.
Boolean MatroskaFileParser::parseEBMLVal_string(EBMLDataSize& size, char*& result) {
  result = NULL; // so that "result" is well-defined on every failure path

  // Reject a length that would not survive the narrowing to "unsigned", and also a length for which
  // "resultLength + 1" would wrap around to 0.  (In the latter case we would allocate an empty array, but
  // then go on to write "resultLength" bytes - plus the trailing '\0' - beyond the end of it.)
  // NOTE(review): this assumes that "size.val()" yields an unsigned 64-bit quantity - confirm against "EBMLDataSize".
  u_int64_t const declaredLength = size.val();
  unsigned const resultLength = (unsigned)declaredLength;
  if (resultLength != declaredLength || resultLength + 1 == 0) return False;

  result = new char[resultLength + 1]; // allow for the trailing '\0'
  if (result == NULL) return False;

  char* p = result;
  unsigned i;
  for (i = 0; i < resultLength; ++i) {
    if (fLimitOffsetInFile > 0 && fCurOffsetInFile > fLimitOffsetInFile) break; // We've hit our pre-set limit

    u_int8_t c = get1Byte();
    ++fCurOffsetInFile;

    *p++ = c;
  }
  if (i < resultLength) { // an error occurred
    delete[] result;
    result = NULL;
    return False;
  }
  *p = '\0';

  return True;
}
// Parses a 'Cues' element, recording each cue point that it contains via "fOurFile.addCuePoint()".
// If the next header can't be parsed, or isn't MATROSKA_ID_CUES, nothing further is done.
// Otherwise "fLimitOffsetInFile" is set to the end of the 'Cues' data for the duration of the parse,
// and is reset to 0 afterwards.
// Always returns True.
Boolean MatroskaFileParser::parseCues() {
#if defined(DEBUG) || defined(DEBUG_CUES)
  fprintf(stderr, "parsing Cues\n");
#endif
  EBMLId headerId;
  EBMLDataSize headerSize;

  // The next header should be MATROSKA_ID_CUES.  If it's not what we expected, then we're done:
  if (!parseEBMLIdAndSize(headerId, headerSize)) return True;
  if (headerId != MATROSKA_ID_CUES) return True;

  // Make sure we don't read past the end of this header:
  fLimitOffsetInFile = fCurOffsetInFile + headerSize.val();

  // The most recently seen 'Cue Time' and 'Cue Cluster Position' (the latter converted to a file offset):
  double latestCueTime = 0.0;
  u_int64_t latestClusterOffsetInFile = 0;

  while (fCurOffsetInFile < fLimitOffsetInFile) {
    // Keep trying until an id+size pair has been parsed successfully:
    for (;;) {
      if (parseEBMLIdAndSize(headerId, headerSize)) break;
    }
#ifdef DEBUG_CUES
    if (headerId == MATROSKA_ID_CUE_POINT) fprintf(stderr, "\n"); // makes debugging output easier to read
    fprintf(stderr, "MatroskaFileParser::parseCues(): Parsed id 0x%s (%s), size: %lld\n", headerId.hexString(), headerId.stringName(), headerSize.val());
#endif

    switch (headerId.val()) {
      case MATROSKA_ID_CUE_POINT:
      case MATROSKA_ID_CUE_TRACK_POSITIONS: {
        // 'Cue Point' or 'Cue Track Positions' header: enter this
        break;
      }
      case MATROSKA_ID_CUE_TIME: {
        // 'Cue Time' header: convert the value to seconds, and remember it
        unsigned cueTime;
        if (!parseEBMLVal_unsigned(headerSize, cueTime)) break;

        latestCueTime = cueTime*(fOurFile.fTimecodeScale/1000000000.0);
#ifdef DEBUG_CUES
        fprintf(stderr, "\tCue Time %d (== %f seconds)\n", cueTime, latestCueTime);
#endif
        break;
      }
      case MATROSKA_ID_CUE_TRACK: {
        // 'Cue Track' header: read the value (but only for debugging; we don't do anything with it)
        unsigned cueTrack;
        if (!parseEBMLVal_unsigned(headerSize, cueTrack)) break;
#ifdef DEBUG_CUES
        fprintf(stderr, "\tCue Track %d\n", cueTrack);
#endif
        break;
      }
      case MATROSKA_ID_CUE_CLUSTER_POSITION: {
        // 'Cue Cluster Position' header: the value is relative to the start of the Segment data
        u_int64_t cueClusterPosition;
        if (!parseEBMLVal_unsigned64(headerSize, cueClusterPosition)) break;

        latestClusterOffsetInFile = fOurFile.fSegmentDataOffset + cueClusterPosition;
#ifdef DEBUG_CUES
        fprintf(stderr, "\tCue Cluster Position %llu (=> offset within the file: %llu (0x%llx))\n", cueClusterPosition, latestClusterOffsetInFile, latestClusterOffsetInFile);
#endif
        // Record this cue point:
        fOurFile.addCuePoint(latestCueTime, latestClusterOffsetInFile, 1/*default block number within cluster*/);
        break;
      }
      case MATROSKA_ID_CUE_BLOCK_NUMBER: {
        // 'Cue Block Number' header: a value of 0 is ignored
        unsigned cueBlockNumber;
        if (!parseEBMLVal_unsigned(headerSize, cueBlockNumber)) break;
        if (cueBlockNumber == 0) break;
#ifdef DEBUG_CUES
        fprintf(stderr, "\tCue Block Number %d\n", cueBlockNumber);
#endif
        // Record this cue point (overwriting any existing entry for this cue time):
        fOurFile.addCuePoint(latestCueTime, latestClusterOffsetInFile, cueBlockNumber);
        break;
      }
      default: {
        // We don't process this header, so just skip over it:
        skipHeader(headerSize);
#ifdef DEBUG_CUES
        fprintf(stderr, "\tskipped %lld bytes\n", headerSize.val());
#endif
        break;
      }
    }

    setParseState();
  }

  fLimitOffsetInFile = 0; // reset
#if defined(DEBUG) || defined(DEBUG_CUES)
  fprintf(stderr, "done parsing Cues\n");
#endif
#ifdef DEBUG_CUES
  fprintf(stderr, "Cue Point tree: ");
  fOurFile.printCuePoints(stderr);
  fprintf(stderr, "\n");
#endif

  return True; // we're done parsing Cues
}
Boolean MatroskaFileParser::parseTrack() { #ifdef DEBUG fprintf(stderr, "parsing Track\n"); #endif // Read and process each Matroska header, until we get to the end of the Track: MatroskaTrack* track = NULL; EBMLId id; EBMLDataSize size; while (fCurOffsetInFile < fLimitOffsetInFile) { while (!parseEBMLIdAndSize(id, size)) {} #ifdef DEBUG if (id == MATROSKA_ID_TRACK_ENTRY) fprintf(stderr, "\n"); // makes debugging output easier to read fprintf(stderr, "MatroskaFileParser::parseTrack(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val()); #endif switch (id.val()) { case MATROSKA_ID_TRACK_ENTRY: { // 'Track Entry' header: enter this // Create a new "MatroskaTrack" object for this entry: if (track != NULL && track->trackNumber == 0) delete track; // We had a previous "MatroskaTrack" object that was never used track = new MatroskaTrack; break; } case MATROSKA_ID_TRACK_NUMBER: { unsigned trackNumber; if (parseEBMLVal_unsigned(size, trackNumber)) { #ifdef DEBUG fprintf(stderr, "\tTrack Number %d\n", trackNumber); #endif if (track != NULL && trackNumber != 0) { track->trackNumber = trackNumber; fOurFile.fTracks.add(track, trackNumber); } } break; } case MATROSKA_ID_TRACK_TYPE: { unsigned trackType; if (parseEBMLVal_unsigned(size, trackType) && track != NULL) { // We convert the Matroska 'track type' code into our own code (which we can use as a bitmap): track->trackType = trackType == 1 ? MATROSKA_TRACK_TYPE_VIDEO : trackType == 2 ? MATROSKA_TRACK_TYPE_AUDIO : trackType == 0x11 ? MATROSKA_TRACK_TYPE_SUBTITLE : MATROSKA_TRACK_TYPE_OTHER; #ifdef DEBUG fprintf(stderr, "\tTrack Type 0x%02x (%s)\n", trackType, track->trackType == MATROSKA_TRACK_TYPE_VIDEO ? "video" : track->trackType == MATROSKA_TRACK_TYPE_AUDIO ? "audio" : track->trackType == MATROSKA_TRACK_TYPE_SUBTITLE ? 
"subtitle" : "<other>"); #endif } break; } case MATROSKA_ID_FLAG_ENABLED: { unsigned flagEnabled; if (parseEBMLVal_unsigned(size, flagEnabled)) { #ifdef DEBUG fprintf(stderr, "\tTrack is Enabled: %d\n", flagEnabled); #endif if (track != NULL) track->isEnabled = flagEnabled != 0; } break; } case MATROSKA_ID_FLAG_DEFAULT: { unsigned flagDefault; if (parseEBMLVal_unsigned(size, flagDefault)) { #ifdef DEBUG fprintf(stderr, "\tTrack is Default: %d\n", flagDefault); #endif if (track != NULL) track->isDefault = flagDefault != 0; } break; } case MATROSKA_ID_FLAG_FORCED: { unsigned flagForced; if (parseEBMLVal_unsigned(size, flagForced)) { #ifdef DEBUG fprintf(stderr, "\tTrack is Forced: %d\n", flagForced); #endif if (track != NULL) track->isForced = flagForced != 0; } break; } case MATROSKA_ID_DEFAULT_DURATION: { unsigned defaultDuration; if (parseEBMLVal_unsigned(size, defaultDuration)) { #ifdef DEBUG fprintf(stderr, "\tDefault duration %f ms\n", defaultDuration/1000000.0); #endif if (track != NULL) track->defaultDuration = defaultDuration; } break; } case MATROSKA_ID_MAX_BLOCK_ADDITION_ID: { unsigned maxBlockAdditionID; if (parseEBMLVal_unsigned(size, maxBlockAdditionID)) { #ifdef DEBUG fprintf(stderr, "\tMax Block Addition ID: %u\n", maxBlockAdditionID); #endif } break; } case MATROSKA_ID_NAME: { char* name; if (parseEBMLVal_string(size, name)) { #ifdef DEBUG fprintf(stderr, "\tName: %s\n", name); #endif if (track != NULL) { delete[] track->name; track->name = name; } else { delete[] name; } } break; } case MATROSKA_ID_LANGUAGE: { char* language; if (parseEBMLVal_string(size, language)) { #ifdef DEBUG fprintf(stderr, "\tLanguage: %s\n", language); #endif if (track != NULL) { delete[] track->language; track->language = language; } else { delete[] language; } } break; } case MATROSKA_ID_CODEC: { char* codecID; if (parseEBMLVal_string(size, codecID)) { #ifdef DEBUG fprintf(stderr, "\tCodec ID: %s\n", codecID); #endif if (track != NULL) { delete[] track->codecID; 
track->codecID = codecID; } else { delete[] codecID; } } break; } case MATROSKA_ID_CODEC_PRIVATE: { u_int8_t* codecPrivate; unsigned codecPrivateSize; if (parseEBMLVal_binary(size, codecPrivate)) { codecPrivateSize = (unsigned)size.val(); #ifdef DEBUG fprintf(stderr, "\tCodec Private: "); for (unsigned i = 0; i < codecPrivateSize; ++i) fprintf(stderr, "%02x:", codecPrivate[i]); fprintf(stderr, "\n"); #endif if (track != NULL) { delete[] track->codecPrivate; track->codecPrivate = codecPrivate; track->codecPrivateSize = codecPrivateSize; } else { delete[] codecPrivate; } } break; } case MATROSKA_ID_VIDEO: { // 'Video settings' header: enter this break; } case MATROSKA_ID_PIXEL_WIDTH: { unsigned pixelWidth; if (parseEBMLVal_unsigned(size, pixelWidth)) { #ifdef DEBUG fprintf(stderr, "\tPixel Width %d\n", pixelWidth); #endif } break; } case MATROSKA_ID_PIXEL_HEIGHT: { unsigned pixelHeight; if (parseEBMLVal_unsigned(size, pixelHeight)) { #ifdef DEBUG fprintf(stderr, "\tPixel Height %d\n", pixelHeight); #endif } break; } case MATROSKA_ID_DISPLAY_WIDTH: { unsigned displayWidth; if (parseEBMLVal_unsigned(size, displayWidth)) { #ifdef DEBUG fprintf(stderr, "\tDisplay Width %d\n", displayWidth); #endif } break; } case MATROSKA_ID_DISPLAY_HEIGHT: { unsigned displayHeight; if (parseEBMLVal_unsigned(size, displayHeight)) { #ifdef DEBUG fprintf(stderr, "\tDisplay Height %d\n", displayHeight); #endif } break; } case MATROSKA_ID_AUDIO: { // 'Audio settings' header: enter this break; } case MATROSKA_ID_SAMPLING_FREQUENCY: { float samplingFrequency; if (parseEBMLVal_float(size, samplingFrequency)) { if (track != NULL) { track->samplingFrequency = (unsigned)samplingFrequency; #ifdef DEBUG fprintf(stderr, "\tSampling frequency %f (->%d)\n", samplingFrequency, track->samplingFrequency); #endif } } break; } case MATROSKA_ID_OUTPUT_SAMPLING_FREQUENCY: { float outputSamplingFrequency; if (parseEBMLVal_float(size, outputSamplingFrequency)) { #ifdef DEBUG fprintf(stderr, "\tOutput sampling 
frequency %f\n", outputSamplingFrequency); #endif } break; } case MATROSKA_ID_CHANNELS: { unsigned numChannels; if (parseEBMLVal_unsigned(size, numChannels)) { #ifdef DEBUG fprintf(stderr, "\tChannels %d\n", numChannels); #endif if (track != NULL) track->numChannels = numChannels; } break; } case MATROSKA_ID_CONTENT_ENCODINGS: case MATROSKA_ID_CONTENT_ENCODING: { // 'Content Encodings' or 'Content Encoding' header: enter this break; } case MATROSKA_ID_CONTENT_COMPRESSION: { // 'Content Compression' header: enter this // Note: We currently support only 'Header Stripping' compression, not 'zlib' compression (the default algorithm). // Therefore, we disable this track, unless/until we later see that 'Header Stripping' is supported: if (track != NULL) track->isEnabled = False; break; } case MATROSKA_ID_CONTENT_COMP_ALGO: { unsigned contentCompAlgo; if (parseEBMLVal_unsigned(size, contentCompAlgo)) { #ifdef DEBUG fprintf(stderr, "\tContent Compression Algorithm %d (%s)\n", contentCompAlgo, contentCompAlgo == 0 ? "zlib" : contentCompAlgo == 3 ? 
"Header Stripping" : "<unknown>"); #endif // The only compression algorithm that we support is #3: Header Stripping; disable the track otherwise if (track != NULL) track->isEnabled = contentCompAlgo == 3; } break; } case MATROSKA_ID_CONTENT_COMP_SETTINGS: { u_int8_t* headerStrippedBytes; unsigned headerStrippedBytesSize; if (parseEBMLVal_binary(size, headerStrippedBytes)) { headerStrippedBytesSize = (unsigned)size.val(); #ifdef DEBUG fprintf(stderr, "\tHeader Stripped Bytes: "); for (unsigned i = 0; i < headerStrippedBytesSize; ++i) fprintf(stderr, "%02x:", headerStrippedBytes[i]); fprintf(stderr, "\n"); #endif if (track != NULL) { delete[] track->headerStrippedBytes; track->headerStrippedBytes = headerStrippedBytes; track->headerStrippedBytesSize = headerStrippedBytesSize; } else { delete[] headerStrippedBytes; } } break; } case MATROSKA_ID_CONTENT_ENCRYPTION: { // 'Content Encrpytion' header: skip this // Note: We don't currently support encryption at all. Therefore, we disable this track: if (track != NULL) track->isEnabled = False; // Fall through to... } default: { // We don't process this header, so just skip over it: skipHeader(size); #ifdef DEBUG fprintf(stderr, "\tskipped %lld bytes\n", size.val()); #endif break; } } setParseState(); } fLimitOffsetInFile = 0; // reset if (track != NULL && track->trackNumber == 0) delete track; // We had a previous "MatroskaTrack" object that was never used return True; // we're done parsing track entries }
void MatroskaFileParser::lookForNextTrack() { #ifdef DEBUG fprintf(stderr, "looking for Track\n"); #endif EBMLId id; EBMLDataSize size; // Read and skip over (or enter) each Matroska header, until we get to a 'Track'. while (fCurrentParseState == LOOKING_FOR_TRACKS) { while (!parseEBMLIdAndSize(id, size)) {} #ifdef DEBUG fprintf(stderr, "MatroskaFileParser::lookForNextTrack(): Parsed id 0x%s (%s), size: %lld\n", id.hexString(), id.stringName(), size.val()); #endif switch (id.val()) { case MATROSKA_ID_SEGMENT: { // 'Segment' header: enter this // Remember the position, within the file, of the start of Segment data, because Seek Positions are relative to this: fOurFile.fSegmentDataOffset = fCurOffsetInFile; break; } case MATROSKA_ID_SEEK_HEAD: { // 'Seek Head' header: enter this break; } case MATROSKA_ID_SEEK: { // 'Seek' header: enter this break; } case MATROSKA_ID_SEEK_ID: { // 'Seek ID' header: get this value if (parseEBMLNumber(fLastSeekId)) { #ifdef DEBUG fprintf(stderr, "\tSeek ID 0x%s:\t%s\n", fLastSeekId.hexString(), fLastSeekId.stringName()); #endif } break; } case MATROSKA_ID_SEEK_POSITION: { // 'Seek Position' header: get this value u_int64_t seekPosition; if (parseEBMLVal_unsigned64(size, seekPosition)) { u_int64_t offsetInFile = fOurFile.fSegmentDataOffset + seekPosition; #ifdef DEBUG fprintf(stderr, "\tSeek Position %llu (=> offset within the file: %llu (0x%llx))\n", seekPosition, offsetInFile, offsetInFile); #endif // The only 'Seek Position's that we care about are for 'Cluster' and 'Cues': if (fLastSeekId == MATROSKA_ID_CLUSTER) { fOurFile.fClusterOffset = offsetInFile; } else if (fLastSeekId == MATROSKA_ID_CUES) { fOurFile.fCuesOffset = offsetInFile; } } break; } case MATROSKA_ID_INFO: { // 'Segment Info' header: enter this break; } case MATROSKA_ID_TIMECODE_SCALE: { // 'Timecode Scale' header: get this value unsigned timecodeScale; if (parseEBMLVal_unsigned(size, timecodeScale) && timecodeScale > 0) { fOurFile.fTimecodeScale = timecodeScale; #ifdef 
DEBUG fprintf(stderr, "\tTimecode Scale %u ns (=> Segment Duration == %f seconds)\n", fOurFile.timecodeScale(), fOurFile.fileDuration()); #endif } break; } case MATROSKA_ID_DURATION: { // 'Segment Duration' header: get this value if (parseEBMLVal_float(size, fOurFile.fSegmentDuration)) { #ifdef DEBUG fprintf(stderr, "\tSegment Duration %f (== %f seconds)\n", fOurFile.segmentDuration(), fOurFile.fileDuration()); #endif } break; } case MATROSKA_ID_TRACKS: { // enter this, and move on to parsing 'Tracks' fLimitOffsetInFile = fCurOffsetInFile + size.val(); // Make sure we don't read past the end of this header fCurrentParseState = PARSING_TRACK; break; } default: { // skip over this header skipHeader(size); #ifdef DEBUG fprintf(stderr, "\tskipped %lld bytes\n", size.val()); #endif break; } } setParseState(); } }