/** \brief Assign the next external timecode (and possibly duration) to a packet.

   While entries are left, the packet receives \c m_timecodes[m_frameno]
   and the frame counter is advanced. The packet's duration is replaced by
   \c m_durations[m_frameno] unless \c m_preserve_duration is set and the
   packet already carries a positive duration.

   Once the list of external timecodes is exhausted a warning is printed
   (only once per factory) and every further packet receives the last
   known timecode, or 0 if the list is empty. Previously only the first
   such packet was handled this way; all later calls indexed
   \c m_timecodes out of range.

   \param packet The packet whose \c assigned_timecode and \c duration
     are set.

   \return Always \c false.
*/
bool
timecode_factory_v2_c::get_next(packet_cptr &packet) {
  // The packet's own duration is only kept if the user asked for it and
  // the packet actually has a positive one.
  auto const keep_duration = m_preserve_duration && (0 < packet->duration);

  if (static_cast<size_t>(m_frameno) >= m_timecodes.size()) {
    if (!m_warning_printed) {
      mxwarn_tid(m_source_name, m_tid,
                 boost::format(Y("The number of external timecodes %1% is smaller than the number of frames in this track. "
                                 "The remaining frames of this track might not be timestamped the way you intended them to be. mkvmerge might even crash.\n"))
                 % m_timecodes.size());
      m_warning_printed = true;
    }

    // Never index past the end of the list: fall back to the last entry
    // (or 0 for an empty list) for every surplus frame, not just the
    // first one.
    auto const fallback = m_timecodes.empty() ? 0 : m_timecodes.back();

    packet->assigned_timecode = fallback;
    // NOTE(review): the fallback duration is the last *timecode*, not the
    // last duration. This mirrors the historic behavior; confirm whether
    // m_durations.back() was intended.
    if (!keep_duration)
      packet->duration = fallback;

    return false;
  }

  packet->assigned_timecode = m_timecodes[m_frameno];
  if (!keep_duration)
    packet->duration = m_durations[m_frameno];
  m_frameno++;

  return false;
}
/** \brief Build one track per \c \<subtitles\> element of a USF document.

   Every \c \<subtitles\> child of the document element becomes a new
   entry in \c m_tracks. The optional \c code attribute of its
   \c \<language\> child is mapped to an ISO 639-2 code; unknown codes
   trigger a warning unless \c g_identifying is set. Each \c \<subtitle\>
   child is turned into a \c usf_entry_t whose text is the raw XML of the
   element's children.

   \param doc The parsed XML document.
*/
void
usf_reader_c::parse_subtitles(mtx::xml::document_cptr &doc) {
  auto root = doc->document_element();

  for (auto track_node = root.child("subtitles"); track_node; track_node = track_node.next_sibling("subtitles")) {
    auto track = std::make_shared<usf_track_t>();
    m_tracks.push_back(track);

    // Optional language of the track.
    auto language_code = track_node.child("language").attribute("code");
    if (language_code && !std::string{language_code.value()}.empty()) {
      int language_idx = map_to_iso639_2_code(language_code.value());

      if (-1 == language_idx) {
        if (!g_identifying)
          mxwarn_tid(m_ti.m_fname, m_tracks.size() - 1,
                     boost::format(Y("The language code '%1%' is not a valid ISO639-2 language code and will be ignored.\n")) % language_code.value());

      } else
        track->m_language = iso639_languages[language_idx].iso639_2_code;
    }

    for (auto entry_node = track_node.child("subtitle"); entry_node; entry_node = entry_node.next_sibling("subtitle")) {
      usf_entry_t entry;
      int64_t duration = -1;

      if (auto start_attr = entry_node.attribute("start"))
        entry.m_start = try_to_parse_timecode(start_attr.value());

      if (auto stop_attr = entry_node.attribute("stop"))
        entry.m_end = try_to_parse_timecode(stop_attr.value());

      if (auto duration_attr = entry_node.attribute("duration"))
        duration = try_to_parse_timecode(duration_attr.value());

      // Derive the end from start + duration if only those two are known.
      if ((-1 == entry.m_end) && (-1 != entry.m_start) && (-1 != duration))
        entry.m_end = entry.m_start + duration;

      // The entry's text is the serialized XML of all child nodes.
      std::stringstream markup;
      for (auto child : entry_node)
        child.print(markup, "", pugi::format_default | pugi::format_raw);
      entry.m_text = markup.str();

      track->m_entries.push_back(entry);
    }
  }
}
/*
   Handle a single Ogg page.

   The demuxer responsible for the page is looked up by the page's serial
   number. A missing demuxer means that either the OGG file is damaged
   (very rare) or the page belongs to a stream the user didn't want
   extracted; demuxers that are not in use are skipped the same way.

   Pages whose granule position is lower than the last one seen for the
   stream are discarded with a warning. All other pages are submitted to
   the stream state and the demuxer processes the packets they contain.
*/
void
ogm_reader_c::process_page(ogg_page *og) {
  ogm_demuxer_cptr dmx = find_demuxer(ogg_page_serialno(og));
  if (!dmx || !dmx->in_use)
    return;

  int64_t granulepos = ogg_page_granulepos(og);

  // A granule position of -1 is exempt from the ordering check.
  bool const position_went_backwards = (-1 != granulepos) && (granulepos < dmx->last_granulepos);
  if (position_went_backwards) {
    mxwarn_tid(m_ti.m_fname, dmx->track_id,
               Y("The timecodes for this stream have been reset in the middle of the file. This is not supported. The current packet will be discarded.\n"));
    return;
  }

  ogg_stream_pagein(&dmx->os, og);
  dmx->process_page(granulepos);
  dmx->last_granulepos = granulepos;
}
/** \brief Second stage of adding a packet to this packetizer's queue.

   The steps, in order:
   - require Matroska version 4 if the packet has discard padding;
   - run the timecode and the backward/forward references through
     \c ADJUST_TIMECODE, scale the duration by \c m_ti.m_tcsync and
     subtract the discard padding from it;
   - raise the track's minimum cache if references require it;
   - silently drop packets whose adjusted timecode is negative;
   - optionally check that timecodes do not go backwards, shifting audio
     packets forward and warning in that case;
   - queue the packet and run the timestamp factory.

   \param pack The packet to add.
*/
void
generic_packetizer_c::add_packet2(packet_cptr pack) {
  if (pack->has_discard_padding())
    set_required_matroska_version(4);

  // Timecode & reference adjustments.
  pack->timecode = ADJUST_TIMECODE(pack->timecode);
  if (pack->has_bref())
    pack->bref = ADJUST_TIMECODE(pack->bref);
  if (pack->has_fref())
    pack->fref = ADJUST_TIMECODE(pack->fref);

  if (pack->has_duration()) {
    pack->duration = static_cast<int64_t>(pack->duration * m_ti.m_tcsync.numerator / m_ti.m_tcsync.denominator);
    if (pack->has_discard_padding())
      pack->duration -= std::min(pack->duration, pack->discard_padding.to_ns());
  }

  // Forward references need a cache of two, backward references of one.
  int required_min_cache = 0;
  if ((2 > m_htrack_min_cache) && pack->has_fref())
    required_min_cache = 2;
  else if ((1 > m_htrack_min_cache) && pack->has_bref())
    required_min_cache = 1;

  if (0 != required_min_cache) {
    set_track_min_cache(required_min_cache);
    rerender_track_headers();
  }

  if (0 > pack->timecode)
    return;

  // A timecode lower than the previous packet's one is only legitimate
  // for B frames: they arrive in coding order, e.g. I P B1 B2 with the
  // timecodes I: 0, P: 120, B1: 40, B2: 80.
  bool const timecode_went_backwards = !m_relaxed_timecode_checking
                                    && (pack->timecode < m_safety_last_timecode)
                                    && (0 > pack->fref)
                                    && hack_engaged(ENGAGE_ENABLE_TIMECODE_WARNING);

  if (timecode_went_backwards) {
    if (track_audio != m_htrack_type)
      mxwarn_tid(m_ti.m_fname, m_ti.m_id,
                 boost::format("generic_packetizer_c::add_packet2: timecode < last_timecode (%1% < %2%). %3%\n")
                 % format_timestamp(pack->timecode) % format_timestamp(m_safety_last_timecode) % BUGMSG);

    else {
      // Audio: move this packet right behind the previous one and
      // remember the accumulated shift.
      int64_t shift = m_safety_last_timecode + m_safety_last_duration - pack->timecode;

      m_correction_timecode_offset += shift;
      pack->timecode               += shift;
      if (pack->has_bref())
        pack->bref += shift;
      if (pack->has_fref())
        pack->fref += shift;

      mxwarn_tid(m_ti.m_fname, m_ti.m_id,
                 boost::format(Y("The current packet's timecode is smaller than that of the previous packet. "
                                 "This usually means that the source file is a Matroska file that has not been created 100%% correctly. "
                                 "The timecodes of all packets will be adjusted by %1%ms in order not to lose any data. "
                                 "This may throw audio/video synchronization off, but that can be corrected with mkvmerge's \"--sync\" option. "
                                 "If you already use \"--sync\" and you still get this warning then do NOT worry -- this is normal. "
                                 "If this error happens more than once and you get this message more than once for a particular track "
                                 "then either is the source file badly mastered, or mkvmerge contains a bug. "
                                 "In this case you should contact the author Moritz Bunkus <*****@*****.**>.\n"))
                 % ((shift + 500000) / 1000000));
    }
  }

  m_safety_last_timecode        = pack->timecode;
  m_safety_last_duration        = pack->duration;
  pack->timecode_before_factory = pack->timecode;

  m_packet_queue.push_back(pack);

  bool const apply_immediately = !m_timestamp_factory || (TFA_IMMEDIATE == m_timestamp_factory_application_mode);
  if (apply_immediately)
    apply_factory_once(pack);
  else
    apply_factory();
}
void srt_parser_c::parse() { boost::regex timecode_re(SRT_RE_TIMECODE_LINE, boost::regex::perl); boost::regex number_re("^\\d+$", boost::regex::perl); boost::regex coordinates_re(SRT_RE_COORDINATES, boost::regex::perl); int64_t start = 0; int64_t end = 0; int64_t previous_start = 0; bool timecode_warning_printed = false; parser_state_e state = STATE_INITIAL; int line_number = 0; unsigned int subtitle_number = 0; unsigned int timecode_number = 0; std::string subtitles; m_io->setFilePointer(0, seek_beginning); while (1) { std::string s; if (!m_io->getline2(s)) break; line_number++; strip_back(s); if (s.empty()) { if ((STATE_INITIAL == state) || (STATE_TIME == state)) continue; state = STATE_SUBS_OR_NUMBER; if (!subtitles.empty()) subtitles += "\n"; subtitles += "\n"; continue; } if (STATE_INITIAL == state) { if (!boost::regex_match(s, number_re)) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Error in line %1%: expected subtitle number and found some text.\n")) % line_number); break; } state = STATE_TIME; parse_number(s, subtitle_number); } else if (STATE_TIME == state) { boost::smatch matches; if (!boost::regex_search(s, matches, timecode_re)) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Error in line %1%: expected a SRT timecode line but found something else. Aborting this file.\n")) % line_number); break; } int s_h = 0, s_min = 0, s_sec = 0, e_h = 0, e_min = 0, e_sec = 0; // 1 2 3 4 5 6 7 8 // "\\s*(-?)\\s*(\\d+):\\s(-?)*(\\d+):\\s*(-?)(\\d+)[,\\.]\\s*(-?)(\\d+)?" parse_number(matches[ 2].str(), s_h); parse_number(matches[ 4].str(), s_min); parse_number(matches[ 6].str(), s_sec); parse_number(matches[10].str(), e_h); parse_number(matches[12].str(), e_min); parse_number(matches[14].str(), e_sec); std::string s_rest = matches[ 8].str(); std::string e_rest = matches[16].str(); auto neg_calculator = [&](size_t const start_idx) -> int64_t { int64_t neg = 1; for (size_t idx = start_idx; idx <= (start_idx + 6); idx += 2) neg *= matches[idx].str() == "-" ? 
-1 : 1; return neg; }; int64_t s_neg = neg_calculator(1); int64_t e_neg = neg_calculator(9); if (boost::regex_search(s, coordinates_re) && !m_coordinates_warning_shown) { mxwarn_tid(m_file_name, m_tid, Y("This file contains coordinates in the timecode lines. " "Such coordinates are not supported by the Matroska SRT subtitle format. " "The coordinates will be removed automatically.\n")); m_coordinates_warning_shown = true; } // The previous entry is done now. Append it to the list of subtitles. if (!subtitles.empty()) { strip_back(subtitles, true); add(start, end, timecode_number, subtitles.c_str()); } // Calculate the start and end time in ns precision for the following entry. start = (int64_t)s_h * 60 * 60 + s_min * 60 + s_sec; end = (int64_t)e_h * 60 * 60 + e_min * 60 + e_sec; start *= 1000000000ll * s_neg; end *= 1000000000ll * e_neg; while (s_rest.length() < 9) s_rest += "0"; if (s_rest.length() > 9) s_rest.erase(9); start += atol(s_rest.c_str()); while (e_rest.length() < 9) e_rest += "0"; if (e_rest.length() > 9) e_rest.erase(9); end += atol(e_rest.c_str()); if (0 > start) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Line %1%: Negative timestamp encountered. The entry will be adjusted to start from 00:00:00.000.\n")) % line_number); end -= start; start = 0; if (0 > end) end *= -1; } // There are files for which start timecodes overlap. Matroska requires // blocks to be sorted by their timecode. mkvmerge does this at the end // of this function, but warn the user that the original order is being // changed. if (!timecode_warning_printed && (start < previous_start)) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Warning in line %1%: The start timecode is smaller than that of the previous entry. 
" "All entries from this file will be sorted by their start time.\n")) % line_number); timecode_warning_printed = true; } previous_start = start; subtitles = ""; state = STATE_SUBS; timecode_number = subtitle_number; } else if (STATE_SUBS == state) { if (!subtitles.empty()) subtitles += "\n"; subtitles += s; } else if (boost::regex_match(s, number_re)) { state = STATE_TIME; parse_number(s, subtitle_number); } else { if (!subtitles.empty()) subtitles += "\n"; subtitles += s; } } if (!subtitles.empty()) { strip_back(subtitles, true); add(start, end, timecode_number, subtitles.c_str()); } sort(); }
void ssa_parser_c::parse() { boost::regex sec_styles_ass_re("^\\s*\\[V4\\+\\s+Styles\\]", boost::regex::perl | boost::regex::icase); boost::regex sec_styles_re( "^\\s*\\[V4\\s+Styles\\]", boost::regex::perl | boost::regex::icase); boost::regex sec_info_re( "^\\s*\\[Script\\s+Info\\]", boost::regex::perl | boost::regex::icase); boost::regex sec_events_re( "^\\s*\\[Events\\]", boost::regex::perl | boost::regex::icase); boost::regex sec_graphics_re( "^\\s*\\[Graphics\\]", boost::regex::perl | boost::regex::icase); boost::regex sec_fonts_re( "^\\s*\\[Fonts\\]", boost::regex::perl | boost::regex::icase); int num = 0; ssa_section_e section = SSA_SECTION_NONE; ssa_section_e previous_section = SSA_SECTION_NONE; std::string name_field = "Name"; std::string attachment_name, attachment_data_uu; m_io->setFilePointer(0, seek_beginning); while (!m_io->eof()) { std::string line; if (!m_io->getline2(line)) break; bool add_to_global = true; // A normal line. Let's see if this file is ASS and not SSA. if (!strcasecmp(line.c_str(), "ScriptType: v4.00+")) m_is_ass = true; else if (boost::regex_search(line, sec_styles_ass_re)) { m_is_ass = true; section = SSA_SECTION_V4STYLES; } else if (boost::regex_search(line, sec_styles_re)) section = SSA_SECTION_V4STYLES; else if (boost::regex_search(line, sec_info_re)) section = SSA_SECTION_INFO; else if (boost::regex_search(line, sec_events_re)) section = SSA_SECTION_EVENTS; else if (boost::regex_search(line, sec_graphics_re)) { section = SSA_SECTION_GRAPHICS; add_to_global = false; } else if (boost::regex_search(line, sec_fonts_re)) { section = SSA_SECTION_FONTS; add_to_global = false; } else if (SSA_SECTION_EVENTS == section) { if (balg::istarts_with(line, "Format: ")) { // Analyze the format string. m_format = split(&line.c_str()[strlen("Format: ")]); strip(m_format); // Let's see if "Actor" is used in the format instead of "Name". 
size_t i; for (i = 0; m_format.size() > i; ++i) if (balg::iequals(m_format[i], "actor")) { name_field = "Actor"; break; } } else if (balg::istarts_with(line, "Dialogue: ")) { if (m_format.empty()) throw mtx::input::extended_x(Y("ssa_reader: Invalid format. Could not find the \"Format\" line in the \"[Events]\" section.")); std::string orig_line = line; line.erase(0, strlen("Dialogue: ")); // Trim the start. // Split the line into fields. std::vector<std::string> fields = split(line.c_str(), ",", m_format.size()); while (fields.size() < m_format.size()) fields.push_back(std::string("")); // Parse the start time. std::string stime = get_element("Start", fields); int64_t start = parse_time(stime); if (0 > start) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Malformed line? (%1%)\n")) % orig_line); continue; } // Parse the end time. stime = get_element("End", fields); int64_t end = parse_time(stime); if (0 > end) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Malformed line? (%1%)\n")) % orig_line); continue; } if (end < start) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Malformed line? 
(%1%)\n")) % orig_line); continue; } // Specs say that the following fields are to put into the block: // ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, // Text std::string comma = ","; line = to_string(num) + comma + get_element("Layer", fields) + comma + get_element("Style", fields) + comma + get_element(name_field.c_str(), fields) + comma + get_element("MarginL", fields) + comma + get_element("MarginR", fields) + comma + get_element("MarginV", fields) + comma + get_element("Effect", fields) + comma + recode_text(fields); add(start, end, num, line); num++; add_to_global = false; } } else if ((SSA_SECTION_FONTS == section) || (SSA_SECTION_GRAPHICS == section)) { if (balg::istarts_with(line, "fontname:")) { add_attachment_maybe(attachment_name, attachment_data_uu, section); line.erase(0, strlen("fontname:")); strip(line, true); attachment_name = line; } else { strip(line, true); attachment_data_uu += line; } add_to_global = false; } if (add_to_global) { m_global += line; m_global += "\r\n"; } if (previous_section != section) add_attachment_maybe(attachment_name, attachment_data_uu, previous_section); previous_section = section; } sort(); }