/** \brief Remove leading and trailing whitespace from a string in place

   Leading blanks and tabs are always dropped. If \a newlines is \c true
   then leading carriage return / line feed characters (as classified by
   \c iscr) are dropped as well. The trailing end is handled by
   \c strip_back, which receives the same \a newlines flag.

   \param s The string to modify.
   \param newlines Whether line ending characters count as strippable
     characters, too.
*/
void strip(std::string &s, bool newlines) {
  const char *text = s.c_str();
  int skip         = 0;

  // Count how many leading characters have to go. The scan stops at the
  // terminating NUL or at the first character that is not strippable.
  for (; text[skip] != 0; ++skip) {
    bool const strippable = isblanktab(text[skip]) || (newlines && iscr(text[skip]));
    if (!strippable)
      break;
  }

  if (skip > 0)
    s.erase(0, skip);

  strip_back(s, newlines);
}
void srt_parser_c::parse() { boost::regex timecode_re(SRT_RE_TIMECODE_LINE, boost::regex::perl); boost::regex number_re("^\\d+$", boost::regex::perl); boost::regex coordinates_re(SRT_RE_COORDINATES, boost::regex::perl); int64_t start = 0; int64_t end = 0; int64_t previous_start = 0; bool timecode_warning_printed = false; parser_state_e state = STATE_INITIAL; int line_number = 0; unsigned int subtitle_number = 0; unsigned int timecode_number = 0; std::string subtitles; m_io->setFilePointer(0, seek_beginning); while (1) { std::string s; if (!m_io->getline2(s)) break; line_number++; strip_back(s); if (s.empty()) { if ((STATE_INITIAL == state) || (STATE_TIME == state)) continue; state = STATE_SUBS_OR_NUMBER; if (!subtitles.empty()) subtitles += "\n"; subtitles += "\n"; continue; } if (STATE_INITIAL == state) { if (!boost::regex_match(s, number_re)) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Error in line %1%: expected subtitle number and found some text.\n")) % line_number); break; } state = STATE_TIME; parse_number(s, subtitle_number); } else if (STATE_TIME == state) { boost::smatch matches; if (!boost::regex_search(s, matches, timecode_re)) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Error in line %1%: expected a SRT timecode line but found something else. Aborting this file.\n")) % line_number); break; } int s_h = 0, s_min = 0, s_sec = 0, e_h = 0, e_min = 0, e_sec = 0; // 1 2 3 4 5 6 7 8 // "\\s*(-?)\\s*(\\d+):\\s(-?)*(\\d+):\\s*(-?)(\\d+)[,\\.]\\s*(-?)(\\d+)?" parse_number(matches[ 2].str(), s_h); parse_number(matches[ 4].str(), s_min); parse_number(matches[ 6].str(), s_sec); parse_number(matches[10].str(), e_h); parse_number(matches[12].str(), e_min); parse_number(matches[14].str(), e_sec); std::string s_rest = matches[ 8].str(); std::string e_rest = matches[16].str(); auto neg_calculator = [&](size_t const start_idx) -> int64_t { int64_t neg = 1; for (size_t idx = start_idx; idx <= (start_idx + 6); idx += 2) neg *= matches[idx].str() == "-" ? 
-1 : 1; return neg; }; int64_t s_neg = neg_calculator(1); int64_t e_neg = neg_calculator(9); if (boost::regex_search(s, coordinates_re) && !m_coordinates_warning_shown) { mxwarn_tid(m_file_name, m_tid, Y("This file contains coordinates in the timecode lines. " "Such coordinates are not supported by the Matroska SRT subtitle format. " "The coordinates will be removed automatically.\n")); m_coordinates_warning_shown = true; } // The previous entry is done now. Append it to the list of subtitles. if (!subtitles.empty()) { strip_back(subtitles, true); add(start, end, timecode_number, subtitles.c_str()); } // Calculate the start and end time in ns precision for the following entry. start = (int64_t)s_h * 60 * 60 + s_min * 60 + s_sec; end = (int64_t)e_h * 60 * 60 + e_min * 60 + e_sec; start *= 1000000000ll * s_neg; end *= 1000000000ll * e_neg; while (s_rest.length() < 9) s_rest += "0"; if (s_rest.length() > 9) s_rest.erase(9); start += atol(s_rest.c_str()); while (e_rest.length() < 9) e_rest += "0"; if (e_rest.length() > 9) e_rest.erase(9); end += atol(e_rest.c_str()); if (0 > start) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Line %1%: Negative timestamp encountered. The entry will be adjusted to start from 00:00:00.000.\n")) % line_number); end -= start; start = 0; if (0 > end) end *= -1; } // There are files for which start timecodes overlap. Matroska requires // blocks to be sorted by their timecode. mkvmerge does this at the end // of this function, but warn the user that the original order is being // changed. if (!timecode_warning_printed && (start < previous_start)) { mxwarn_tid(m_file_name, m_tid, boost::format(Y("Warning in line %1%: The start timecode is smaller than that of the previous entry. 
" "All entries from this file will be sorted by their start time.\n")) % line_number); timecode_warning_printed = true; } previous_start = start; subtitles = ""; state = STATE_SUBS; timecode_number = subtitle_number; } else if (STATE_SUBS == state) { if (!subtitles.empty()) subtitles += "\n"; subtitles += s; } else if (boost::regex_match(s, number_re)) { state = STATE_TIME; parse_number(s, subtitle_number); } else { if (!subtitles.empty()) subtitles += "\n"; subtitles += s; } } if (!subtitles.empty()) { strip_back(subtitles, true); add(start, end, timecode_number, subtitles.c_str()); } sort(); }