Example #1
0
/** Strips whitespace from both ends of \c s in place.
 *
 * Leading characters are removed for as long as \c isblanktab() accepts
 * them; if \c newlines is \c true, characters accepted by \c iscr() are
 * removed as well. The rest of the work is delegated to
 * \c strip_back(s, newlines).
 *
 * \param s        The string to modify.
 * \param newlines Whether characters matched by \c iscr() are stripped too.
 */
void
strip(std::string &s,
      bool newlines) {
  const char *c            = s.c_str();
  // Use the string's own size type: the value indexes the buffer and is
  // handed to erase(), so a signed int could overflow on very long input.
  std::string::size_type i = 0;

  // c_str() is NUL-terminated, so the scan cannot run past the buffer.
  while ((c[i] != 0) && (isblanktab(c[i]) || (newlines && iscr(c[i]))))
    ++i;

  if (i > 0)
    s.erase(0, i);

  strip_back(s, newlines);
}
Example #2
0
void
srt_parser_c::parse() {
    boost::regex timecode_re(SRT_RE_TIMECODE_LINE, boost::regex::perl);
    boost::regex number_re("^\\d+$", boost::regex::perl);
    boost::regex coordinates_re(SRT_RE_COORDINATES, boost::regex::perl);

    int64_t start                 = 0;
    int64_t end                   = 0;
    int64_t previous_start        = 0;
    bool timecode_warning_printed = false;
    parser_state_e state          = STATE_INITIAL;
    int line_number               = 0;
    unsigned int subtitle_number  = 0;
    unsigned int timecode_number  = 0;
    std::string subtitles;

    m_io->setFilePointer(0, seek_beginning);

    while (1) {
        std::string s;
        if (!m_io->getline2(s))
            break;

        line_number++;
        strip_back(s);

        if (s.empty()) {
            if ((STATE_INITIAL == state) || (STATE_TIME == state))
                continue;

            state = STATE_SUBS_OR_NUMBER;

            if (!subtitles.empty())
                subtitles += "\n";
            subtitles += "\n";
            continue;
        }

        if (STATE_INITIAL == state) {
            if (!boost::regex_match(s, number_re)) {
                mxwarn_tid(m_file_name, m_tid, boost::format(Y("Error in line %1%: expected subtitle number and found some text.\n")) % line_number);
                break;
            }
            state = STATE_TIME;
            parse_number(s, subtitle_number);

        } else if (STATE_TIME == state) {
            boost::smatch matches;
            if (!boost::regex_search(s, matches, timecode_re)) {
                mxwarn_tid(m_file_name, m_tid, boost::format(Y("Error in line %1%: expected a SRT timecode line but found something else. Aborting this file.\n")) % line_number);
                break;
            }

            int s_h = 0, s_min = 0, s_sec = 0, e_h = 0, e_min = 0, e_sec = 0;

            //        1         2       3      4        5     6             7    8
            // "\\s*(-?)\\s*(\\d+):\\s(-?)*(\\d+):\\s*(-?)(\\d+)[,\\.]\\s*(-?)(\\d+)?"

            parse_number(matches[ 2].str(), s_h);
            parse_number(matches[ 4].str(), s_min);
            parse_number(matches[ 6].str(), s_sec);
            parse_number(matches[10].str(), e_h);
            parse_number(matches[12].str(), e_min);
            parse_number(matches[14].str(), e_sec);

            std::string s_rest = matches[ 8].str();
            std::string e_rest = matches[16].str();

            auto neg_calculator = [&](size_t const start_idx) -> int64_t {
                int64_t neg = 1;
                for (size_t idx = start_idx; idx <= (start_idx + 6); idx += 2)
                    neg *= matches[idx].str() == "-" ? -1 : 1;
                return neg;
            };

            int64_t s_neg = neg_calculator(1);
            int64_t e_neg = neg_calculator(9);

            if (boost::regex_search(s, coordinates_re) && !m_coordinates_warning_shown) {
                mxwarn_tid(m_file_name, m_tid,
                           Y("This file contains coordinates in the timecode lines. "
                             "Such coordinates are not supported by the Matroska SRT subtitle format. "
                             "The coordinates will be removed automatically.\n"));
                m_coordinates_warning_shown = true;
            }

            // The previous entry is done now. Append it to the list of subtitles.
            if (!subtitles.empty()) {
                strip_back(subtitles, true);
                add(start, end, timecode_number, subtitles.c_str());
            }

            // Calculate the start and end time in ns precision for the following entry.
            start  = (int64_t)s_h * 60 * 60 + s_min * 60 + s_sec;
            end    = (int64_t)e_h * 60 * 60 + e_min * 60 + e_sec;

            start *= 1000000000ll * s_neg;
            end   *= 1000000000ll * e_neg;

            while (s_rest.length() < 9)
                s_rest += "0";
            if (s_rest.length() > 9)
                s_rest.erase(9);
            start += atol(s_rest.c_str());

            while (e_rest.length() < 9)
                e_rest += "0";
            if (e_rest.length() > 9)
                e_rest.erase(9);
            end += atol(e_rest.c_str());

            if (0 > start) {
                mxwarn_tid(m_file_name, m_tid,
                           boost::format(Y("Line %1%: Negative timestamp encountered. The entry will be adjusted to start from 00:00:00.000.\n")) % line_number);
                end   -= start;
                start  = 0;
                if (0 > end)
                    end *= -1;
            }

            // There are files for which start timecodes overlap. Matroska requires
            // blocks to be sorted by their timecode. mkvmerge does this at the end
            // of this function, but warn the user that the original order is being
            // changed.
            if (!timecode_warning_printed && (start < previous_start)) {
                mxwarn_tid(m_file_name, m_tid, boost::format(Y("Warning in line %1%: The start timecode is smaller than that of the previous entry. "
                           "All entries from this file will be sorted by their start time.\n")) % line_number);
                timecode_warning_printed = true;
            }

            previous_start  = start;
            subtitles       = "";
            state           = STATE_SUBS;
            timecode_number = subtitle_number;

        } else if (STATE_SUBS == state) {
            if (!subtitles.empty())
                subtitles += "\n";
            subtitles += s;

        } else if (boost::regex_match(s, number_re)) {
            state = STATE_TIME;
            parse_number(s, subtitle_number);

        } else {
            if (!subtitles.empty())
                subtitles += "\n";
            subtitles += s;
        }
    }

    if (!subtitles.empty()) {
        strip_back(subtitles, true);
        add(start, end, timecode_number, subtitles.c_str());
    }

    sort();
}