Пример #1
0
// Searches the token list for an episode title and passes the matching
// range to BuildElement as kElementEpisodeTitle.
//
// A candidate starts at the first unknown token that is not enclosed and
// ends at the next bracket or identifier token. Candidates that amount to
// nothing more than a dash are skipped, and the search resumes from where
// the skipped candidate ended. Nothing is built if no candidate is left.
void Parser::SearchForEpisodeTitle() {
  auto title_first = tokens_.begin();
  auto title_last = tokens_.begin();

  while (true) {
    // Next candidate start, resuming at the end of the previous candidate
    title_first = FindToken(title_last, tokens_.end(),
                            kFlagNotEnclosed | kFlagUnknown);
    if (title_first == tokens_.end())
      break;

    // The candidate runs until a bracket or identifier shows up
    title_last = FindToken(title_first, tokens_.end(),
                           kFlagBracket | kFlagIdentifier);

    // A very short range that begins with a dash is a separator, not a title
    const bool lone_dash = std::distance(title_first, title_last) <= 2 &&
                           IsDashCharacter(title_first->content);
    if (!lone_dash) {
      BuildElement(kElementEpisodeTitle, false, title_first, title_last);
      break;
    }
  }
}
Пример #2
0
// Searches the token list for the release group and passes the matching
// range to BuildElement as kElementReleaseGroup.
//
// A candidate starts at an enclosed unknown token and must be terminated by
// a bracket token. It is accepted only if the closest preceding
// non-delimiter token is a bracket (or there is no such token), i.e. the
// candidate is the first thing inside its group. Rejected candidates cause
// the search to resume from where they ended.
void Parser::SearchForReleaseGroup() {
  token_container_t::iterator token_begin = tokens_.begin();
  token_container_t::iterator token_end = tokens_.begin();

  do {
    // Find the first enclosed unknown token
    token_begin = FindToken(token_end, tokens_.end(),
                            kFlagEnclosed | kFlagUnknown);
    if (token_begin == tokens_.end())
      return;

    // Continue until a bracket or identifier is found
    token_end = FindToken(token_begin, tokens_.end(),
                          kFlagBracket | kFlagIdentifier);

    // No terminating token at all (e.g. an unclosed group at the end of the
    // input): there is nothing left to scan, and the end iterator must not
    // be dereferenced.
    if (token_end == tokens_.end())
      return;

    // Stopped at an identifier rather than a closing bracket; try again
    // starting from that token
    if (token_end->category != kBracket)
      continue;

    // Ignore if it's not the first non-delimiter token in group
    token_container_t::iterator previous_token =
        FindPreviousToken(tokens_, token_begin, kFlagNotDelimiter);
    if (previous_token != tokens_.end() &&
        previous_token->category != kBracket) {
      continue;
    }

    // Build release group
    BuildElement(kElementReleaseGroup, true, token_begin, token_end);
    return;
  } while (token_begin != tokens_.end());
}
Пример #3
0
// Searches the token list for the anime title and passes the matching range
// to BuildElement as kElementAnimeTitle.
//
// The range starts at the first unknown token that is not enclosed and ends
// at the first identifier token after it. If the range holds an odd number
// of bracket tokens, it is cut short at the last of those brackets. Nothing
// is built when no starting token exists.
void Parser::SearchForAnimeTitle() {
  const auto is_free_unknown = [](const Token& t) {
    return !t.enclosed && t.category == kUnknown;
  };
  const auto is_identifier = [](const Token& t) {
    return t.category == kIdentifier;
  };

  // Title start: first unknown token outside of any enclosure
  auto title_first =
      std::find_if(tokens_.begin(), tokens_.end(), is_free_unknown);
  if (title_first == tokens_.end())
    return;

  // Title end: first identifier at or after the start
  auto title_last = std::find_if(title_first, tokens_.end(), is_identifier);

  // Walk the range, remembering the most recent bracket and whether the
  // number of brackets seen so far is odd
  auto unpaired_bracket = title_last;
  bool odd_bracket_count = false;
  auto cursor = title_first;
  while (cursor != title_last) {
    if (cursor->category == kBracket) {
      unpaired_bracket = cursor;
      odd_bracket_count = !odd_bracket_count;
    }
    ++cursor;
  }

  // An odd count means a bracket was opened but never closed inside the
  // range, so the range stops at that bracket instead
  if (odd_bracket_count)
    title_last = unpaired_bracket;

  BuildElement(kElementAnimeTitle, false, title_first, title_last);
}
Пример #4
0
void Parser::SearchForAnimeTitle() {
  bool enclosed_title = false;

  // Find the first non-enclosed unknown token
  token_container_t::iterator token_begin = FindToken(tokens_.begin(), tokens_.end(),
                               kFlagNotEnclosed | kFlagUnknown);

  // If that doesn't work, find the first unknown token in the second enclosed
  // group, assuming that the first one is the release group
  if (token_begin == tokens_.end()) {
    enclosed_title = true;
    token_begin = tokens_.begin();
    bool skipped_previous_group = false;
    do {
      token_begin = FindToken(token_begin, tokens_.end(), kFlagUnknown);
      if (token_begin == tokens_.end())
        break;
      // Ignore groups that are composed of non-Latin characters
      if (IsMostlyLatinString(token_begin->content))
        if (skipped_previous_group)
          break;  // Found it
      // Get the first unknown token of the next group
      token_begin = FindToken(token_begin, tokens_.end(), kFlagBracket);
      token_begin = FindToken(token_begin, tokens_.end(), kFlagUnknown);
      skipped_previous_group = true;
    } while (token_begin != tokens_.end());
  }
  if (token_begin == tokens_.end())
    return;

  // Continue until an identifier (or a bracket, if the title is enclosed)
  // is found
  token_container_t::iterator token_end = FindToken(token_begin, tokens_.end(),
      kFlagIdentifier | (enclosed_title ? kFlagBracket : kFlagNone));

  // If within the interval there's an open bracket without its matching pair,
  // move the upper endpoint back to the bracket
  if (!enclosed_title) {
    token_container_t::iterator last_bracket = token_end;
    bool bracket_open = false;
    for (token_container_t::iterator token = token_begin; token != token_end; ++token) {
      if (token->category == kBracket) {
        last_bracket = token;
        bracket_open = !bracket_open;
      }
    }
    if (bracket_open)
      token_end = last_bracket;
  }

  // Build anime title
  BuildElement(kElementAnimeTitle, false, token_begin, token_end);
}
Пример #5
0
// Searches enclosed groups for the release group and, if the anime title is
// still missing, for the anime title as well.
//
// A candidate starts at an enclosed unknown token and must be terminated by
// a bracket token. It is accepted only if GetPreviousValidToken() yields
// either tokens_.end() or a bracket for it. The first accepted candidate is
// built as kElementReleaseGroup (when that element is still empty); a later
// accepted candidate is built as kElementAnimeTitle (when that element is
// still empty), after which the search stops.
void Parser::SearchForReleaseGroup() {
  auto token_begin = tokens_.begin();
  auto token_end = tokens_.begin();

  do {
    // Find the first enclosed unknown token
    token_begin = std::find_if(token_end, tokens_.end(),
        [](const Token& token) {
          return token.enclosed && token.category == kUnknown;
        });
    if (token_begin == tokens_.end())
      return;

    // Continue until a bracket or identifier is found
    token_end = std::find_if(token_begin, tokens_.end(),
        [](const Token& token) {
          return token.category == kBracket || token.category == kIdentifier;
        });

    // std::find_if returns the end iterator when nothing matches (e.g. an
    // unclosed group at the end of the input). It must not be dereferenced,
    // and there is nothing left to scan.
    if (token_end == tokens_.end())
      return;

    // Stopped at an identifier rather than a closing bracket; try again
    // starting from that token
    if (token_end->category != kBracket)
      continue;

    // Ignore if it's not the first token in group
    auto previous_token = GetPreviousValidToken(token_begin);
    if (previous_token != tokens_.end() &&
        previous_token->category != kBracket) {
      continue;
    }

    // Build release group, or anime title if it wasn't found earlier
    if (elements_.empty(kElementReleaseGroup)) {
      BuildElement(kElementReleaseGroup, true, token_begin, token_end);
      // Keep scanning so that a following group can supply the anime title
      if (elements_.empty(kElementAnimeTitle))
        continue;
    } else if (elements_.empty(kElementAnimeTitle)) {
      BuildElement(kElementAnimeTitle, false, token_begin, token_end);
      return;
    }

  } while (token_begin != tokens_.end());
}
Пример #6
0
void Parser::SearchForEpisodeTitle() {
  // Find the first non-enclosed unknown token
  token_container_t::iterator token_begin = FindToken(tokens_.begin(), tokens_.end(),
                               kFlagNotEnclosed | kFlagUnknown);
  if (token_begin == tokens_.end())
    return;

  // Continue until a bracket or identifier is found
  token_container_t::iterator token_end = FindToken(token_begin, tokens_.end(),
                             kFlagBracket | kFlagIdentifier);

  // Build episode title
  BuildElement(kElementEpisodeTitle, false, token_begin, token_end);
}
Пример #7
0
// Searches the token list for an episode title and passes the matching
// range to BuildElement as kElementEpisodeTitle.
//
// The range starts at the first unknown token that is not enclosed and ends
// at the next bracket or identifier token. Nothing is built when no starting
// token exists.
void Parser::SearchForEpisodeTitle() {
  const auto is_free_unknown = [](const Token& t) {
    return !t.enclosed && t.category == kUnknown;
  };
  const auto ends_title = [](const Token& t) {
    return t.category == kBracket || t.category == kIdentifier;
  };

  // Title start: first unknown token outside of any enclosure
  auto title_first =
      std::find_if(tokens_.begin(), tokens_.end(), is_free_unknown);

  if (title_first != tokens_.end()) {
    // Title end: whichever bracket or identifier comes next
    auto title_last = std::find_if(title_first, tokens_.end(), ends_title);

    BuildElement(kElementEpisodeTitle, false, title_first, title_last);
  }
}
Пример #8
0
// Searches the token list for the anime title and passes the matching range
// to BuildElement as kElementAnimeTitle.
//
// Preferred start is the first unknown token that is not enclosed. When no
// such token exists, the title is looked for inside enclosed groups instead:
// the first group is skipped (it is assumed to be the release group), as are
// groups whose first unknown token is not mostly Latin text.
//
// For an enclosed title the range ends at the next identifier or bracket.
// Otherwise it ends at the next identifier and is then trimmed twice: back
// to the last bracket if the range contains an odd number of brackets, and
// back past any enclosed groups the range ends with.
void Parser::SearchForAnimeTitle() {
  // First choice: an unknown token outside of brackets
  auto title_first = FindToken(tokens_.begin(), tokens_.end(),
                               kFlagNotEnclosed | kFlagUnknown);
  const bool enclosed_title = (title_first == tokens_.end());

  if (enclosed_title) {
    // Fallback: step through the enclosed groups one by one
    bool passed_a_group = false;
    title_first = tokens_.begin();
    for (;;) {
      title_first = FindToken(title_first, tokens_.end(), kFlagUnknown);
      if (title_first == tokens_.end())
        break;

      // Accept a mostly-Latin token, but only after at least one group has
      // already been skipped
      const bool mostly_latin = IsMostlyLatinString(title_first->content);
      if (mostly_latin && passed_a_group)
        break;

      // Jump to the next bracket, then to the first unknown token after it
      title_first = FindToken(title_first, tokens_.end(), kFlagBracket);
      title_first = FindToken(title_first, tokens_.end(), kFlagUnknown);
      passed_a_group = true;
      if (title_first == tokens_.end())
        break;
    }
  }

  if (title_first == tokens_.end())
    return;

  auto title_last = title_first;
  if (enclosed_title) {
    // Inside a group the title stops at an identifier or at the bracket
    title_last = FindToken(title_first, tokens_.end(),
                           kFlagIdentifier | kFlagBracket);
  } else {
    // Outside of groups only an identifier ends the title
    title_last = FindToken(title_first, tokens_.end(),
                           kFlagIdentifier | kFlagNone);

    // Remember the most recent bracket and whether the bracket count is odd
    auto unpaired_bracket = title_last;
    bool odd_bracket_count = false;
    auto cursor = title_first;
    while (cursor != title_last) {
      if (cursor->category == kBracket) {
        unpaired_bracket = cursor;
        odd_bracket_count = !odd_bracket_count;
      }
      ++cursor;
    }

    // An unmatched bracket inside the range becomes the new upper endpoint
    if (odd_bracket_count)
      title_last = unpaired_bracket;

    // Drop enclosed groups the range ends with (e.g. "Anime Title [Fansub]")
    // by pulling the upper endpoint back to the bracket that starts each
    // such group. Groups closed by ')' are left alone so that things like
    // "(TV)" stay part of the title.
    auto trailing = FindPreviousToken(tokens_, title_last, kFlagNotDelimiter);
    while (true) {
      if (!CheckTokenCategory(trailing, kBracket))
        break;
      if (trailing->content.front() == ')')
        break;

      trailing = FindPreviousToken(tokens_, trailing, kFlagBracket);
      if (trailing != tokens_.end()) {
        title_last = trailing;
        trailing = FindPreviousToken(tokens_, title_last, kFlagNotDelimiter);
      }
    }
  }

  BuildElement(kElementAnimeTitle, false, title_first, title_last);
}