Ejemplo n.º 1
0
void Parser::SearchForEpisodeNumber() {
  // List all unknown tokens that contain a number
  std::vector<size_t> tokens;
  for (size_t i = 0; i < tokens_.size(); ++i) {
    auto& token = tokens_.at(i);
    if (token.category == kUnknown)
      if (FindNumberInString(token.content) != token.content.npos)
        tokens.push_back(i);
  }
  if (tokens.empty())
    return;

  found_episode_keywords_ = !elements_.empty(kElementEpisodeNumber);

  // If a token matches a known episode pattern, it has to be the episode number
  if (SearchForEpisodePatterns(tokens))
    return;

  if (!elements_.empty(kElementEpisodeNumber))
    return;  // We have previously found an episode number via keywords

  // From now on, we're only interested in numeric tokens
  auto not_numeric_string = [&](size_t index) -> bool {
    return !IsNumericString(tokens_.at(index).content);
  };
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string),
               tokens.end());

  if (tokens.empty())
    return;

  // e.g. "01 (176)", "29 (04)"
  if (SearchForEquivalentNumbers(tokens))
    return;

  // e.g. " - 08"
  if (SearchForSeparatedNumbers(tokens))
    return;

  // e.g. "[12]", "(2006)"
  if (SearchForIsolatedNumbers(tokens))
    return;

  // Consider using the last number as a last resort
  SearchForLastNumber(tokens);
}
Ejemplo n.º 2
0
void Parser::SearchForEpisodeNumber() {
  // List all unknown tokens that contain a number
  std::vector<size_t> tokens;
  for (size_t i = 0; i < tokens_.size(); ++i) {
    Token& token = tokens_.at(i);
    if (token.category == kUnknown)
      if (FindNumberInString(token.content) != token.content.npos)
        tokens.push_back(i);
  }
  if (tokens.empty())
    return;

  // If a token matches a known episode pattern, it has to be the episode number
  if (SearchForEpisodePatterns(tokens))
    return;

  // From now on, we're only interested in numeric tokens
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), std::bind1st(std::mem_fun(&Parser::not_numeric_string), this)),
               tokens.end());

  if (tokens.empty())
    return;

  // e.g. "01 (176)", "29 (04)"
  if (SearchForEquivalentNumbers(tokens))
    return;

  // e.g. " - 08"
  if (SearchForSeparatedNumbers(tokens))
    return;

  // e.g. "[12]", "(2006)"
  if (SearchForIsolatedNumbers(tokens))
    return;

  // Consider using the last number as a last resort
  SearchForLastNumber(tokens);
}
Ejemplo n.º 3
0
void Parser::SearchForEpisodeNumber() {
  // List all tokens that contain a number
  std::vector<size_t> tokens;
  for (size_t i = 0; i < tokens_.size(); ++i) {
    auto& token = tokens_.at(i);
    if (token.category != kUnknown)
      continue;  // Skip previously identified tokens
    if (FindNumberInString(token.content) != token.content.npos)
      tokens.push_back(i);
  }
  if (tokens.empty())
    return;

  // If a token matches a known episode pattern, it has to be the episode number
  if (SearchForEpisodePatterns(tokens))
    return;

  // From now on, we're only interested in numeric tokens
  auto not_numeric_string = [&](size_t index) -> bool {
    return !IsNumericString(tokens_.at(index).content);
  };
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string),
               tokens.end());

  if (tokens.empty())
    return;

  // e.g. "[12]", "(2006)"
  if (SearchForIsolatedNumbers(tokens))
    return;

  // e.g. " - 08"
  if (SearchForSeparatedNumbers(tokens))
    return;

  // Consider using the last number as a last resort
  SearchForLastNumber(tokens);
}