Exemplo n.º 1
0
bool Parser::MatchTypeAndEpisodePattern(const string_t& word, Token& token) {
  size_t number_begin = FindNumberInString(word);
  auto prefix = word.substr(0, number_begin);

  ElementCategory category = kElementAnimeType;
  KeywordOptions options;

  if (keyword_manager.Find(keyword_manager.Normalize(prefix),
                           category, options)) {
    elements_.insert(kElementAnimeType, prefix);
    auto number = word.substr(number_begin);
    if (MatchEpisodePatterns(number, token) ||
        SetEpisodeNumber(number, token, true)) {
      auto it = std::find(tokens_.begin(), tokens_.end(), token);
      if (it != tokens_.end()) {
        // Split token (we do this last in order to avoid invalidating our
        // token reference earlier)
        token.content = number;
        tokens_.insert(it, Token(options.identifiable ? kIdentifier : kUnknown,
                                 prefix, token.enclosed));
      }
      return true;
    }
  }

  return false;
}
Exemplo n.º 2
0
bool Parser::NumberComesAfterEpisodePrefix(Token& token) {
  size_t number_begin = FindNumberInString(token.content);
  auto prefix = keyword_manager.Normalize(token.content.substr(0, number_begin));

  if (keyword_manager.Find(kElementEpisodePrefix, prefix)) {
    auto number = token.content.substr(
        number_begin, token.content.length() - number_begin);
    if (!MatchEpisodePatterns(number, token))
      SetEpisodeNumber(number, token, false);
    return true;
  }

  return false;
}
Exemplo n.º 3
0
bool Parser::CheckEpisodeKeyword(const token_iterator_t token) {
  auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter);

  if (next_token != tokens_.end() &&
      next_token->category == kUnknown) {
    if (FindNumberInString(next_token->content) == 0) {
      if (!MatchEpisodePatterns(next_token->content, *next_token))
        SetEpisodeNumber(next_token->content, *next_token, false);
      token->category = kIdentifier;
      return true;
    }
  }

  return false;
}
Exemplo n.º 4
0
void Parser::SearchForEpisodeNumber() {
  // List all unknown tokens that contain a number
  std::vector<size_t> tokens;
  for (size_t i = 0; i < tokens_.size(); ++i) {
    auto& token = tokens_.at(i);
    if (token.category == kUnknown)
      if (FindNumberInString(token.content) != token.content.npos)
        tokens.push_back(i);
  }
  if (tokens.empty())
    return;

  found_episode_keywords_ = !elements_.empty(kElementEpisodeNumber);

  // If a token matches a known episode pattern, it has to be the episode number
  if (SearchForEpisodePatterns(tokens))
    return;

  if (!elements_.empty(kElementEpisodeNumber))
    return;  // We have previously found an episode number via keywords

  // From now on, we're only interested in numeric tokens
  auto not_numeric_string = [&](size_t index) -> bool {
    return !IsNumericString(tokens_.at(index).content);
  };
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string),
               tokens.end());

  if (tokens.empty())
    return;

  // e.g. "01 (176)", "29 (04)"
  if (SearchForEquivalentNumbers(tokens))
    return;

  // e.g. " - 08"
  if (SearchForSeparatedNumbers(tokens))
    return;

  // e.g. "[12]", "(2006)"
  if (SearchForIsolatedNumbers(tokens))
    return;

  // Consider using the last number as a last resort
  SearchForLastNumber(tokens);
}
Exemplo n.º 5
0
void Parser::SearchForEpisodeNumber() {
  // List all unknown tokens that contain a number
  std::vector<size_t> tokens;
  for (size_t i = 0; i < tokens_.size(); ++i) {
    Token& token = tokens_.at(i);
    if (token.category == kUnknown)
      if (FindNumberInString(token.content) != token.content.npos)
        tokens.push_back(i);
  }
  if (tokens.empty())
    return;

  // If a token matches a known episode pattern, it has to be the episode number
  if (SearchForEpisodePatterns(tokens))
    return;

  // From now on, we're only interested in numeric tokens
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), std::bind1st(std::mem_fun(&Parser::not_numeric_string), this)),
               tokens.end());

  if (tokens.empty())
    return;

  // e.g. "01 (176)", "29 (04)"
  if (SearchForEquivalentNumbers(tokens))
    return;

  // e.g. " - 08"
  if (SearchForSeparatedNumbers(tokens))
    return;

  // e.g. "[12]", "(2006)"
  if (SearchForIsolatedNumbers(tokens))
    return;

  // Consider using the last number as a last resort
  SearchForLastNumber(tokens);
}
Exemplo n.º 6
0
void Parser::SearchForEpisodeNumber() {
  // List all tokens that contain a number
  std::vector<size_t> tokens;
  for (size_t i = 0; i < tokens_.size(); ++i) {
    auto& token = tokens_.at(i);
    if (token.category != kUnknown)
      continue;  // Skip previously identified tokens
    if (FindNumberInString(token.content) != token.content.npos)
      tokens.push_back(i);
  }
  if (tokens.empty())
    return;

  // If a token matches a known episode pattern, it has to be the episode number
  if (SearchForEpisodePatterns(tokens))
    return;

  // From now on, we're only interested in numeric tokens
  auto not_numeric_string = [&](size_t index) -> bool {
    return !IsNumericString(tokens_.at(index).content);
  };
  tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string),
               tokens.end());

  if (tokens.empty())
    return;

  // e.g. "[12]", "(2006)"
  if (SearchForIsolatedNumbers(tokens))
    return;

  // e.g. " - 08"
  if (SearchForSeparatedNumbers(tokens))
    return;

  // Consider using the last number as a last resort
  SearchForLastNumber(tokens);
}