コード例 #1
0
ファイル: parser_helper.cpp プロジェクト: Soinou/MeliMelo
    bool Parser::CheckAnimeSeasonKeyword(const token_iterator_t token)
    {
        auto set_anime_season = [&](token_iterator_t first, token_iterator_t second,
                                    const string_t& content)
        {
            elements_.insert(kElementAnimeSeason, content);
            first->category = kIdentifier;
            second->category = kIdentifier;
        };

        auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter);
        if (previous_token != tokens_.end())
        {
            auto number = GetNumberFromOrdinal(previous_token->content);
            if (!number.empty())
            {
                set_anime_season(previous_token, token, number);
                return true;
            }
        }

        auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter);
        if (next_token != tokens_.end() &&
            IsNumericString(next_token->content))
        {
            set_anime_season(token, next_token, next_token->content);
            return true;
        }

        return false;
    }
コード例 #2
0
ファイル: parser.cpp プロジェクト: gamedeff/anitomy
// Searches the token list for the release group and builds the
// kElementReleaseGroup element from it.
//
// A candidate is a run that starts at an enclosed unknown token and is
// terminated by a bracket token. It is accepted only if the previous token
// found with kFlagNotDelimiter is either absent or a bracket. The first
// accepted candidate wins; if none is found the function returns without
// building anything.
void Parser::SearchForReleaseGroup() {
  token_container_t::iterator token_begin = tokens_.begin();
  token_container_t::iterator token_end = tokens_.begin();

  do {
    // Find the first enclosed unknown token
    token_begin = FindToken(token_end, tokens_.end(),
                            kFlagEnclosed | kFlagUnknown);
    if (token_begin == tokens_.end())
      return;

    // Continue until a bracket or identifier is found
    token_end = FindToken(token_begin, tokens_.end(),
                          kFlagBracket | kFlagIdentifier);
    // No terminating token: the end iterator must not be dereferenced, and
    // there is nothing left to scan, so give up.
    if (token_end == tokens_.end())
      return;
    if (token_end->category != kBracket)
      continue;

    // Ignore if it's not the first non-delimiter token in group
    token_container_t::iterator previous_token = FindPreviousToken(tokens_, token_begin,
                                            kFlagNotDelimiter);
    if (previous_token != tokens_.end() &&
        previous_token->category != kBracket) {
      continue;
    }

    // Build release group
    BuildElement(kElementReleaseGroup, true, token_begin, token_end);
    return;
  } while (token_begin != tokens_.end());
}
コード例 #3
0
ファイル: parser_helper.cpp プロジェクト: KasaiDot/anitomy
bool Parser::IsTokenIsolated(const token_iterator_t token) const {
  auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter);
  if (previous_token == tokens_.end() || previous_token->category != kBracket)
    return false;

  auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter);
  if (next_token == tokens_.end() || next_token->category != kBracket)
    return false;

  return true;
}
コード例 #4
0
ファイル: parser_number.cpp プロジェクト: KasaiDot/anitomy
bool Parser::SearchForSeparatedNumbers(std::vector<size_t>& tokens) {
  for (auto token_index = tokens.begin();
       token_index != tokens.end(); ++token_index) {
    auto token = tokens_.begin() + *token_index;
    auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter);

    // See if the number has a preceding "-" separator
    if (previous_token != tokens_.end() &&
        previous_token->category == kUnknown &&
        IsDashCharacter(previous_token->content)) {
      if (SetEpisodeNumber(token->content, *token, true)) {
        previous_token->category = kIdentifier;
        return true;
      }
    }
  }

  return false;
}
コード例 #5
0
ファイル: parser_number.cpp プロジェクト: KasaiDot/anitomy
bool Parser::SearchForLastNumber(std::vector<size_t>& tokens) {
  for (auto it = tokens.rbegin(); it != tokens.rend(); ++it) {
    size_t token_index = *it;
    auto token = tokens_.begin() + token_index;

    // Assuming that episode number always comes after the title, first token
    // cannot be what we're looking for
    if (token_index == 0)
      continue;

    // An enclosed token is unlikely to be the episode number at this point
    if (token->enclosed)
      continue;

    // Ignore if it's the first non-enclosed, non-delimiter token
    if (std::all_of(tokens_.begin(), token, [](const Token& token) {
            return token.enclosed || token.category == kDelimiter; }))
      continue;

    // Ignore if the previous token is "Movie" or "Part"
    auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter);
    if (previous_token != tokens_.end() &&
        previous_token->category == kUnknown) {
      if (IsStringEqualTo(previous_token->content, L"Movie") ||
          IsStringEqualTo(previous_token->content, L"Part")) {
        continue;
      }
    }

    // We'll use this number after all
    if (SetEpisodeNumber(token->content, *token, true))
      return true;
  }

  return false;
}
コード例 #6
0
ファイル: parser.cpp プロジェクト: erengy/anitomy
// Locates the token range that makes up the anime title and builds the
// kElementAnimeTitle element from it. Returns without building anything when
// no starting token can be found.
//
// Start of the range:
//   - the first unknown token that is not enclosed, or, failing that,
//   - the first unknown token of a later enclosed group whose content passes
//     IsMostlyLatinString(), after at least one group has been skipped
//     (the first group is assumed to be the release group).
// End of the range: the next identifier (or bracket, for an enclosed title),
// then pulled back for unmatched brackets and trailing enclosed groups when
// the title is not enclosed.
void Parser::SearchForAnimeTitle() {
  // Preferred start: first unknown token outside any enclosure
  auto title_first = FindToken(tokens_.begin(), tokens_.end(),
                               kFlagNotEnclosed | kFlagUnknown);
  const bool enclosed_title = (title_first == tokens_.end());

  // Fallback: look inside enclosed groups, skipping at least one of them
  if (enclosed_title) {
    title_first = tokens_.begin();
    bool group_skipped = false;
    for (;;) {
      title_first = FindToken(title_first, tokens_.end(), kFlagUnknown);
      if (title_first == tokens_.end())
        break;
      // Groups made of non-Latin characters are passed over
      if (IsMostlyLatinString(title_first->content) && group_skipped)
        break;  // This is the one
      // Jump to the first unknown token of the following group
      title_first = FindToken(title_first, tokens_.end(), kFlagBracket);
      title_first = FindToken(title_first, tokens_.end(), kFlagUnknown);
      group_skipped = true;
      if (title_first == tokens_.end())
        break;
    }
  }
  if (title_first == tokens_.end())
    return;

  // The title runs up to the next identifier, or up to the next bracket as
  // well when it is enclosed
  auto title_last = FindToken(title_first, tokens_.end(),
      kFlagIdentifier | (enclosed_title ? kFlagBracket : kFlagNone));

  if (!enclosed_title) {
    // An odd number of brackets in the range means one is left open; cut the
    // range at the last bracket seen
    auto unmatched_bracket = title_last;
    bool has_open_bracket = false;
    for (auto cursor = title_first; cursor != title_last; ++cursor) {
      if (cursor->category != kBracket)
        continue;
      unmatched_bracket = cursor;
      has_open_bracket = !has_open_bracket;
    }
    if (has_open_bracket)
      title_last = unmatched_bracket;

    // Drop enclosed groups at the tail of the range (e.g.
    // "Anime Title [Fansub]"). Groups closed by ')' are kept so that
    // something like "(TV)" stays part of the title.
    auto probe = FindPreviousToken(tokens_, title_last, kFlagNotDelimiter);
    while (CheckTokenCategory(probe, kBracket) &&
           probe->content.front() != ')') {
      probe = FindPreviousToken(tokens_, probe, kFlagBracket);
      if (probe != tokens_.end()) {
        title_last = probe;
        probe = FindPreviousToken(tokens_, title_last, kFlagNotDelimiter);
      }
    }
  }

  // Build anime title
  BuildElement(kElementAnimeTitle, false, title_first, title_last);
}
コード例 #7
0
ファイル: tokenizer.cpp プロジェクト: arkenthera/anitomy
// Re-examines every delimiter token and merges those that should not split
// their neighbours. Merged tokens have their content appended to the token
// they are merged into and are marked kInvalid; all kInvalid tokens are
// erased from tokens_ at the end.
//
// For delimiters other than ' ' and '_':
//   - if the previous valid token is a single-character unknown token (other
//     than '-'), the delimiter and the following unknown tokens (together
//     with repeated occurrences of the same delimiter between them) are
//     appended to it;
//   - otherwise, if the next valid token is such a single-character token,
//     the delimiter and that token are appended to the previous token.
// For any delimiter preceded by an unknown token and followed by a different
// delimiter that is ' ' or '_', the delimiter (unless it is ',') is appended
// to the previous token.
void Tokenizer::ValidateDelimiterTokens() {
  auto is_delimiter_token = [&](token_iterator_t it) {
    return it != tokens_.end() && it->category == kDelimiter;
  };
  auto is_unknown_token = [&](token_iterator_t it) {
    return it != tokens_.end() && it->category == kUnknown;
  };
  auto is_single_character_token = [&](token_iterator_t it) {
    return is_unknown_token(it) && it->content.size() == 1 &&
           it->content.front() != L'-';
  };
  // Appends token's content to append_to and marks token for removal.
  // Both iterators must be dereferenceable.
  auto append_token_to = [](token_iterator_t token,
                            token_iterator_t append_to) {
    append_to->content.append(token->content);
    token->category = kInvalid;
  };

  for (auto token = tokens_.begin(); token != tokens_.end(); ++token) {
    if (token->category != kDelimiter)
      continue;
    auto delimiter = token->content.front();
    auto prev_token = FindPreviousToken(tokens_, token, kFlagValid);
    auto next_token = FindNextToken(tokens_, token, kFlagValid);

    // Check for single-character tokens to prevent splitting group names,
    // keywords, episode number, etc.
    if (delimiter != L' ' && delimiter != L'_') {
      if (is_single_character_token(prev_token)) {
        append_token_to(token, prev_token);
        while (is_unknown_token(next_token)) {
          append_token_to(next_token, prev_token);
          next_token = FindNextToken(tokens_, next_token, kFlagValid);
          if (is_delimiter_token(next_token) &&
              next_token->content.front() == delimiter) {
            append_token_to(next_token, prev_token);
            next_token = FindNextToken(tokens_, next_token, kFlagValid);
          }
        }
        continue;
      }
      // prev_token is tokens_.end() when no valid token precedes the
      // delimiter; there is nothing to append to in that case, and the end
      // iterator must not be dereferenced.
      if (prev_token != tokens_.end() &&
          is_single_character_token(next_token)) {
        append_token_to(token, prev_token);
        append_token_to(next_token, prev_token);
        continue;
      }
    }

    // Check for adjacent delimiters
    if (is_unknown_token(prev_token) && is_delimiter_token(next_token)) {
      auto next_delimiter = next_token->content.front();
      if (delimiter != next_delimiter && delimiter != ',') {
        if (next_delimiter == ' ' || next_delimiter == '_') {
          append_token_to(token, prev_token);
        }
      }
    }
  }

  // Drop every token that was merged into another one
  auto remove_if_invalid = std::remove_if(tokens_.begin(), tokens_.end(),
      [](const Token& token) -> bool {
        return token.category == kInvalid;
      });
  tokens_.erase(remove_if_invalid, tokens_.end());
}