bool Parser::MatchTypeAndEpisodePattern(const string_t& word, Token& token) { size_t number_begin = FindNumberInString(word); auto prefix = word.substr(0, number_begin); ElementCategory category = kElementAnimeType; KeywordOptions options; if (keyword_manager.Find(keyword_manager.Normalize(prefix), category, options)) { elements_.insert(kElementAnimeType, prefix); auto number = word.substr(number_begin); if (MatchEpisodePatterns(number, token) || SetEpisodeNumber(number, token, true)) { auto it = std::find(tokens_.begin(), tokens_.end(), token); if (it != tokens_.end()) { // Split token (we do this last in order to avoid invalidating our // token reference earlier) token.content = number; tokens_.insert(it, Token(options.identifiable ? kIdentifier : kUnknown, prefix, token.enclosed)); } return true; } } return false; }
// Checks whether |token| consists of an episode prefix keyword followed by a
// number.
//
// The text before the first number in the token's content is normalized and
// looked up under kElementEpisodePrefix. When it is a known prefix, the rest
// of the content is passed to MatchEpisodePatterns() and, if that does not
// match, to SetEpisodeNumber().
//
// Returns true whenever the prefix is a known episode prefix (the result of
// SetEpisodeNumber() is intentionally not consulted), false otherwise.
bool Parser::NumberComesAfterEpisodePrefix(Token& token) {
  const size_t number_begin = FindNumberInString(token.content);

  // No number means no number can follow a prefix. Returning early also keeps
  // substr() below from throwing std::out_of_range when the whole content is
  // itself an episode prefix keyword.
  if (number_begin == token.content.npos)
    return false;

  auto prefix =
      keyword_manager.Normalize(token.content.substr(0, number_begin));
  if (!keyword_manager.Find(kElementEpisodePrefix, prefix))
    return false;

  // Everything from the first number to the end of the content
  auto number = token.content.substr(number_begin);
  if (!MatchEpisodePatterns(number, token))
    SetEpisodeNumber(number, token, false);

  return true;
}
// Handles an episode keyword located at |token| by inspecting the token that
// follows it.
//
// The next token matching kFlagNotDelimiter is looked up; it qualifies only
// if it exists, is still kUnknown, and its content starts with a number
// (FindNumberInString() returns 0). A qualifying token is passed to
// MatchEpisodePatterns() and, if that does not match, to SetEpisodeNumber().
// The keyword token itself is then marked as kIdentifier.
//
// Returns true when a qualifying token was found, false otherwise.
bool Parser::CheckEpisodeKeyword(const token_iterator_t token) {
  auto candidate = FindNextToken(tokens_, token, kFlagNotDelimiter);

  if (candidate == tokens_.end())
    return false;
  if (candidate->category != kUnknown)
    return false;
  // The number has to be at the very beginning of the candidate
  if (FindNumberInString(candidate->content) != 0)
    return false;

  if (!MatchEpisodePatterns(candidate->content, *candidate))
    SetEpisodeNumber(candidate->content, *candidate, false);

  token->category = kIdentifier;
  return true;
}
void Parser::SearchForEpisodeNumber() { // List all unknown tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { auto& token = tokens_.at(i); if (token.category == kUnknown) if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; found_episode_keywords_ = !elements_.empty(kElementEpisodeNumber); // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; if (!elements_.empty(kElementEpisodeNumber)) return; // We have previously found an episode number via keywords // From now on, we're only interested in numeric tokens auto not_numeric_string = [&](size_t index) -> bool { return !IsNumericString(tokens_.at(index).content); }; tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string), tokens.end()); if (tokens.empty()) return; // e.g. "01 (176)", "29 (04)" if (SearchForEquivalentNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }
void Parser::SearchForEpisodeNumber() { // List all unknown tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { Token& token = tokens_.at(i); if (token.category == kUnknown) if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; // From now on, we're only interested in numeric tokens tokens.erase(std::remove_if(tokens.begin(), tokens.end(), std::bind1st(std::mem_fun(&Parser::not_numeric_string), this)), tokens.end()); if (tokens.empty()) return; // e.g. "01 (176)", "29 (04)" if (SearchForEquivalentNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }
void Parser::SearchForEpisodeNumber() { // List all tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { auto& token = tokens_.at(i); if (token.category != kUnknown) continue; // Skip previously identified tokens if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; // From now on, we're only interested in numeric tokens auto not_numeric_string = [&](size_t index) -> bool { return !IsNumericString(tokens_.at(index).content); }; tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string), tokens.end()); if (tokens.empty()) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }