void Parser::SearchForEpisodeNumber() { // List all unknown tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { auto& token = tokens_.at(i); if (token.category == kUnknown) if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; found_episode_keywords_ = !elements_.empty(kElementEpisodeNumber); // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; if (!elements_.empty(kElementEpisodeNumber)) return; // We have previously found an episode number via keywords // From now on, we're only interested in numeric tokens auto not_numeric_string = [&](size_t index) -> bool { return !IsNumericString(tokens_.at(index).content); }; tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string), tokens.end()); if (tokens.empty()) return; // e.g. "01 (176)", "29 (04)" if (SearchForEquivalentNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }
void Parser::SearchForEpisodeNumber() { // List all unknown tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { Token& token = tokens_.at(i); if (token.category == kUnknown) if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; // From now on, we're only interested in numeric tokens tokens.erase(std::remove_if(tokens.begin(), tokens.end(), std::bind1st(std::mem_fun(&Parser::not_numeric_string), this)), tokens.end()); if (tokens.empty()) return; // e.g. "01 (176)", "29 (04)" if (SearchForEquivalentNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }
void Parser::SearchForEpisodeNumber() { // List all tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { auto& token = tokens_.at(i); if (token.category != kUnknown) continue; // Skip previously identified tokens if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; // From now on, we're only interested in numeric tokens auto not_numeric_string = [&](size_t index) -> bool { return !IsNumericString(tokens_.at(index).content); }; tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string), tokens.end()); if (tokens.empty()) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }