void Parser::SearchForEpisodeTitle() { auto token_begin = tokens_.begin(); auto token_end = tokens_.begin(); do { // Find the first non-enclosed unknown token token_begin = FindToken(token_end, tokens_.end(), kFlagNotEnclosed | kFlagUnknown); if (token_begin == tokens_.end()) return; // Continue until a bracket or identifier is found token_end = FindToken(token_begin, tokens_.end(), kFlagBracket | kFlagIdentifier); // Ignore if it's only a dash if (std::distance(token_begin, token_end) <= 2 && IsDashCharacter(token_begin->content)) { continue; } // Build episode title BuildElement(kElementEpisodeTitle, false, token_begin, token_end); return; } while (token_begin != tokens_.end()); }
void Parser::SearchForReleaseGroup() { token_container_t::iterator token_begin = tokens_.begin(); token_container_t::iterator token_end = tokens_.begin(); do { // Find the first enclosed unknown token token_begin = FindToken(token_end, tokens_.end(), kFlagEnclosed | kFlagUnknown); if (token_begin == tokens_.end()) return; // Continue until a bracket or identifier is found token_end = FindToken(token_begin, tokens_.end(), kFlagBracket | kFlagIdentifier); if (token_end->category != kBracket) continue; // Ignore if it's not the first non-delimiter token in group token_container_t::iterator previous_token = FindPreviousToken(tokens_, token_begin, kFlagNotDelimiter); if (previous_token != tokens_.end() && previous_token->category != kBracket) { continue; } // Build release group BuildElement(kElementReleaseGroup, true, token_begin, token_end); return; } while (token_begin != tokens_.end()); }
void Parser::SearchForAnimeTitle() { // Find the first non-enclosed unknown token auto token_begin = std::find_if(tokens_.begin(), tokens_.end(), [](const Token& token) { return !token.enclosed && token.category == kUnknown; }); if (token_begin == tokens_.end()) return; // Continue until an identifier is found auto token_end = std::find_if(token_begin, tokens_.end(), [](const Token& token) { return token.category == kIdentifier; }); // If within the interval there's an open bracket without its matching pair, // move the upper endpoint back to the bracket auto last_bracket = token_end; bool bracket_open = false; for (auto token = token_begin; token != token_end; ++token) { if (token->category == kBracket) { last_bracket = token; bracket_open = !bracket_open; } } if (bracket_open) token_end = last_bracket; // Build anime title BuildElement(kElementAnimeTitle, false, token_begin, token_end); }
void Parser::SearchForAnimeTitle() { bool enclosed_title = false; // Find the first non-enclosed unknown token token_container_t::iterator token_begin = FindToken(tokens_.begin(), tokens_.end(), kFlagNotEnclosed | kFlagUnknown); // If that doesn't work, find the first unknown token in the second enclosed // group, assuming that the first one is the release group if (token_begin == tokens_.end()) { enclosed_title = true; token_begin = tokens_.begin(); bool skipped_previous_group = false; do { token_begin = FindToken(token_begin, tokens_.end(), kFlagUnknown); if (token_begin == tokens_.end()) break; // Ignore groups that are composed of non-Latin characters if (IsMostlyLatinString(token_begin->content)) if (skipped_previous_group) break; // Found it // Get the first unknown token of the next group token_begin = FindToken(token_begin, tokens_.end(), kFlagBracket); token_begin = FindToken(token_begin, tokens_.end(), kFlagUnknown); skipped_previous_group = true; } while (token_begin != tokens_.end()); } if (token_begin == tokens_.end()) return; // Continue until an identifier (or a bracket, if the title is enclosed) // is found token_container_t::iterator token_end = FindToken(token_begin, tokens_.end(), kFlagIdentifier | (enclosed_title ? kFlagBracket : kFlagNone)); // If within the interval there's an open bracket without its matching pair, // move the upper endpoint back to the bracket if (!enclosed_title) { token_container_t::iterator last_bracket = token_end; bool bracket_open = false; for (token_container_t::iterator token = token_begin; token != token_end; ++token) { if (token->category == kBracket) { last_bracket = token; bracket_open = !bracket_open; } } if (bracket_open) token_end = last_bracket; } // Build anime title BuildElement(kElementAnimeTitle, false, token_begin, token_end); }
void Parser::SearchForReleaseGroup() { auto token_begin = tokens_.begin(); auto token_end = tokens_.begin(); do { // Find the first enclosed unknown token token_begin = std::find_if(token_end, tokens_.end(), [](const Token& token) { return token.enclosed && token.category == kUnknown; }); if (token_begin == tokens_.end()) continue; // Continue until a bracket or identifier is found token_end = std::find_if(token_begin, tokens_.end(), [](const Token& token) { return token.category == kBracket || token.category == kIdentifier; }); if (token_end->category != kBracket) continue; // Ignore if it's not the first token in group auto previous_token = GetPreviousValidToken(token_begin); if (previous_token != tokens_.end() && previous_token->category != kBracket) { continue; } // Build release group, or anime title if it wasn't found earlier if (elements_.empty(kElementReleaseGroup)) { BuildElement(kElementReleaseGroup, true, token_begin, token_end); if (elements_.empty(kElementAnimeTitle)) continue; } else if (elements_.empty(kElementAnimeTitle)) { BuildElement(kElementAnimeTitle, false, token_begin, token_end); return; } } while (token_begin != tokens_.end()); }
void Parser::SearchForEpisodeTitle() { // Find the first non-enclosed unknown token token_container_t::iterator token_begin = FindToken(tokens_.begin(), tokens_.end(), kFlagNotEnclosed | kFlagUnknown); if (token_begin == tokens_.end()) return; // Continue until a bracket or identifier is found token_container_t::iterator token_end = FindToken(token_begin, tokens_.end(), kFlagBracket | kFlagIdentifier); // Build episode title BuildElement(kElementEpisodeTitle, false, token_begin, token_end); }
void Parser::SearchForEpisodeTitle() { // Find the first non-enclosed unknown token auto token_begin = std::find_if(tokens_.begin(), tokens_.end(), [](const Token& token) { return !token.enclosed && token.category == kUnknown; }); if (token_begin == tokens_.end()) return; // Continue until a bracket or identifier is found auto token_end = std::find_if(token_begin, tokens_.end(), [](const Token& token) { return token.category == kBracket || token.category == kIdentifier; }); // Build episode title BuildElement(kElementEpisodeTitle, false, token_begin, token_end); }
void Parser::SearchForAnimeTitle() { bool enclosed_title = false; // Find the first non-enclosed unknown token auto token_begin = FindToken(tokens_.begin(), tokens_.end(), kFlagNotEnclosed | kFlagUnknown); // If that doesn't work, find the first unknown token in the second enclosed // group, assuming that the first one is the release group if (token_begin == tokens_.end()) { enclosed_title = true; token_begin = tokens_.begin(); bool skipped_previous_group = false; do { token_begin = FindToken(token_begin, tokens_.end(), kFlagUnknown); if (token_begin == tokens_.end()) break; // Ignore groups that are composed of non-Latin characters if (IsMostlyLatinString(token_begin->content)) if (skipped_previous_group) break; // Found it // Get the first unknown token of the next group token_begin = FindToken(token_begin, tokens_.end(), kFlagBracket); token_begin = FindToken(token_begin, tokens_.end(), kFlagUnknown); skipped_previous_group = true; } while (token_begin != tokens_.end()); } if (token_begin == tokens_.end()) return; // Continue until an identifier (or a bracket, if the title is enclosed) // is found auto token_end = FindToken(token_begin, tokens_.end(), kFlagIdentifier | (enclosed_title ? kFlagBracket : kFlagNone)); // If within the interval there's an open bracket without its matching pair, // move the upper endpoint back to the bracket if (!enclosed_title) { auto last_bracket = token_end; bool bracket_open = false; for (auto token = token_begin; token != token_end; ++token) { if (token->category == kBracket) { last_bracket = token; bracket_open = !bracket_open; } } if (bracket_open) token_end = last_bracket; } // If the interval ends with an enclosed group (e.g. "Anime Title [Fansub]"), // move the upper endpoint back to the beginning of the group. We ignore // parentheses in order to keep certain groups (e.g. "(TV)") intact. if (!enclosed_title) { auto token = FindPreviousToken(tokens_, token_end, kFlagNotDelimiter); while (CheckTokenCategory(token, kBracket) && token->content.front() != ')') { token = FindPreviousToken(tokens_, token, kFlagBracket); if (token != tokens_.end()) { token_end = token; token = FindPreviousToken(tokens_, token_end, kFlagNotDelimiter); } } } // Build anime title BuildElement(kElementAnimeTitle, false, token_begin, token_end); }