void Parser::SearchForIsolatedNumbers() { for (token_container_t::iterator token = tokens_.begin(); token != tokens_.end(); ++token) { if (token->category != kUnknown || !IsNumericString(token->content) || !IsTokenIsolated(token)) continue; int number = StringToInt(token->content); // Anime year if (number >= kAnimeYearMin && number <= kAnimeYearMax) { if (elements_.empty(kElementAnimeYear)) { elements_.insert(kElementAnimeYear, token->content); token->category = kIdentifier; continue; } } // Video resolution if (number == 480 || number == 720 || number == 1080) { // If these numbers are isolated, it's more likely for them to be the // video resolution rather than the episode number. Some fansub groups // use these without the "p" suffix. if (elements_.empty(kElementVideoResolution)) { elements_.insert(kElementVideoResolution, token->content); token->category = kIdentifier; continue; } } } }
bool Parser::CheckAnimeSeasonKeyword(const token_iterator_t token) { auto set_anime_season = [&](token_iterator_t first, token_iterator_t second, const string_t& content) { elements_.insert(kElementAnimeSeason, content); first->category = kIdentifier; second->category = kIdentifier; }; auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter); if (previous_token != tokens_.end()) { auto number = GetNumberFromOrdinal(previous_token->content); if (!number.empty()) { set_anime_season(previous_token, token, number); return true; } } auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter); if (next_token != tokens_.end() && IsNumericString(next_token->content)) { set_anime_season(token, next_token, next_token->content); return true; } return false; }
void Parser::SearchForKeywords() { for (auto it = tokens_.begin(); it != tokens_.end(); ++it) { auto& token = *it; if (token.category != kUnknown) continue; auto word = token.content; TrimString(word, L" -"); if (word.empty()) continue; // Don't bother if the word is a number that cannot be CRC if (word.size() != 8 && IsNumericString(word)) continue; // Performs better than making a case-insensitive Find auto keyword = keyword_manager.Normalize(word); ElementCategory category = kElementUnknown; KeywordOptions options; if (keyword_manager.Find(keyword, category, options)) { if (!options_.parse_release_group && category == kElementReleaseGroup) continue; if (!IsElementCategorySearchable(category) || !options.searchable) continue; if (IsElementCategorySingular(category) && !elements_.empty(category)) continue; if (category == kElementAnimeSeasonPrefix) { CheckAnimeSeasonKeyword(it); continue; } else if (category == kElementEpisodePrefix) { if (options.valid) CheckExtentKeyword(kElementEpisodeNumber, it); continue; } else if (category == kElementReleaseVersion) { word = word.substr(1); // number without "v" } else if (category == kElementVolumePrefix) { CheckExtentKeyword(kElementVolumeNumber, it); continue; } } else { if (elements_.empty(kElementFileChecksum) && IsCrc32(word)) { category = kElementFileChecksum; } else if (elements_.empty(kElementVideoResolution) && IsResolution(word)) { category = kElementVideoResolution; } } if (category != kElementUnknown) { elements_.insert(category, word); if (options.identifiable) token.category = kIdentifier; } } }
// Validates one part of an IP string.
//
// szIP: null-terminated text of the part to check; a null pointer is rejected.
//
// Returns true when the part is either the wildcard "*" or a numeric string
// whose value lies in [0, 255]; false otherwise.
bool CBanManager::IsValidIPPart ( const char* szIP )
{
    // Nothing to validate
    if ( !szIP )
        return false;

    if ( IsNumericString ( szIP ) )
    {
        // strtol is used instead of atoi: atoi has undefined behaviour when
        // the value does not fit in an int, whereas strtol saturates at
        // LONG_MIN/LONG_MAX, which the range check below rejects.
        char* szEnd = NULL;
        const long lIP = strtol ( szIP, &szEnd, 10 );

        // Require at least one digit and that the whole string was consumed,
        // in case IsNumericString lets through an empty or partly numeric
        // string (its exact rules are not visible here).
        const bool bFullyParsed = ( szEnd != szIP && *szEnd == '\0' );
        if ( bFullyParsed && lIP >= 0 && lIP < 256 )
            return true;
    }
    else if ( strcmp ( szIP, "*" ) == 0 )
        return true;

    return false;
}
void Parser::SearchForEpisodeNumber() { // List all unknown tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { auto& token = tokens_.at(i); if (token.category == kUnknown) if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; found_episode_keywords_ = !elements_.empty(kElementEpisodeNumber); // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; if (!elements_.empty(kElementEpisodeNumber)) return; // We have previously found an episode number via keywords // From now on, we're only interested in numeric tokens auto not_numeric_string = [&](size_t index) -> bool { return !IsNumericString(tokens_.at(index).content); }; tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string), tokens.end()); if (tokens.empty()) return; // e.g. "01 (176)", "29 (04)" if (SearchForEquivalentNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }
// Runs `word` through the known episode-number patterns, in a fixed order,
// and stops at the first matcher that succeeds.
//
// word:  candidate text (taken by value; spaces and dashes are trimmed from it
//        before matching).
// token: token handed through to the individual pattern matchers.
//
// Returns true when one of the matchers accepted the word, false otherwise,
// including when the word is purely numeric or empty after trimming.
bool Parser::MatchEpisodePatterns(string_t word, Token& token) {
  // All patterns contain at least one non-numeric character
  if (IsNumericString(word))
    return false;

  TrimString(word, L" -");

  // A word made up only of trimmed characters is empty at this point;
  // calling front()/back() on it would be undefined behaviour.
  if (word.empty())
    return false;

  const bool numeric_front = IsNumericChar(word.front());
  const bool numeric_back = IsNumericChar(word.back());

  // e.g. "01v2"
  if (numeric_front && numeric_back)
    if (MatchSingleEpisodePattern(word, token))
      return true;
  // e.g. "01-02", "03-05v2"
  if (numeric_front && numeric_back)
    if (MatchMultiEpisodePattern(word, token))
      return true;
  // e.g. "2x01", "S01E03", "S01-02xE001-150"
  if (numeric_back)
    if (MatchSeasonAndEpisodePattern(word, token))
      return true;
  // e.g. "ED1", "OP4a", "OVA2"
  if (!numeric_front)
    if (MatchTypeAndEpisodePattern(word, token))
      return true;
  // e.g. "07.5"
  if (numeric_front && numeric_back)
    if (MatchFractionalEpisodePattern(word, token))
      return true;
  // e.g. "4a", "111C"
  if (numeric_front && !numeric_back)
    if (MatchPartialEpisodePattern(word, token))
      return true;
  // e.g. "#01", "#02-03v2"
  if (numeric_back)
    if (MatchNumberSignPattern(word, token))
      return true;
  // U+8A71 is used as counter for stories, episodes of TV series, etc.
  if (numeric_front)
    if (MatchJapaneseCounterPattern(word, token))
      return true;

  return false;
}
bool Parser::NumberComesBeforeTotalNumber(const token_iterator_t token) { auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter); if (next_token != tokens_.end()) { if (IsStringEqualTo(next_token->content, L"of")) { auto other_token = FindNextToken(tokens_, next_token, kFlagNotDelimiter); if (other_token != tokens_.end()) { if (IsNumericString(other_token->content)) { SetEpisodeNumber(token->content, *token, false); next_token->category = kIdentifier; other_token->category = kIdentifier; return true; } } } } return false; }
void Parser::SearchForEpisodeNumber() { // List all tokens that contain a number std::vector<size_t> tokens; for (size_t i = 0; i < tokens_.size(); ++i) { auto& token = tokens_.at(i); if (token.category != kUnknown) continue; // Skip previously identified tokens if (FindNumberInString(token.content) != token.content.npos) tokens.push_back(i); } if (tokens.empty()) return; // If a token matches a known episode pattern, it has to be the episode number if (SearchForEpisodePatterns(tokens)) return; // From now on, we're only interested in numeric tokens auto not_numeric_string = [&](size_t index) -> bool { return !IsNumericString(tokens_.at(index).content); }; tokens.erase(std::remove_if(tokens.begin(), tokens.end(), not_numeric_string), tokens.end()); if (tokens.empty()) return; // e.g. "[12]", "(2006)" if (SearchForIsolatedNumbers(tokens)) return; // e.g. " - 08" if (SearchForSeparatedNumbers(tokens)) return; // Consider using the last number as a last resort SearchForLastNumber(tokens); }
bool Parser::SearchForEquivalentNumbers(std::vector<size_t>& tokens) { for (auto token_index = tokens.begin(); token_index != tokens.end(); ++token_index) { auto token = tokens_.begin() + *token_index; if (IsTokenIsolated(token)) continue; // Find the first enclosed, non-delimiter token auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter); if (next_token != tokens_.end() && next_token->category == kBracket) { next_token = FindNextToken(tokens_, next_token, kFlagEnclosed | kFlagNotDelimiter); } else { continue; } // See if it's an isolated number if (next_token != tokens_.end() && next_token->category == kUnknown && IsTokenIsolated(next_token) && IsNumericString(next_token->content)) { if (IsValidEpisodeNumber(token->content) && IsValidEpisodeNumber(next_token->content)) { auto lower_token = StringToInt(token->content) < StringToInt(next_token->content) ? token : next_token; SetEpisodeNumber(lower_token->content, *token, false); next_token->category = kIdentifier; return true; } } } return false; }
// Predicate over token indices: true when the content of the token at `index`
// is not a numeric string. Uses tokens_.at(), so an out-of-range index is
// reported by the container rather than read blindly.
bool Parser::not_numeric_string(size_t index) {
  const auto& entry = tokens_.at(index);
  const bool is_numeric = IsNumericString(entry.content);
  return !is_numeric;
}
bool CPickup::ReadSpecialData ( void ) { unsigned short usBuffer = 0; // Grab the "posX" data if ( !GetCustomDataFloat ( "posX", m_vecPosition.fX, true ) ) { CLogger::ErrorPrintf ( "Bad/missing 'posX' attribute in <pickup> (line %u)\n", m_uiLine ); return false; } // Grab the "posY" data if ( !GetCustomDataFloat ( "posY", m_vecPosition.fY, true ) ) { CLogger::ErrorPrintf ( "Bad/missing 'posY' attribute in <pickup> (line %u)\n", m_uiLine ); return false; } // Grab the "posZ" data if ( !GetCustomDataFloat ( "posZ", m_vecPosition.fZ, true ) ) { CLogger::ErrorPrintf ( "Bad/missing 'posZ' attribute in <pickup> (line %u)\n", m_uiLine ); return false; } // Put the collision object at the given xyz if ( m_pCollision ) m_pCollision->SetPosition ( m_vecPosition ); // Grab the "type" data char szBuffer [128]; if ( GetCustomDataString ( "type", szBuffer, 128, true ) ) { // Check what type it is m_bIsTypeRandom = false; if ( stricmp ( szBuffer, "health" ) == 0 ) { m_ucType = HEALTH; m_usModel = CPickupManager::GetHealthModel (); } else if ( stricmp ( szBuffer, "armor" ) == 0 ) { m_ucType = ARMOR; m_usModel = CPickupManager::GetArmorModel (); } else if ( IsNumericString ( szBuffer ) ) { // could be a weapon usBuffer = atoi ( szBuffer ); if ( CPickupManager::IsValidWeaponID ( usBuffer ) ) { // its a weapon m_ucType = WEAPON; m_usModel = CPickupManager::GetWeaponModel ( m_ucWeaponType ); } } else if ( stricmp ( szBuffer, "custom" ) == 0 ) { m_ucType = CUSTOM; m_usModel = 1700; } else if ( stricmp ( szBuffer, "random" ) == 0 ) { m_ucType = HEALTH; m_usModel = CPickupManager::GetHealthModel (); m_bIsTypeRandom = true; } else { CLogger::LogPrintf ( "WARNING: Unknown 'type' value in <pickup>; defaulting to \"random\" (line %u)\n", m_uiLine ); m_ucType = HEALTH; m_usModel = CPickupManager::GetHealthModel (); m_bIsTypeRandom = true; } } else { CLogger::ErrorPrintf ( "Bad/missing 'type' attribute in <pickup> (line %u)\n", m_uiLine ); return false; } // Is this a weapon pickup? 
if ( m_ucType == CPickup::WEAPON || m_bIsTypeRandom ) { // Remember the weapon type m_ucWeaponType = static_cast < unsigned char > ( usBuffer ); m_usModel = CPickupManager::GetWeaponModel ( m_ucWeaponType ); m_bIsWeaponTypeRandom = false; } // Is this a health pickup? if ( m_ucType == CPickup::HEALTH || m_ucType == CPickup::ARMOR || m_bIsTypeRandom ) { // Grab the "health" data if ( GetCustomDataString ( "amount", szBuffer, 128, true ) ) { // Is it random? if ( strcmp ( szBuffer, "random" ) == 0 ) { m_fAmount = 100.0f; m_bIsHealthRandom = true; } else { // Convert the health to an integer and limit it to 100 m_fAmount = static_cast < float > ( atoi ( szBuffer ) ); if ( m_fAmount > 100.0f ) m_fAmount = 100.0f; } } else { m_fAmount = 100.0f; m_bIsHealthRandom = false; } } // Is this a weapon pickup? int iTemp; if ( m_ucType == CPickup::WEAPON || m_bIsTypeRandom ) { // Grab the "ammo" data if ( GetCustomDataInt ( "amount", iTemp, true ) ) { // Limit it to 0-9999 if it was above if ( iTemp > 9999 ) iTemp = 9999; else if ( iTemp < 0 ) iTemp = 0; // Remember it m_usAmmo = static_cast < unsigned short > ( iTemp ); } else { m_usAmmo = 100; } } // Grab the "respawn" data if ( GetCustomDataInt ( "respawn", iTemp, true ) ) { // Make sure it's above 3 seconds if ( iTemp < 3000 ) iTemp = 3000; // Remember it m_ulRespawnIntervals = static_cast < unsigned long > ( iTemp ); } else { m_ulRespawnIntervals = 10000; } // Is this a custom pickup? if ( m_ucType == CPickup::CUSTOM ) { // Grab the "model" data if ( GetCustomDataInt ( "model", iTemp, true ) ) { // Valid id? 
if ( CObjectManager::IsValidModel ( iTemp ) || iTemp == 370 ) // 370 = jetpack - sort of a hack { // Set the object id m_usModel = static_cast < unsigned short > ( iTemp ); } else { CLogger::ErrorPrintf ( "Bad 'model' id specified in <pickup> (line %u)\n", m_uiLine ); return false; } } else { // Error out if custom is specified but no model id is CLogger::ErrorPrintf ( "Pickup type set to 'custom' but no 'model' id specified (line %u)\n", m_uiLine ); return false; } } if ( GetCustomDataInt ( "dimension", iTemp, true ) ) m_usDimension = static_cast < unsigned short > ( iTemp ); // Success return true; }
void Parser::SearchForKeywords() { for (auto& token : tokens_) { if (token.category != kUnknown) continue; auto word = token.content; TrimString(word); // Don't bother if the word is a number that cannot be CRC if (word.size() != 8 && IsNumericString(word)) continue; // Performs better than making a case-insensitive Find auto keyword = StringToUpperCopy(word); for (int i = kElementIterateFirst; i < kElementIterateLast; i++) { auto category = static_cast<ElementCategory>(i); if (!parse_options.parse_release_group) if (category == kElementReleaseGroup) continue; if (!IsElementCategorySearchable(category)) continue; if (IsElementCategorySingular(category)) if (!elements_.empty(category)) continue; bool add_keyword = false; KeywordOptions options; switch (category) { case kElementFileChecksum: add_keyword = IsCrc32(word); break; case kElementVideoResolution: add_keyword = IsResolution(word); break; default: add_keyword = keyword_manager.Find(category, keyword, options); break; } if (add_keyword) { switch (category) { case kElementReleaseVersion: elements_.insert(category, word.substr(1)); // number without "v" break; default: elements_.insert(category, word); break; } if (options.safe || token.enclosed) token.category = kIdentifier; break; } } } }