/**
 * Returns the first word of `string` as a view.
 *
 * The input is first passed through remove_leading_spaces() with the set
 * " \t\r\n" (presumably dropping leading characters from that set -- confirm
 * against that helper). The word then runs up to, but not including, the
 * first character from the same set; if no such character exists the whole
 * remaining view is returned.
 *
 * No copy is made: the result refers to the same storage as the view
 * produced by remove_leading_spaces().
 */
inline boost::string_ref extract_word(boost::string_ref string)
{
    const char whitespace[] = " \t\r\n";
    string = remove_leading_spaces(string, whitespace);

    // substr() clamps the length to the view's size, so a "not found" result
    // (npos) naturally selects everything that is left.
    return string.substr(0, string.find_first_of(whitespace));
}
/**
 * Checks whether a request matches this filter.
 *
 * The filter's settings are first checked against `dataSettings` through
 * SettingsApply(); when that fails the result is false. Otherwise every rule
 * part is applied in order, and each part must succeed for the filter to match.
 *
 * While walking the parts, `cursor` holds the offset just past the previous
 * match. The Separator and StringLiteral parts re-slice `data` at `cursor`
 * before searching, so `cursor` is always relative to the current `data` view
 * rather than to the original request.
 *
 * @param data          The full request string to test.
 * @param dataSettings  Settings describing the request.
 * @param dataHost      The host portion of the request.
 * @return true when the settings apply and every rule part matched.
 */
bool HttpAbpBaseFilter::IsMatch(boost::string_ref data, const HttpAbpFilterSettings dataSettings, boost::string_ref dataHost) const
{
    if (!SettingsApply(dataSettings, m_settings))
    {
        return false;
    }

    size_t cursor = 0;
    const auto partCount = m_ruleParts.size();

    for (size_t idx = 0; idx < partCount; ++idx)
    {
        const auto& part = m_ruleParts[idx];

        switch (m_rulePartTypes[idx])
        {
            // Anchored address matching is, as far as the original author
            // could tell, a roundabout way of saying the rule must match
            // against the host of the request.
            //
            // This is a double whammy: after matching inside the host we
            // still have to locate the host inside the full request so the
            // cursor can be placed just past the matched address. The host
            // is checked first specifically to avoid false positives, such
            // as search results that embed the target URL in GET parameters.
            case RulePartType::AnchoredAddress:
            {
                const auto partLen = part.size();
                if (partLen > dataHost.size())
                {
                    return false;
                }

                const auto posInHost = dataHost.find(part);
                if (posInHost == boost::string_ref::npos)
                {
                    return false;
                }

                const auto hostPosInRequest = data.find(dataHost);
                if (hostPosInRequest == boost::string_ref::npos)
                {
                    return false;
                }

                cursor = hostPosInRequest + posInHost + partLen;
                break;
            }

            // A wildcard consumes exactly one character here, so it only
            // needs one more character to be available.
            case RulePartType::Wildcard:
            {
                if (cursor + 1 > data.size())
                {
                    return false;
                }

                ++cursor;
                break;
            }

            // Find the next separator character at or after the cursor.
            case RulePartType::Separator:
            {
                if (cursor >= data.size())
                {
                    return false;
                }

                data = data.substr(cursor);
                const auto separatorPos = data.find_first_of(SeparatorStrRef);
                if (separatorPos == boost::string_ref::npos)
                {
                    return false;
                }

                cursor = separatorPos + 1;
                break;
            }

            // Find the literal text anywhere at or after the cursor.
            case RulePartType::StringLiteral:
            {
                if (cursor >= data.size())
                {
                    return false;
                }

                data = data.substr(cursor);
                const size_t literalPos = data.find(part);
                if (literalPos == boost::string_ref::npos)
                {
                    return false;
                }

                cursor = literalPos + part.size();
                break;
            }

            // Must be an exact match; this decides the result immediately
            // and any remaining rule parts are not consulted.
            case RulePartType::RequestLiteralMatch:
            {
                return util::string::Equal(data, part);
            }

            // Basically just a prefix comparison against the start of the
            // (current) request view.
            case RulePartType::RequestLiteralPartialMatch:
            {
                const auto partLen = part.size();
                if (partLen > data.size())
                {
                    return false;
                }

                auto prefix = data.substr(0, partLen);
                if (!util::string::Equal(part, prefix))
                {
                    return false;
                }

                cursor = partLen;
                break;
            }
        }
    }

    // Every rule part was satisfied, so the filter matches.
    return true;
}
/* irc_inline() - 1 single line received from serv */ void server::p_inline (const boost::string_ref& text) { session *sess; char *type; char *word[PDIWORDS+1]; char *word_eol[PDIWORDS+1]; message_tags_data tags_data = message_tags_data(); std::string pdibuf(text.size(), '\0'); sess = this->front_session; /* Python relies on this */ word[PDIWORDS] = NULL; word_eol[PDIWORDS] = NULL; std::string buf; if (text.starts_with('@')) { auto sep = text.find_first_of(' '); if (sep == boost::string_ref::npos) return; /* skip the '@' */ auto tags = text.substr(1, sep - 1); buf = text.substr(sep + 1).to_string(); handle_message_tags(*this, tags, tags_data); } else { buf = text.to_string(); } url_check_line(buf.data(), buf.size()); /* split line into words and words_to_end_of_line */ process_data_init (&pdibuf[0], &buf[0], word, word_eol, false, false); if (buf[0] == ':') { /* find a context for this message */ if (this->is_channel_name (word[3])) { auto tmp = find_channel (word[3]); if (tmp) sess = &(*tmp); } /* for server messages, the 2nd word is the "message type" */ type = word[2]; word[0] = type; word_eol[1] = &buf[0]; /* keep the ":" for plugins */ if (plugin_emit_server(sess, type, word, word_eol, tags_data.timestamp)) { return; } word[1]++; word_eol[1] = &buf[1]; /* but not for HexChat internally */ } else { word[0] = type = word[1]; if (plugin_emit_server(sess, type, word, word_eol, tags_data.timestamp)) { return; } } if (buf[0] != ':') { process_named_servermsg (sess, &buf[0], word[0], word_eol, &tags_data); return; } /* see if the second word is a numeric */ std::locale locale; if (std::isdigit (word[2][0], locale)) { char* t = word_eol[4]; if (*t == ':') t++; process_numeric (sess, atoi (word[2]), word, word_eol, t, &tags_data); } else { process_named_msg (sess, type, word, word_eol, &tags_data); } }