Example #1
0
// Check a single PICS rating label against its configured limit.
//   s   - the rating text to look in
//   l   - the label whose value should be checked
//   opt - the limit for this label from the config file
//   m   - text placed in front of the translated block message
// When the label is present and its value is greater than opt, the page is
// flagged as naughty with message 1000 and category "PICS".
void NaughtyFilter::checkPICSagainstoption(String s, const char *l, int opt, std::string m)
{
	// the rating does not mention this label at all
	if (s.indexOf(l) == -1) {
		return;
	}

	// keep what follows the label, dropping anything after the first space
	s = s.after(l);
	if (s.indexOf(" ") != -1) {
		s = s.before(" ");
	}

	// sanity check: there has to be something left to convert
	if (!(s.length() > 0)) {
		return;
	}

	const int rating = s.toInteger();
	if (!(opt < rating)) {
		return;  // at or below the configured limit
	}

	// over the limit
	isItNaughty = true;
	whatIsNaughty = m + " ";
	message_no = 1000;
	// PICS labeling level exceeded on the above site.
	whatIsNaughty += o.language_list.getTranslation(1000);
	whatIsNaughtyCategories = "PICS";
	whatIsNaughtyLog = whatIsNaughty;
}
/**
 * Send an HTTP/1.0 GET request for the given URL on a newly created socket.
 *
 * @param url  Target without a scheme ("http://" is prepended when the
 *             request line is built). The part before the first "/" is used
 *             as the host name; without a "/" the whole string is the host.
 * @return     The value returned by Socket::write() for the request.
 *
 * The host is passed to hostname_to_ip() and a new Socket for the resulting
 * address on port 80 is stored in mpSocket.
 */
int HTTPHandler::sendGetRequest(String url)
{
    /* the host is everything in front of the first slash */
    String host;
    if(url.contains("/"))
    {
        host = url.before("/");
    }
    else
    {
        host = url;
    }

    /* absolute-URI request line; no further headers are sent */
    String request;
    request = "GET http://" + url + " HTTP/1.0\r\n\r\n";

    /* get the ip addr */
    // NOTE(review): the outcome of hostname_to_ip() is not checked here, so a
    // failed lookup reaches the Socket constructor with whatever is in ip.
    std::string ip;
    hostname_to_ip(host,ip);

    // NOTE(review): whatever mpSocket pointed to before is overwritten without
    // being released in this function - confirm who owns the previous Socket.
    mpSocket = new Socket(ip,80);
    return mpSocket->write(request);
}
Example #3
0
	/**
	 * Read the parameters A and B of the given section into the lookup
	 * tables A_, B_ and is_defined_.
	 *
	 * The tables are flat arrays of size (number of atom types)^2. Every
	 * section entry whose key consists of two known atom type names is stored
	 * symmetrically, i.e. for (I, J) as well as for (J, I).
	 *
	 * @param parameters   the force field parameters to read from
	 * @param section_name the name of the section to extract
	 * @return false if the parameters are invalid or the section does not
	 *         define both variables "A" and "B", true otherwise
	 */
	bool Potential1210::extractSection
		(ForceFieldParameters& parameters, const String& section_name) 
	{
		// clear the fields first
		clear();

		// check whether the parameters are valid
		if (!parameters.isValid())
		{
			return false;
		}

		// extract the basis information
		// NOTE(review): the result of the base class call is not evaluated;
		// a missing section is only noticed through the variable check below.
		ParameterSection::extractSection(parameters, section_name);
		
		// check whether all variables we need are defined, terminate otherwise
		if (!hasVariable("A") || !hasVariable("B"))
		{
			return false;
		}

		// build a two dimensional array of the atom types
		AtomTypes& atom_types = parameters.getAtomTypes();
		number_of_atom_types_ = atom_types.getNumberOfTypes();
		
		// allocate two one-dimensional fields for the two parameters
		A_.resize(number_of_atom_types_ * number_of_atom_types_);
		B_.resize(number_of_atom_types_ * number_of_atom_types_);
		is_defined_.resize(number_of_atom_types_ * number_of_atom_types_);

		// nothing is defined until an entry for the type pair has been read
		for (Size i = 0; i < number_of_atom_types_ * number_of_atom_types_; i++) 
		{
			is_defined_[i] = false;
		}

		// determine the factor to convert the parameters to the standard units used
		// as a default, energies are assumed to be in kJ/mol and distances in Angstrom
		double factor_A = 1.0;
		double factor_B = 1.0;
		if (options.has("unit_A"))
		{
			if (options["unit_A"] == "kcal/mol*A^12")
			{
				factor_A = Constants::JOULE_PER_CAL;
			} 
			else 
			{
				Log.warn() << "unknown unit for parameter A: " << options["unit_A"] << endl;
			}
		}	
		
		if (options.has("unit_B"))
		{
			if (options["unit_B"] == "kcal/mol*A^10")
			{
				factor_B = Constants::JOULE_PER_CAL;
			} 
			else 
			{
				Log.warn() << "unknown unit for parameter B: " << options["unit_B"] << endl;
			}
		}	

		String				type_name_I;
		String				type_name_J;
		String				key;
		Index					index = 0;

		StringHashMap<Index>::Iterator it;
		for (it = section_entries_.begin(); !(it == section_entries_.end()); ++it)
		{
			key = (*it).first;

			// NOTE(review): this test also succeeds when the key contains no
			// blank at all (the "not found" result is greater than zero); such
			// keys are only rejected by the hasType() checks below - confirm.
			if ((key.size() > 0) && (key.find_first_of(" ", 0) > 0)) 
			{
				// the key has the form "<type I> <type J>"
				type_name_I = key.before(" ", 0);
				type_name_J = key.after(" ", 0);
				if ((atom_types.hasType(type_name_I)) && (atom_types.hasType(type_name_J))) 
				{
					const Atom::Type type_I = atom_types.getType(type_name_I);
					const Atom::Type type_J = atom_types.getType(type_name_J);

					// look up and convert each value once; both symmetric
					// table slots receive the same numbers
					const double value_A = getValue(key, "A").toFloat() * factor_A;
					const double value_B = getValue(key, "B").toFloat() * factor_B;

					// slot (I, J)
					index = (Index)(type_I * number_of_atom_types_ + type_J);
					is_defined_[index] = true;
					A_ [index] = value_A;
					B_ [index] = value_B;

					// slot (J, I)
					index = (Index)(type_I + number_of_atom_types_ * type_J);
					is_defined_[index] = true;
					A_ [index] = value_A;
					B_ [index] = value_B;
				}
			}
		}

		return true;
	}
Example #4
0
// the meat of the process 
// Look for ratings in a PICS label and pass each one to the checking function
// of every rating service named in the text in front of it.
//   label       - the PICS label text
//   filtergroup - index of the filter group whose pics2 expression and limits are used
// Returns as soon as one of the checks has set isItNaughty; does nothing when
// the filter group's pics2 expression does not match the label.
void NaughtyFilter::checkPICSrating(std::string label, unsigned int filtergroup)
{
	(*o.fg[filtergroup]).pics2.match(label.c_str());
	if (!(*o.fg[filtergroup]).pics2.matched()) {
		return;
	}			// exit if not found
	String lab(label.c_str());  // convert to a String for easy manip
	String r;
	String service;
	for (int i = 0; i < (*o.fg[filtergroup]).pics2.numberOfMatches(); i++) {
		r = (*o.fg[filtergroup]).pics2.result(i).c_str();  // ditto
		r = r.after("(");
		r = r.before(")");  // remove the brackets

		// Only check the substring of lab that is between
		// the start of lab (or the end of the previous match)
		// and the start of this rating.
		// It is possible to have multiple ratings in one pics-label.
		// This is done on e.g. http://www.jesusfilm.org/
		int start = 0;
		if (i > 0) {
			start = (*o.fg[filtergroup]).pics2.offset(i - 1) + (*o.fg[filtergroup]).pics2.length(i - 1);
		}
		// subString takes a start position and a length, so the distance from
		// start to this rating is passed - not the rating's absolute offset,
		// which would make the text run on into this rating and beyond.
		service = lab.subString(start, (*o.fg[filtergroup]).pics2.offset(i) - start);

		// check the text for words denoting a rating system, then pass the
		// rating String on to the appropriate function. The tests are
		// deliberately independent: more than one may apply to the same text.
		if (service.contains("safesurf")) {
			checkPICSratingSafeSurf(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("evaluweb")) {
			checkPICSratingevaluWEB(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("microsys")) {
			checkPICSratingCyberNOT(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("icra")) {
			checkPICSratingICRA(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("rsac")) {
			checkPICSratingRSAC(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("weburbia")) {
			checkPICSratingWeburbia(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("vancouver")) {
			checkPICSratingVancouver(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("icec")) {
			checkPICSratingICEC(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("safenet")) {
			checkPICSratingSafeNet(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
	}
}
Example #5
0
// check the phrase lists
// Run one phrase list over a block of text and record the verdict.
//   file, filelen - the text to search and its length
//   url, domain   - the current URL and its domain; only used for the embedded
//                   URL check (HAVE_PCRE builds), which is skipped when url is NULL
//   filtergroup   - index of the filter group supplying weights, modes and lists
//   phraselist    - index into o.lm.l of the phrase list to search with
//   limit         - weighted score above which the text is blocked
//   searchterms   - selects the "search term" variants of the message numbers
//
// Order of evaluation:
//   1. embedded URLs (optional) add to the weighted score
//   2. combination phrases: an exception returns at once, a ban is remembered,
//      weighted combinations add to the score
//   3. single phrases: an exception returns at once, a ban is remembered,
//      weighted phrases add to the score
//   4. naughtiness is updated, then bans are reported, then the weighted limit
//
// Results are left in the members isItNaughty, isException, message_no,
// whatIsNaughty (shown in the browser), whatIsNaughtyLog (written to the log),
// whatIsNaughtyCategories, whatIsNaughtyDisplayCategories, usedisplaycats and
// naughtiness.
void NaughtyFilter::checkphrase(char *file, off_t filelen, const String *url, const String *domain,
	unsigned int filtergroup, unsigned int phraselist, int limit, bool searchterms)
{
	int weighting = 0;
	int cat;
	std::string weightedphrase;
	
	// checkme: translate this?
	String currcat("Embedded URLs");

	// found categories list & reusable iterators
	std::map<int, listent> listcategories;

	// check for embedded references to banned sites/URLs.
	// have regexes that check for URLs in pages (look for attributes (src, href, javascript location)
	// or look for protocol strings (in which case, incl. ftp)?) and extract them.
	// then check the extracted list against the banned site/URL lists.
	// ADs category lists do not want to add to the possibility of a site being banned.
	// Exception lists are not checked.
	// Do not do full-blown category retrieval/duplicate checking; simply add the
	// "Embedded URLs" category.
	// Put a warning next to the option in the config file that this will take lots of CPU.
	// Support phrase mode 1/2 distinction (duplicate sites/URLs).
	// Have weight configurable per filter group, not globally or with a list directive - 
	//   a weight of 0 will disable the option, effectively making this functionality per-FG itself.

	// todo: if checkphrase is passed the domain & existing URL, it can create full URLs from relative ones.
	// if a src/href URL starts with a /, append it to the domain; otherwise, append it to the existing URL.
	// chop off anything after a ?, run through realPath, then put through the URL lists.

#ifdef HAVE_PCRE
	// if weighted phrases are enabled, and we have been passed a URL and domain, and embedded URL checking is enabled...
	// then check for embedded URLs!
	// NOTE(review): only url is tested for NULL here; domain is dereferenced
	// below for relative URLs starting with a slash - confirm callers always pass both.
	if (url != NULL && o.fg[filtergroup]->embedded_url_weight > 0) {
		std::map<int, listent>::iterator ourcat;
		bool catinited = false;
		std::map<String, unsigned int> found;
		std::map<String, unsigned int>::iterator founditem;

		String u;
		char* j;

		// check for absolute URLs
		if (absurl_re.match(file)) {
			// each match generates 2 results (because of the brackets in the regex), we're only interested in the first
#ifdef DGDEBUG
			std::cout << "Found " << absurl_re.numberOfMatches()/2 << " absolute URLs:" << std::endl;
#endif
			for (int i = 0; i < absurl_re.numberOfMatches(); i+=2) {
				// chop off quotes
				u = absurl_re.result(i);
				u = u.subString(1,u.length()-2);
#ifdef DGDEBUG
				std::cout << u << std::endl;
#endif
				if ((((j = o.fg[filtergroup]->inBannedSiteList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_site_list]->lastcategory.contains("ADs")))
					|| (((j = o.fg[filtergroup]->inBannedURLList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_url_list]->lastcategory.contains("ADs"))))
				{
					// duplicate checking
					// checkme: this should really be being done *before* we search the lists.
					// but because inBanned* methods do some cleaning up of their own, we don't know the form to check against.
					// we actually want these cleanups do be done before passing to inBanned*/inException* - this would
					// speed up ConnectionHandler a bit too.
					founditem = found.find(j);
					if ((o.fg[filtergroup]->weighted_phrase_mode == 2) && (founditem != found.end())) {
						founditem->second++;
					} else {
						// add the site to the found phrases list
						found[j] = 1;
						if (weightedphrase.length() == 0)
							weightedphrase = "[";
						else
							weightedphrase += " ";
						weightedphrase += j;
						if (!catinited) {
							listcategories[-1] = listent(o.fg[filtergroup]->embedded_url_weight,currcat);
							ourcat = listcategories.find(-1);
							catinited = true;
						} else
							ourcat->second.weight += o.fg[filtergroup]->embedded_url_weight;
					}
				}
			}
		}

		found.clear();

		// check for relative URLs
		if (relurl_re.match(file)) {
			// we don't want any parameters on the end of the current URL, since we append to it directly
			// when forming absolute URLs from relative ones. we do want a / on the end, too.
			String currurl(*url);
			if (currurl.contains("?"))
				currurl = currurl.before("?");
			// an empty URL has no last character to look at - just add the slash
			if (currurl.length() == 0 || currurl[currurl.length()-1] != '/')
				currurl += "/";

			// each match generates 2 results (because of the brackets in the regex), we're only interested in the first
#ifdef DGDEBUG
			std::cout << "Found " << relurl_re.numberOfMatches()/2 << " relative URLs:" << std::endl;
#endif
			for (int i = 0; i < relurl_re.numberOfMatches(); i+=2) {
				u = relurl_re.result(i);
				
				// can't find a way to negate submatches in PCRE, so it is entirely possible
				// that some absolute URLs have made their way into this list. we don't want them.
				if (u.contains("://"))
					continue;

#ifdef DGDEBUG
				std::cout << u << std::endl;
#endif
				// remove src/href & quotes
				u = u.after("=");
				u.removeWhiteSpace();
				u = u.subString(1,u.length()-2);
				
				// create absolute URL
				if (u[0] == '/')
					u = (*domain) + u;
				else
					u = currurl + u;
#ifdef DGDEBUG
				std::cout << "absolute form: " << u << std::endl;
#endif
				if ((((j = o.fg[filtergroup]->inBannedSiteList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_site_list]->lastcategory.contains("ADs")))
					|| (((j = o.fg[filtergroup]->inBannedURLList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_url_list]->lastcategory.contains("ADs"))))
				{
					// duplicate checking
					// checkme: this should really be being done *before* we search the lists.
					// but because inBanned* methods do some cleaning up of their own, we don't know the form to check against.
					// we actually want these cleanups do be done before passing to inBanned*/inException* - this would
					// speed up ConnectionHandler a bit too.
					founditem = found.find(j);
					if ((o.fg[filtergroup]->weighted_phrase_mode == 2) && (founditem != found.end())) {
						founditem->second++;
					} else {
						// add the site to the found phrases list
						found[j] = 1;
						if (weightedphrase.length() == 0)
							weightedphrase = "[";
						else
							weightedphrase += " ";
						weightedphrase += j;
						if (!catinited) {
							listcategories[-1] = listent(o.fg[filtergroup]->embedded_url_weight,currcat);
							ourcat = listcategories.find(-1);
							catinited = true;
						} else
							ourcat->second.weight += o.fg[filtergroup]->embedded_url_weight;
					}
				}
			}
		}
		if (catinited) {
			weighting = ourcat->second.weight;
			weightedphrase += "]";
#ifdef DGDEBUG
			std::cout << weightedphrase << std::endl;
			std::cout << "score from embedded URLs: " << ourcat->second.weight << std::endl;
#endif
		}
	}
#endif

	std::string bannedphrase;
	std::string exceptionphrase;
	String bannedcategory;
	int type, index, weight, time;
	bool allcmatched = true, bannedcombi = false;
	std::string s1;

	// this line here searches for phrases contained in the list - the rest of the code is all sorting
	// through it to find the categories, weightings, types etc. of what has actually been found.
	// found maps phrase -> (position in the list, number of occurrences)
	std::map<std::string, std::pair<unsigned int, int> > found;
	o.lm.l[phraselist]->graphSearch(found, file, filelen);

	// cache reusable iterators
	std::map<std::string, std::pair<unsigned int, int> >::iterator foundend = found.end();
	std::map<std::string, std::pair<unsigned int, int> >::iterator foundcurrent;

	// look for combinations first
	//if banned must wait for exception later
	std::string combifound;
	std::string combisofar;

	// combilist is walked as: part indexes..., -2 (end marker), type, time, weight, category
	std::vector<int>::iterator combicurrent = o.lm.l[phraselist]->combilist.begin();
	std::map<int, listent>::iterator catcurrent;
	int lowest_occurrences = 0;

	while (combicurrent != o.lm.l[phraselist]->combilist.end()) {
		// Grab the current combination phrase part
		index = *combicurrent;
		// Do stuff if what we have is an end marker (end of one list of parts)
		if (index == -2) {
			// Were all the parts in this combination matched?
			if (allcmatched) {
				type = *(++combicurrent);
				// check this time limit against the list of time limits
				time = *(++combicurrent);
				if (not (o.lm.l[phraselist]->checkTimeAtD(time))) {
					// nope - so don't take any notice of it
#ifdef DGDEBUG
					combicurrent++;
					cat = (*++combicurrent);
					std::cout << "Ignoring combi phrase based on time limits: " << combisofar << "; "
						<< o.lm.l[phraselist]->getListCategoryAtD(cat) << std::endl;
#else
					combicurrent += 2;
#endif
					combisofar = "";
				}
				else if (type == -1) {	// combination exception
					isItNaughty = false;
					isException = true;
					// Combination exception phrase found:
					// Combination exception search term found:
					message_no = searchterms ? 456 : 605;
					whatIsNaughtyLog = o.language_list.getTranslation(message_no);
					whatIsNaughtyLog += combisofar;
					whatIsNaughty = "";
					++combicurrent;
					cat = *(++combicurrent);
					whatIsNaughtyCategories = o.lm.l[phraselist]->getListCategoryAtD(cat);
					return;
				}
				else if (type == 1) {	// combination weighting
					weight = *(++combicurrent);
					// one lot of the weight, or one lot per occurrence of the whole
					// chain, depending on phrase filtering mode; the same amount is
					// used for the overall score and for the category score
					const int combiweight = weight * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : lowest_occurrences);
					weighting += combiweight;
					if (weight > 0) {
						cat = *(++combicurrent);
						//category index -1 indicates an uncategorised list
						if (cat >= 0) {
							//don't output duplicate categories
							catcurrent = listcategories.find(cat);
							if (catcurrent != listcategories.end()) {
								catcurrent->second.weight += combiweight;
							} else {
								currcat = o.lm.l[phraselist]->getListCategoryAtD(cat);
								listcategories[cat] = listent(combiweight,currcat);
							}
						}
					} else {
						// skip past category for negatively weighted phrases
						combicurrent++;
					}
					if (weightedphrase.length() > 0) {
						weightedphrase += "+";
					}
					weightedphrase += "(";
					if (weight < 0) {
						weightedphrase += "-" + combisofar;
					} else {
						weightedphrase += combisofar;
					}
#ifdef DGDEBUG
					std::cout << "found combi weighted phrase ("<< o.fg[filtergroup]->weighted_phrase_mode << "): "
						<< combisofar << " x" << lowest_occurrences << " (per phrase: "
						<< weight << ", calculated: "
						<< combiweight << ")"
						<< std::endl;
#endif

					weightedphrase += ")";
					combisofar = "";
				}
				else if (type == 0) {	// combination banned
					bannedcombi = true;
					combifound += "(" + combisofar + ")";
					combisofar = "";
					combicurrent += 2;
					cat = *(combicurrent);
					bannedcategory = o.lm.l[phraselist]->getListCategoryAtD(cat);
				}
				// the occurrence count belongs to the chain just finished;
				// the next chain has to start counting from scratch
				lowest_occurrences = 0;
			} else {
				// We had an end marker, but not all the parts so far were matched.
				// Reset the match flag ready for the next chain, and advance to its first part.
				allcmatched = true;
				combicurrent += 4;
				lowest_occurrences = 0;
			}
		} else {
			// We didn't get an end marker - just an individual part.
			// If all parts in the current chain have been matched so far, look for this one as well.
			if (allcmatched) {
				s1 =o.lm.l[phraselist]->getItemAtInt(index);
				if ((foundcurrent = found.find(s1)) == foundend) {
					allcmatched = false;
					combisofar = "";
				} else {
					if (combisofar.length() > 0) {
						combisofar += ", ";
					}
					combisofar += s1;
					// also track lowest number of times any one part occurs in the text
					// as this will correspond to the number of times the whole chain occurs
					if ((lowest_occurrences == 0) || (lowest_occurrences > foundcurrent->second.second)) {
						lowest_occurrences = foundcurrent->second.second;
					}
				}
			}
		}
		// Advance to the next part in the current chain
		combicurrent++;
	}

	// even if we already found a combi ban, we must still wait; there may be non-combi exceptions to follow

	// now check non-combi phrases
	foundcurrent = found.begin();
	while (foundcurrent != foundend) {
		// check time for current phrase
		if (not o.lm.l[phraselist]->checkTimeAt(foundcurrent->second.first)) {
#ifdef DGDEBUG
			std::cout << "Ignoring phrase based on time limits: "
				<< foundcurrent->first << ", "
				<< o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first) << std::endl;
#endif
			foundcurrent++;
			continue;
		}
		// 0=banned, 1=weighted, -1=exception, 2=combi, 3=weightedcombi
		type = o.lm.l[phraselist]->getTypeAt(foundcurrent->second.first);
		if (type == 0) {
			// if we already found a combi ban, we don't need to know this stuff
			if (!bannedcombi) {
				isItNaughty = true;
				bannedphrase = foundcurrent->first;
				bannedcategory = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, &cat);
			}
		}
		else if (type == 1) {
			// found a weighted phrase - either add one lot of its score, or one lot for every occurrence, depending on phrase filtering mode
			weight = o.lm.l[phraselist]->getWeightAt(foundcurrent->second.first) * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : foundcurrent->second.second);
			weighting += weight;
			if (weight > 0) {
				currcat = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, &cat);
				if (cat >= 0) {
					//don't output duplicate categories
					catcurrent = listcategories.find(cat);
					if (catcurrent != listcategories.end()) {
						// weight already includes the per-occurrence factor, so
						// it is added as it stands - exactly what went into weighting
						catcurrent->second.weight += weight;
					} else {
						listcategories[cat] = listent(weight,currcat);
					}
				}
			}

			if (o.show_weighted_found) {
				if (weightedphrase.length() > 0) {
					weightedphrase += "+";
				}
				if (weight < 0) {
					weightedphrase += "-";
				}

				weightedphrase += foundcurrent->first;
			}
#ifdef DGDEBUG
			std::cout << "found weighted phrase ("<< o.fg[filtergroup]->weighted_phrase_mode << "): "
				<< foundcurrent->first << " x" << foundcurrent->second.second << " (per phrase: "
				<< o.lm.l[phraselist]->getWeightAt(foundcurrent->second.first)
				<< ", calculated: " << weight << ")" << std::endl;
#endif
		}
		else if (type == -1) {
			isException = true;
			isItNaughty = false;
			// Exception phrase found:
			// Exception search term found:
			message_no = searchterms ? 457 : 604;
			whatIsNaughtyLog = o.language_list.getTranslation(message_no);
			whatIsNaughtyLog += foundcurrent->first;
			whatIsNaughty = "";
			whatIsNaughtyCategories = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, NULL);
			return;  // no point in going further
		}
		foundcurrent++;
	}

#ifdef DGDEBUG
	std::cout << "WEIGHTING: " << weighting << std::endl;
#endif

	// store the lowest negative weighting or highest positive weighting out of all filtering runs, preferring to store positive weightings.
	if ((weighting < 0 && naughtiness <= 0 && weighting < naughtiness) || (naughtiness >= 0 && weighting > naughtiness) || (naughtiness < 0 && weighting > 0) ) {
		naughtiness = weighting;
	}

#ifdef DGDEBUG
	std::cout << "NAUGHTINESS: " << naughtiness << std::endl;
#endif

	// *now* we can safely get down to the whole banning business!

	if (bannedcombi) {
		isItNaughty = true;
		// Banned combination phrase found:
		// Banned combination search term found:
		message_no = searchterms ? 452: 400;
		whatIsNaughtyLog = o.language_list.getTranslation(message_no);
		whatIsNaughtyLog += combifound;
		// Banned combination phrase found.
		// Banned combination search term found.
		whatIsNaughty = o.language_list.getTranslation(searchterms ? 453 : 401);
		whatIsNaughtyCategories = bannedcategory.toCharArray();
		return;
	}

	if (isItNaughty) {
		// Banned phrase found:
		// Banned search term found:
		message_no = searchterms ? 450: 300;
		whatIsNaughtyLog = o.language_list.getTranslation(message_no);
		whatIsNaughtyLog += bannedphrase;
		// Banned phrase found.
		// Banned search term found.
		whatIsNaughty = o.language_list.getTranslation(searchterms ? 451 : 301);
		whatIsNaughtyCategories = bannedcategory.toCharArray();
		return;
	}

	if (weighting > limit) {
		isItNaughty = true;
		// Weighted phrase limit of
		// Weighted search term limit of
		// (402/403 pair up like 400/401 and 300/301 above; 401 is the
		// "banned combination" text and does not belong here)
		message_no = searchterms ? 454: 402;
		whatIsNaughtyLog = o.language_list.getTranslation(message_no);
		whatIsNaughtyLog += String(limit).toCharArray();
		whatIsNaughtyLog += " : ";
		whatIsNaughtyLog += String(weighting).toCharArray();
		if (o.show_weighted_found) {
			whatIsNaughtyLog += " (";
			whatIsNaughtyLog += weightedphrase;
			whatIsNaughtyLog += ")";
		}
		// Weighted phrase limit exceeded.
		// Weighted search term limit exceeded.
		whatIsNaughty = o.language_list.getTranslation(searchterms ? 455 : 403);
		// Generate category list, sorted with highest scoring first.
		bool nonempty = false;
		bool belowthreshold = false;
		String categories;
		std::deque<listent> sortable_listcategories;
		catcurrent = listcategories.begin();
		while (catcurrent != listcategories.end()) {
			sortable_listcategories.push_back(catcurrent->second);
			catcurrent++;
		}
		std::sort(sortable_listcategories.begin(), sortable_listcategories.end());
		std::deque<listent>::iterator k = sortable_listcategories.begin();
		while (k != sortable_listcategories.end()) {
			// if category display threshold is in use, apply it
			if (!belowthreshold && (o.fg[filtergroup]->category_threshold > 0)
				&& (k->weight < o.fg[filtergroup]->category_threshold))
			{
				// everything collected so far is what gets displayed
				whatIsNaughtyDisplayCategories = categories.toCharArray();
				belowthreshold = true;
				usedisplaycats = true;
			}
			if (k->string.length() > 0) {
				if (nonempty) categories += ", ";
				categories += k->string;
				nonempty = true;
			}
			k++;
			// if category threshold is set to show only the top category,
			// everything after the first loop is below the threshold
			if (!belowthreshold && o.fg[filtergroup]->category_threshold < 0) {
				whatIsNaughtyDisplayCategories = categories.toCharArray();
				belowthreshold = true;
				usedisplaycats = true;
			}
		}
		whatIsNaughtyCategories = categories.toCharArray();
		return;
	}
	// whatIsNaughty is what is displayed in the browser
	// whatIsNaughtyLog is what is logged in the log file if at all
}
Example #6
0
// check data received from ICAP server and interpret as virus name & return value
// Read the ICAP server's reply from icapsock and turn it into a scan result.
//   icapsock   - socket the reply is read from
//   docheader  - header of the original document; its return code is compared
//                with the one in the encapsulated response when needsBody is set
//   object, objectsize - the data originally sent, compared with the start of
//                the returned body when needsBody is set
//   checkme    - passed on to blockFile() when an infection is reported
// Returns ICAP_NODATA when the first read yields nothing, ICAP_CONTINUE for a
// 100 reply, DGCS_CLEAN or DGCS_INFECTED for 204/200 replies, and
// DGCS_SCANERROR for a 404, any other code, or a std::exception while reading.
// lastvirusname / lastmessage are set to describe the outcome.
int icapinstance::doScan(Socket & icapsock, HTTPHeader * docheader, const char* object, unsigned int objectsize, NaughtyFilter * checkme)
{
	// the line buffer lives on the stack, so every return path (and the
	// exception handler) leaves without anything to release
	const int bufsize = 8192;
	char data[bufsize];
	try {
		String line;
		int rc = icapsock.getLine(data, bufsize, o.content_scanner_timeout);
		if (rc == 0)
			return ICAP_NODATA;
		line = data;
#ifdef DGDEBUG
		std::cout << "reply from icap: " << line << std::endl;
#endif
		// reply is of the format:
		// ICAP/1.0 204 No Content Necessary (etc)

		String returncode(line.after(" ").before(" "));

		if (returncode == "204") {
#ifdef DGDEBUG
			std::cerr << "ICAP says clean!" << std::endl;
#endif
			return DGCS_CLEAN;
		} else if (returncode == "100") {
#ifdef DGDEBUG
			std::cerr << "ICAP says continue!" << std::endl;
#endif
			// discard rest of headers (usually just a blank line)
			// this is so we are in the right place in the data stream to
			// call doScan() again later, because people like Symantec seem
			// to think sending code 100 then code 204 one after the other
			// is not an abuse of the ICAP specification.
			while (icapsock.getLine(data, bufsize, o.content_scanner_timeout) > 0) {
				if (data[0] == 13)	// line starting with CR ends the headers
					break;
			}
			return ICAP_CONTINUE;
		}
		else if (returncode == "200") {
#ifdef DGDEBUG
			std::cerr << "ICAP says maybe not clean!" << std::endl;
#endif
			while (icapsock.getLine(data, bufsize, o.content_scanner_timeout) > 0) {
				if (data[0] == 13)	// end marker
					break;
				line = data;
				// Symantec's engine gives us the virus name in the ICAP headers
				if (supportsXIF && line.startsWith("X-Infection-Found")) {
#ifdef DGDEBUG
					std::cout << "ICAP says infected! (X-Infection-Found)" << std::endl;
#endif
					lastvirusname = line.after("Threat=").before(";");
					
					blockFile(NULL,NULL,checkme);
					return DGCS_INFECTED;
				}
			}
			// AVIRA's Antivir gives us 200 in all cases, so
			// - unfortunately - we must pay attention to the encapsulated
			// header/body.
			if (needsBody) {
				// grab & compare the HTTP return code from modified response
				// if it's been modified, assume there's an infection
				icapsock.getLine(data, bufsize, o.content_scanner_timeout);
				line = data;
#ifdef DGDEBUG
				std::cout << "Comparing original return code to modified:" << std::endl << docheader->header.front() << std::endl << line << std::endl;
#endif
				int respmodReturnCode = line.after(" ").before(" ").toInteger();
				if (respmodReturnCode != docheader->returnCode()) {
#ifdef DGDEBUG
					std::cerr << "ICAP says infected! (returned header comparison)" << std::endl;
#endif
					lastvirusname = "Unknown";

					blockFile(NULL,NULL,checkme);
					return DGCS_INFECTED;
				}
				// ok - headers were identical, so look at encapsulated body
				// discard the rest of the encapsulated headers
				while (icapsock.getLine(data, bufsize, o.content_scanner_timeout) > 0) {
					if (data[0] == 13)
						break;
				}
				// grab body chunk size
#ifdef DGDEBUG
				std::cout << "Comparing original body data to modified" << std::endl;
#endif
				icapsock.getLine(data, bufsize, o.content_scanner_timeout);
				line = data;
				int bodysize = line.hexToInteger();
				// get, say, the first 100 bytes and compare them to what we
				// originally sent to see if it has been modified
				unsigned int chunksize = (bodysize < 100) ? bodysize : 100;
				if (chunksize > objectsize)
					chunksize = objectsize;
				icapsock.readFromSocket(data, chunksize, 0, o.content_scanner_timeout);
				if (memcmp(data, object, chunksize) == 0) {
#ifdef DGDEBUG
					std::cerr << "ICAP says clean!" << std::endl;
#endif
					return DGCS_CLEAN;
				} else {
#ifdef DGDEBUG
					std::cerr << "ICAP says infected! (body byte comparison)" << std::endl;
#endif
					lastvirusname = "Unknown";

					blockFile(NULL,NULL,checkme);
					return DGCS_INFECTED;
				}
			}
			// even if we don't find an X-Infection-Found header,
			// the file is still infected!
#ifdef DGDEBUG
			std::cerr << "ICAP says infected! (no further tests)" << std::endl;
#endif
			lastvirusname = "Unknown";

			blockFile(NULL,NULL,checkme);
			return DGCS_INFECTED;
		}
		else if (returncode == "404") {
#ifdef DGDEBUG
			std::cerr << "ICAP says no such service!" << std::endl;
#endif
			lastmessage = "ICAP reports no such service";
			syslog(LOG_ERR, "ICAP reports no such service; check your server URL");
			return DGCS_SCANERROR;
		} else {
#ifdef DGDEBUG
			std::cerr << "ICAP returned unrecognised response code: " << returncode << std::endl;
#endif
			lastmessage = "ICAP returned unrecognised response code.";
			syslog(LOG_ERR, "ICAP returned unrecognised response code: %s", returncode.toCharArray());
			return DGCS_SCANERROR;
		}
	}
	catch(std::exception & e) {
#ifdef DGDEBUG
		std::cerr << "Exception getting reply from ICAP: " << e.what() << std::endl;
#endif
		lastmessage = "Exception getting reply from ICAP.";
		syslog(LOG_ERR, "Exception getting reply from ICAP: %s", e.what());
		return DGCS_SCANERROR;
	}
	// it is generally NOT a good idea, when using virus scanning,
	// to continue as if nothing went wrong by default!
	return DGCS_SCANERROR;
}
Example #7
0
// initialise the plugin - determine icap ip, port & url
int icapinstance::init(void* args)
{
	// always include these lists
	if (!readStandardLists()) {
		return DGCS_ERROR;
	}

	icapurl = cv["icapurl"];  // format: icap://icapserver:1344/avscan
	if (icapurl.length() < 3) {
		if (!is_daemonised)
			std::cerr << "Error reading icapurl option." << std::endl;
		syslog(LOG_ERR, "Error reading icapurl option.");
		return DGCS_ERROR;
		// it would be far better to do a test connection
	}
	icaphost = icapurl.after("//");
	icapport = icaphost.after(":").before("/").toInteger();
	if (icapport == 0) {
		icapport = 1344;
	}
	icaphost = icaphost.before("/");
	if (icaphost.contains(":")) {
		icaphost = icaphost.before(":");
	}
	struct hostent *host;
	if ((host = gethostbyname(icaphost.toCharArray())) == 0) {
		if (!is_daemonised)
			std::cerr << "Error resolving icap host address." << std::endl;
		syslog(LOG_ERR, "Error resolving icap host address.");
		return DGCS_ERROR;
	}
	icapip = inet_ntoa(*(struct in_addr *) host->h_addr_list[0]);

#ifdef DGDEBUG
	std::cerr << "ICAP server address:" << icapip << std::endl;
#endif

	// try to connect to the ICAP server and perform an OPTIONS request
	Socket icapsock;
	try {
		if (icapsock.connect(icapip.toCharArray(), icapport) < 0) {
			throw std::runtime_error("Could not connect to server");
		}
		String line("OPTIONS " + icapurl + " ICAP/1.0\r\nHost: " + icaphost + "\r\n\r\n");
		icapsock.writeString(line.toCharArray());
		// parse the response
		char buff[8192];
		// first line - look for 200 OK
		icapsock.getLine(buff, 8192, o.content_scanner_timeout);
		line = buff;
#ifdef DGDEBUG
		std::cout << "ICAP/1.0 OPTIONS response:" << std::endl << line << std::endl;
#endif
		if (line.after(" ").before(" ") != "200") {
			if (!is_daemonised)
				std::cerr << "ICAP response not 200 OK" << std::endl;
			syslog(LOG_ERR, "ICAP response not 200 OK");
			return DGCS_WARNING;
			//throw std::runtime_error("Response not 200 OK");
		}
		while (icapsock.getLine(buff, 8192, o.content_scanner_timeout) > 0) {
			line = buff;
#ifdef DGDEBUG
			std::cout << line << std::endl;
#endif
			if (line.startsWith("\r")) {
				break;
			}
			else if (line.startsWith("Preview:")) {
				usepreviews = true;
				previewsize = line.after(": ").toInteger();
			}
			else if (line.startsWith("Server:")) {
				if (line.contains("AntiVir-WebGate")) {
					needsBody = true;
				}
			}
			else if (line.startsWith("X-Allow-Out:")) {
				if (line.contains("X-Infection-Found")) {
					supportsXIF = true;
				}
			}
		}
		icapsock.close();
	} catch(std::exception& e) {
		if (!is_daemonised)
			std::cerr << "ICAP server did not respond to OPTIONS request: " << e.what() << std::endl;
		syslog(LOG_ERR, "ICAP server did not respond to OPTIONS request: %s", e.what());
		return DGCS_ERROR;
	}
#ifdef DGDEBUG
	if (usepreviews)
		std::cout << "Message previews enabled; size: " << previewsize << std::endl;
	else
		std::cout << "Message previews disabled" << std::endl;
#endif
	return DGCS_OK;
}
Example #8
0
// Read a mixed list of IP addresses, subnets, ranges and hostnames from
// 'input', one entry per line, and file each entry in the matching container:
//   a.b.c.d            -> iplist        (host byte order)
//   a.b.c.d/m.m.m.m    -> ipsubnetlist  (mask + pre-masked address)
//   a.b.c.d/n          -> ipsubnetlist  (CIDR prefix length 0..32)
//   a.b.c.d-e.f.g.h    -> iprangelist   (start/end, host byte order)
//   anything else      -> hostlist      (lower-cased)
//
// Lines starting with '#' and lines shorter than 7 characters are skipped.
// If checkendstring is true, reading stops at the first line that starts
// with endstring. Entries that look numeric but are rejected by inet_aton
// (e.g. an octet out of range) are dropped. iplist and hostlist are sorted
// before returning.
//
// Always returns true.
bool IPList::ifsreadIPMelangeList(std::ifstream *input, bool checkendstring, const char *endstring)
{
    // compile regexps for determining whether a list entry is an IP, a subnet (IP + mask), or a range
    RegExp matchIP, matchSubnet, matchRange, matchCIDR;
#ifdef HAVE_PCRE
    matchIP.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
    matchSubnet.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
    matchCIDR.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}/\\d{1,2}$");
    matchRange.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}-\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
#else
    // dots are escaped so they only match a literal '.', not any character
    matchIP.comp("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$");
    matchSubnet.comp("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}/[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$");
    matchCIDR.comp("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}/[0-9]{1,2}$");
    matchRange.comp("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}-[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$");
#endif
    RegResult Rre;

    // read in the file; stops on a null stream, EOF or a read failure
    String line;
    char buffer[2048];
    while (input && input->getline(buffer, sizeof(buffer))) {
        line = buffer;
        if (checkendstring && line.startsWith(endstring)) {
            break;
        }

        // ignore comments
        if (buffer[0] == '#')
            continue;
        // ignore blank lines (and anything shorter than the shortest
        // possible dotted quad, "0.0.0.0")
        if (strlen(buffer) < 7)
            continue;
#ifdef DGDEBUG
        std::cout << "line: " << line << std::endl;
#endif
        // store the IP address (numerically, not as a string) and filter group in either the IP list, subnet list or range list
        if (matchIP.match(line.toCharArray(), Rre)) {
            struct in_addr address;
            if (inet_aton(line.toCharArray(), &address)) {
                uint32_t addr = ntohl(address.s_addr);
                iplist.push_back(addr);
            }
        } else if (matchSubnet.match(line.toCharArray(), Rre)) {
            struct in_addr address;
            struct in_addr addressmask;
            String subnet(line.before("/"));
            String mask(line.after("/"));
            if (inet_aton(subnet.toCharArray(), &address) && inet_aton(mask.toCharArray(), &addressmask)) {
                ipl_subnetstruct s;
                uint32_t addr = ntohl(address.s_addr);
                s.mask = ntohl(addressmask.s_addr);
                // pre-mask the address for quick comparison
                s.maskedaddr = addr & s.mask;
                ipsubnetlist.push_back(s);
            }
        } else if (matchCIDR.match(line.toCharArray(), Rre)) {
            struct in_addr address;
            String subnet(line.before("/"));
            String cidr(line.after("/"));
            int prefix_len = cidr.toInteger();
            // the regexp admits up to two digits, so reject anything above /32
            if (prefix_len >= 0 && prefix_len <= 32) {
                // Build the netmask numerically in host byte order. /0 is
                // handled separately: shifting a 32-bit value by 32 bits is
                // undefined behaviour.
                uint32_t netmask = 0;
                if (prefix_len > 0) {
                    netmask = 0xFFFFFFFFu << (32 - prefix_len);
                }
                if (inet_aton(subnet.toCharArray(), &address)) {
                    ipl_subnetstruct s;
                    uint32_t addr = ntohl(address.s_addr);
                    s.mask = netmask;
                    // pre-mask the address for quick comparison
                    s.maskedaddr = addr & s.mask;
                    ipsubnetlist.push_back(s);
                }
            }
        } else if (matchRange.match(line.toCharArray(), Rre)) {
            struct in_addr addressstart;
            struct in_addr addressend;
            String start(line.before("-"));
            String end(line.after("-"));
            if (inet_aton(start.toCharArray(), &addressstart) && inet_aton(end.toCharArray(), &addressend)) {
                ipl_rangestruct r;
                r.startaddr = ntohl(addressstart.s_addr);
                r.endaddr = ntohl(addressend.s_addr);
                iprangelist.push_back(r);
            }
        }
        // hmmm. the line didn't match any of our regular expressions.
        // assume it's a hostname.
        else {
            line.toLower();
            hostlist.push_back(line);
        }
    }
#ifdef DGDEBUG
    std::cout << "starting sort" << std::endl;
#endif
    std::sort(iplist.begin(), iplist.end());
    std::sort(hostlist.begin(), hostlist.end());
#ifdef DGDEBUG
    std::cout << "sort complete" << std::endl;
    std::cout << "ip list dump:" << std::endl;
    std::vector<uint32_t>::iterator i = iplist.begin();
    while (i != iplist.end()) {
        std::cout << "IP: " << *i << std::endl;
        ++i;
    }
    std::cout << "subnet list dump:" << std::endl;
    std::list<ipl_subnetstruct>::iterator j = ipsubnetlist.begin();
    while (j != ipsubnetlist.end()) {
        std::cout << "Masked IP: " << j->maskedaddr << " Mask: " << j->mask << std::endl;
        ++j;
    }
    std::cout << "range list dump:" << std::endl;
    std::list<ipl_rangestruct>::iterator k = iprangelist.begin();
    while (k != iprangelist.end()) {
        std::cout << "Start IP: " << k->startaddr << " End IP: " << k->endaddr << std::endl;
        ++k;
    }
    std::cout << "host list dump:" << std::endl;
    std::vector<String>::iterator l = hostlist.begin();
    while (l != hostlist.end()) {
        std::cout << "Hostname: " << *l << std::endl;
        ++l;
    }
#endif
    return true;
}