Beispiel #1
0
// Construct the object by loading SMARTS patterns and their names from a file.
//
// smarts_file is resolved through Path::find(); an empty result is treated as
// "file not found" and reported with a FileNotFound exception.
// Every non-empty line must contain a tab: the first whitespace-delimited
// word is stored in smarts_, the text following the first tab is stored
// (trimmed) in smart_names_. A non-empty line without a tab raises a
// GeneralException.
MolecularSimilarity::MolecularSimilarity(String smarts_file)
{
	Path path;
	String file = path.find(smarts_file);
	if(file=="")
	{
		throw BALL::Exception::FileNotFound(__FILE__,__LINE__,smarts_file);
	}
	std::ifstream smart_input(file.c_str());

	// read SMARTS-expression and names for those SMARTS from the specified file
	// (i counts lines read, including blank ones; the loop ends once the stream goes bad)
	for(Size i=0; smart_input; i++)
	{
		if(i%300==0) // prevent frequent resizing
		{
			// grow the reserved capacity in steps of 300 entries
			int a = (i/300)+1;
			smarts_.reserve(a*300);
			smart_names_.reserve(a*300);
		}

		String line;
		getline(smart_input,line);
		line.trim();
		if(line!="")
		{
			stringstream lstream(line);
			string s;
			lstream >> s;  // read first word but ignore the following comment (name/description of functional group)
			// NOTE(review): line is non-empty after trim(), so s is presumably never empty here - confirm
			if(s!="") smarts_.push_back(s);

			// the name/description is everything after the first tab character
			if(line.hasSubstring("\t")) smart_names_.push_back(String(line.after("\t")).trim());
			else
			{
				// the pattern (if any) was already appended to smarts_ above when this is thrown
				throw BALL::Exception::GeneralException(__FILE__,__LINE__,"MolecularSimilarity error","SMARTS file has wrong format! Maybe tabs are missing.");
			}
		}
	}
Beispiel #2
0
	/**	Read the A and B coefficients of the 12-10 potential from a parameter section.
	 *
	 *	The section is expected to define the variables "A" and "B" and to use keys of
	 *	the form "<type name I> <type name J>" (separated by a single blank). For every
	 *	key whose two type names are known to the atom type table, the coefficients are
	 *	stored symmetrically for (I,J) and (J,I) in flat arrays of size
	 *	number_of_atom_types_ * number_of_atom_types_, and is_defined_ is set for both.
	 *
	 *	The options "unit_A" / "unit_B" may request a conversion: "kcal/mol*A^12" and
	 *	"kcal/mol*A^10" multiply the respective value by Constants::JOULE_PER_CAL; any
	 *	other value only produces a warning and leaves the factor at 1.0.
	 *
	 *	@param	parameters		the force field parameters to read from
	 *	@param	section_name	the name of the section to extract
	 *	@return	false if the parameters are invalid or "A"/"B" are not defined, true otherwise
	 */
	bool Potential1210::extractSection
		(ForceFieldParameters& parameters, const String& section_name) 
	{
		// clear the fields first
		clear();

		// check whether the parameters are valid
		if (!parameters.isValid())
		{
			return false;
		}

		// extract the basis information
		ParameterSection::extractSection(parameters, section_name);
		
		// check whether all variables we need are defined, terminate otherwise
		if (!hasVariable("A") || !hasVariable("B"))
		{
			return false;
		}

		// build a two dimensional array of the atom types
		AtomTypes& atom_types = parameters.getAtomTypes();
		number_of_atom_types_ = atom_types.getNumberOfTypes();
		
		// allocate two onedimensional fields for the two parameters
		A_.resize(number_of_atom_types_ * number_of_atom_types_);
		B_.resize(number_of_atom_types_ * number_of_atom_types_);
		is_defined_.resize(number_of_atom_types_ * number_of_atom_types_);

		// nothing is defined until a matching section entry has been read
		for (Size i = 0; i < number_of_atom_types_ * number_of_atom_types_; i++) 
		{
			is_defined_[i] = false;
		}

		// determine the factor to convert the parameters to the standard units used
		// as a default, energies are assumed to be in kJ/mol and distances in Angstrom
		double factor_A = 1.0;
		double factor_B = 1.0;
		if (options.has("unit_A"))
		{
			if (options["unit_A"] == "kcal/mol*A^12")
			{
				factor_A = Constants::JOULE_PER_CAL;
			} 
			else 
			{
				Log.warn() << "unknown unit for parameter A: " << options["unit_A"] << endl;
			}
		}	
		
		if (options.has("unit_B"))
		{
			if (options["unit_B"] == "kcal/mol*A^10")
			{
				factor_B = Constants::JOULE_PER_CAL;
			} 
			else 
			{
				Log.warn() << "unknown unit for parameter B: " << options["unit_B"] << endl;
			}
		}	

		Atom::Type		type_I;
		Atom::Type		type_J;
		String				type_name_I;
		String				type_name_J;
		String				key;
		Index					index = 0;

		StringHashMap<Index>::Iterator it;
		for (it = section_entries_.begin(); !(it == section_entries_.end()); ++it)
		{
			key = (*it).first;

			// A usable key needs a blank that is not its first character.
			// The "not found" result has to be tested explicitly: it is the largest
			// possible position and would pass a plain "> 0" comparison.
			const std::string::size_type separator = key.find_first_of(" ", 0);
			if ((separator == std::string::npos) || (separator == 0))
			{
				continue;
			}

			type_name_I = key.before(" ", 0);
			type_name_J = key.after(" ", 0);
			if ((atom_types.hasType(type_name_I)) && (atom_types.hasType(type_name_J))) 
			{
				type_I = atom_types.getType(type_name_I);
				type_J = atom_types.getType(type_name_J);

				// convert each coefficient once and store it for both orderings
				const double value_A = getValue(key, "A").toFloat() * factor_A;
				const double value_B = getValue(key, "B").toFloat() * factor_B;

				// entry (I, J)
				index = (Index)(type_I * number_of_atom_types_ + type_J);
				is_defined_[index] = true;
				A_ [index] = value_A;
				B_ [index] = value_B;

				// entry (J, I) - the table is symmetric
				index = (Index)(type_I + number_of_atom_types_ * type_J);
				is_defined_[index] = true;
				A_ [index] = value_A;
				B_ [index] = value_B;
			}
		}

		return true;
	}
Beispiel #3
0
// the meat of the process 
void NaughtyFilter::checkPICSrating(std::string label, unsigned int filtergroup)
{
	(*o.fg[filtergroup]).pics2.match(label.c_str());
	if (!(*o.fg[filtergroup]).pics2.matched()) {
		return;
	}			// exit if not found
	String lab(label.c_str());  // convert to a String for easy manip
	String r;
	String service;
	for (int i = 0; i < (*o.fg[filtergroup]).pics2.numberOfMatches(); i++) {
		r = (*o.fg[filtergroup]).pics2.result(i).c_str();  // ditto
		r = r.after("(");
		r = r.before(")");  // remove the brackets

		// Only check the substring of lab that is between
		// the start of lab (or the end of the previous match)
		// and the start of this rating.
		// It is possible to have multiple ratings in one pics-label.
		// This is done on e.g. http://www.jesusfilm.org/
		if (i == 0) {
			service = lab.subString(0, (*o.fg[filtergroup]).pics2.offset(i));
		} else {
			service = lab.subString((*o.fg[filtergroup]).pics2.offset(i - 1) + (*o.fg[filtergroup]).pics2.length(i - 1), (*o.fg[filtergroup]).pics2.offset(i));
		}

		if (service.contains("safesurf")) {
			checkPICSratingSafeSurf(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("evaluweb")) {
			checkPICSratingevaluWEB(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("microsys")) {
			checkPICSratingCyberNOT(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("icra")) {
			checkPICSratingICRA(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("rsac")) {
			checkPICSratingRSAC(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("weburbia")) {
			checkPICSratingWeburbia(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("vancouver")) {
			checkPICSratingVancouver(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("icec")) {
			checkPICSratingICEC(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		if (service.contains("safenet")) {
			checkPICSratingSafeNet(r, filtergroup);
			if (isItNaughty) {
				return;
			}
		}
		// check label for word denoting rating system then pass on to the
		// appropriate function the rating String.
	}
}
Beispiel #4
0
// Check a block of text against a phrase list and record the verdict.
//
// Parameters:
//   file, filelen - the text to scan (handed to the URL regexes and to graphSearch)
//   url, domain   - current URL and domain, used to turn relative embedded URLs
//                   into absolute ones; embedded URL checking is skipped when url is NULL
//   filtergroup   - index into o.fg selecting the filter group settings
//   phraselist    - index into o.lm.l selecting the phrase list to search
//   limit         - weighted score above which the text is considered naughty
//   searchterms   - selects the "search term" message numbers instead of the "phrase" ones
//
// Results are stored in members: isItNaughty, isException, message_no,
// whatIsNaughty, whatIsNaughtyLog, whatIsNaughtyCategories,
// whatIsNaughtyDisplayCategories, usedisplaycats and naughtiness.
//
// Order of evaluation: embedded URLs (PCRE builds only), combination phrases,
// single phrases, then the actual decision. An exception phrase ends the
// check immediately; a ban is only acted upon after all phrases have been
// looked at, because an exception may still follow.
void NaughtyFilter::checkphrase(char *file, off_t filelen, const String *url, const String *domain,
	unsigned int filtergroup, unsigned int phraselist, int limit, bool searchterms)
{
	int weighting = 0;
	int cat;
	std::string weightedphrase;
	
	// checkme: translate this?
	String currcat("Embedded URLs");

	// found categories list & reusable iterators
	std::map<int, listent> listcategories;

	// check for embedded references to banned sites/URLs.
	// have regexes that check for URLs in pages (look for attributes (src, href, javascript location)
	// or look for protocol strings (in which case, incl. ftp)?) and extract them.
	// then check the extracted list against the banned site/URL lists.
	// ADs category lists do not want to add to the possibility of a site being banned.
	// Exception lists are not checked.
	// Do not do full-blown category retrieval/duplicate checking; simply add the
	// "Embedded URLs" category.
	// Put a warning next to the option in the config file that this will take lots of CPU.
	// Support phrase mode 1/2 distinction (duplicate sites/URLs).
	// Have weight configurable per filter group, not globally or with a list directive - 
	//   a weight of 0 will disable the option, effectively making this functionality per-FG itself.

	// todo: if checkphrase is passed the domain & existing URL, it can create full URLs from relative ones.
	// if a src/href URL starts with a /, append it to the domain; otherwise, append it to the existing URL.
	// chop off anything after a ?, run through realPath, then put through the URL lists.

#ifdef HAVE_PCRE
	// if weighted phrases are enabled, and we have been passed a URL and domain, and embedded URL checking is enabled...
	// then check for embedded URLs!
	if (url != NULL && o.fg[filtergroup]->embedded_url_weight > 0) {
		std::map<int, listent>::iterator ourcat;
		bool catinited = false;
		std::map<String, unsigned int> found;
		std::map<String, unsigned int>::iterator founditem;

		String u;
		char* j;

		// check for absolute URLs
		if (absurl_re.match(file)) {
			// each match generates 2 results (because of the brackets in the regex), we're only interested in the first
#ifdef DGDEBUG
			std::cout << "Found " << absurl_re.numberOfMatches()/2 << " absolute URLs:" << std::endl;
#endif
			for (int i = 0; i < absurl_re.numberOfMatches(); i+=2) {
				// chop off quotes
				u = absurl_re.result(i);
				u = u.subString(1,u.length()-2);
#ifdef DGDEBUG
				std::cout << u << std::endl;
#endif
				if ((((j = o.fg[filtergroup]->inBannedSiteList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_site_list]->lastcategory.contains("ADs")))
					|| (((j = o.fg[filtergroup]->inBannedURLList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_url_list]->lastcategory.contains("ADs"))))
				{
					// duplicate checking
					// checkme: this should really be being done *before* we search the lists.
					// but because inBanned* methods do some cleaning up of their own, we don't know the form to check against.
					// we actually want these cleanups do be done before passing to inBanned*/inException* - this would
					// speed up ConnectionHandler a bit too.
					founditem = found.find(j);
					if ((o.fg[filtergroup]->weighted_phrase_mode == 2) && (founditem != found.end())) {
						founditem->second++;
					} else {
						// add the site to the found phrases list
						found[j] = 1;
						if (weightedphrase.length() == 0)
							weightedphrase = "[";
						else
							weightedphrase += " ";
						weightedphrase += j;
						if (!catinited) {
							listcategories[-1] = listent(o.fg[filtergroup]->embedded_url_weight,currcat);
							ourcat = listcategories.find(-1);
							catinited = true;
						} else
							ourcat->second.weight += o.fg[filtergroup]->embedded_url_weight;
					}
				}
			}
		}

		found.clear();

		// check for relative URLs
		if (relurl_re.match(file)) {
			// we don't want any parameters on the end of the current URL, since we append to it directly
			// when forming absolute URLs from relative ones. we do want a / on the end, too.
			String currurl(*url);
			if (currurl.contains("?"))
				currurl = currurl.before("?");
			// an empty URL has no last character to look at - just give it the trailing slash
			if (currurl.length() == 0 || currurl[currurl.length()-1] != '/')
				currurl += "/";

			// each match generates 2 results (because of the brackets in the regex), we're only interested in the first
#ifdef DGDEBUG
			std::cout << "Found " << relurl_re.numberOfMatches()/2 << " relative URLs:" << std::endl;
#endif
			for (int i = 0; i < relurl_re.numberOfMatches(); i+=2) {
				u = relurl_re.result(i);
				
				// can't find a way to negate submatches in PCRE, so it is entirely possible
				// that some absolute URLs have made their way into this list. we don't want them.
				if (u.contains("://"))
					continue;

#ifdef DGDEBUG
				std::cout << u << std::endl;
#endif
				// remove src/href & quotes
				u = u.after("=");
				u.removeWhiteSpace();
				u = u.subString(1,u.length()-2);
				
				// create absolute URL
				if (u[0] == '/')
					u = (*domain) + u;
				else
					u = currurl + u;
#ifdef DGDEBUG
				std::cout << "absolute form: " << u << std::endl;
#endif
				if ((((j = o.fg[filtergroup]->inBannedSiteList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_site_list]->lastcategory.contains("ADs")))
					|| (((j = o.fg[filtergroup]->inBannedURLList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_url_list]->lastcategory.contains("ADs"))))
				{
					// duplicate checking
					// checkme: this should really be being done *before* we search the lists.
					// but because inBanned* methods do some cleaning up of their own, we don't know the form to check against.
					// we actually want these cleanups do be done before passing to inBanned*/inException* - this would
					// speed up ConnectionHandler a bit too.
					founditem = found.find(j);
					if ((o.fg[filtergroup]->weighted_phrase_mode == 2) && (founditem != found.end())) {
						founditem->second++;
					} else {
						// add the site to the found phrases list
						found[j] = 1;
						if (weightedphrase.length() == 0)
							weightedphrase = "[";
						else
							weightedphrase += " ";
						weightedphrase += j;
						if (!catinited) {
							listcategories[-1] = listent(o.fg[filtergroup]->embedded_url_weight,currcat);
							ourcat = listcategories.find(-1);
							catinited = true;
						} else
							ourcat->second.weight += o.fg[filtergroup]->embedded_url_weight;
					}
				}
			}
		}
		if (catinited) {
			weighting = ourcat->second.weight;
			weightedphrase += "]";
#ifdef DGDEBUG
			std::cout << weightedphrase << std::endl;
			std::cout << "score from embedded URLs: " << ourcat->second.weight << std::endl;
#endif
		}
	}
#endif

	std::string bannedphrase;
	std::string exceptionphrase;
	String bannedcategory;
	int type, index, weight, time;
	bool allcmatched = true, bannedcombi = false;
	std::string s1;

	// this line here searches for phrases contained in the list - the rest of the code is all sorting
	// through it to find the categories, weightings, types etc. of what has actually been found.
	// found maps phrase -> (index handed to the *At() accessors, number of occurrences)
	std::map<std::string, std::pair<unsigned int, int> > found;
	o.lm.l[phraselist]->graphSearch(found, file, filelen);

	// cache reusable iterators
	std::map<std::string, std::pair<unsigned int, int> >::iterator foundend = found.end();
	std::map<std::string, std::pair<unsigned int, int> >::iterator foundcurrent;

	// look for combinations first
	//if banned must wait for exception later
	std::string combifound;
	std::string combisofar;

	// combilist is a flat sequence: the part indices of one combination, then the
	// end marker -2 followed by four more entries (type, time limit, weight, category)
	std::vector<int>::iterator combicurrent = o.lm.l[phraselist]->combilist.begin();
	std::map<int, listent>::iterator catcurrent;
	int lowest_occurrences = 0;

	while (combicurrent != o.lm.l[phraselist]->combilist.end()) {
		// Grab the current combination phrase part
		index = *combicurrent;
		// Do stuff if what we have is an end marker (end of one list of parts)
		if (index == -2) {
			// Were all the parts in this combination matched?
			if (allcmatched) {
				type = *(++combicurrent);
				// check this time limit against the list of time limits
				time = *(++combicurrent);
				if (not (o.lm.l[phraselist]->checkTimeAtD(time))) {
					// nope - so don't take any notice of it
#ifdef DGDEBUG
					combicurrent++;
					cat = (*++combicurrent);
					std::cout << "Ignoring combi phrase based on time limits: " << combisofar << "; "
						<< o.lm.l[phraselist]->getListCategoryAtD(cat) << std::endl;
#else
					combicurrent += 2;
#endif
					combisofar = "";
				}
				else if (type == -1) {	// combination exception
					isItNaughty = false;
					isException = true;
					// Combination exception phrase found:
					// Combination exception search term found:
					message_no = searchterms ? 456 : 605;
					whatIsNaughtyLog = o.language_list.getTranslation(message_no);
					whatIsNaughtyLog += combisofar;
					whatIsNaughty = "";
					++combicurrent;
					cat = *(++combicurrent);
					whatIsNaughtyCategories = o.lm.l[phraselist]->getListCategoryAtD(cat);
					return;
				}
				else if (type == 1) {	// combination weighting
					weight = *(++combicurrent);
					// score of this combination: once, or once per occurrence, depending on the phrase mode
					const int combiscore = weight * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : lowest_occurrences);
					weighting += combiscore;
					if (weight > 0) {
						cat = *(++combicurrent);
						//category index -1 indicates an uncategorised list
						if (cat >= 0) {
							//don't output duplicate categories
							catcurrent = listcategories.find(cat);
							if (catcurrent != listcategories.end()) {
								catcurrent->second.weight += combiscore;
							} else {
								currcat = o.lm.l[phraselist]->getListCategoryAtD(cat);
								// a new category starts with the same score that was added to the
								// total, so it does not matter which phrase of a category comes first
								listcategories[cat] = listent(combiscore,currcat);
							}
						}
					} else {
						// skip past category for negatively weighted phrases
						combicurrent++;
					}
					if (weightedphrase.length() > 0) {
						weightedphrase += "+";
					}
					weightedphrase += "(";
					if (weight < 0) {
						weightedphrase += "-" + combisofar;
					} else {
						weightedphrase += combisofar;
					}
#ifdef DGDEBUG
					std::cout << "found combi weighted phrase ("<< o.fg[filtergroup]->weighted_phrase_mode << "): "
						<< combisofar << " x" << lowest_occurrences << " (per phrase: "
						<< weight << ", calculated: "
						<< combiscore << ")"
						<< std::endl;
#endif

					weightedphrase += ")";
					combisofar = "";
				}
				else if (type == 0) {	// combination banned
					bannedcombi = true;
					combifound += "(" + combisofar + ")";
					combisofar = "";
					combicurrent += 2;
					cat = *(combicurrent);
					bannedcategory = o.lm.l[phraselist]->getListCategoryAtD(cat);
				}
			} else {
				// We had an end marker, but not all the parts so far were matched.
				// Reset the match flag ready for the next chain, and advance to its first part.
				allcmatched = true;
				combicurrent += 4;
				lowest_occurrences = 0;
			}
		} else {
			// We didn't get an end marker - just an individual part.
			// If all parts in the current chain have been matched so far, look for this one as well.
			if (allcmatched) {
				s1 =o.lm.l[phraselist]->getItemAtInt(index);
				if ((foundcurrent = found.find(s1)) == foundend) {
					allcmatched = false;
					combisofar = "";
				} else {
					if (combisofar.length() > 0) {
						combisofar += ", ";
					}
					combisofar += s1;
					// also track lowest number of times any one part occurs in the text
					// as this will correspond to the number of times the whole chain occurs
					if ((lowest_occurrences == 0) || (lowest_occurrences > foundcurrent->second.second)) {
						lowest_occurrences = foundcurrent->second.second;
					}
				}
			}
		}
		// Advance to the next part in the current chain
		combicurrent++;
	}

	// even if we already found a combi ban, we must still wait; there may be non-combi exceptions to follow

	// now check non-combi phrases
	foundcurrent = found.begin();
	while (foundcurrent != foundend) {
		// check time for current phrase
		if (not o.lm.l[phraselist]->checkTimeAt(foundcurrent->second.first)) {
#ifdef DGDEBUG
			std::cout << "Ignoring phrase based on time limits: "
				<< foundcurrent->first << ", "
				<< o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first) << std::endl;
#endif
			foundcurrent++;
			continue;
		}
		// 0=banned, 1=weighted, -1=exception, 2=combi, 3=weightedcombi
		type = o.lm.l[phraselist]->getTypeAt(foundcurrent->second.first);
		if (type == 0) {
			// if we already found a combi ban, we don't need to know this stuff
			if (!bannedcombi) {
				isItNaughty = true;
				bannedphrase = foundcurrent->first;
				bannedcategory = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, &cat);
			}
		}
		else if (type == 1) {
			// found a weighted phrase - either add one lot of its score, or one lot for every occurrence, depending on phrase filtering mode
			weight = o.lm.l[phraselist]->getWeightAt(foundcurrent->second.first) * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : foundcurrent->second.second);
			weighting += weight;
			if (weight > 0) {
				currcat = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, &cat);
				if (cat >= 0) {
					//don't output duplicate categories
					catcurrent = listcategories.find(cat);
					if (catcurrent != listcategories.end()) {
						// weight already includes the per-occurrence multiplier (see above),
						// so it must not be multiplied by the occurrence count a second time
						catcurrent->second.weight += weight;
					} else {
						listcategories[cat] = listent(weight,currcat);
					}
				}
			}

			if (o.show_weighted_found) {
				if (weightedphrase.length() > 0) {
					weightedphrase += "+";
				}
				if (weight < 0) {
					weightedphrase += "-";
				}

				weightedphrase += foundcurrent->first;
			}
#ifdef DGDEBUG
			std::cout << "found weighted phrase ("<< o.fg[filtergroup]->weighted_phrase_mode << "): "
				<< foundcurrent->first << " x" << foundcurrent->second.second << " (per phrase: "
				<< o.lm.l[phraselist]->getWeightAt(foundcurrent->second.first)
				<< ", calculated: " << weight << ")" << std::endl;
#endif
		}
		else if (type == -1) {
			isException = true;
			isItNaughty = false;
			// Exception phrase found:
			// Exception search term found:
			message_no = searchterms ? 457 : 604;
			whatIsNaughtyLog = o.language_list.getTranslation(message_no);
			whatIsNaughtyLog += foundcurrent->first;
			whatIsNaughty = "";
			whatIsNaughtyCategories = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, NULL);
			return;  // no point in going further
		}
		foundcurrent++;
	}

#ifdef DGDEBUG
	std::cout << "WEIGHTING: " << weighting << std::endl;
#endif

	// store the lowest negative weighting or highest positive weighting out of all filtering runs, preferring to store positive weightings.
	if ((weighting < 0 && naughtiness <= 0 && weighting < naughtiness) || (naughtiness >= 0 && weighting > naughtiness) || (naughtiness < 0 && weighting > 0) ) {
		naughtiness = weighting;
	}

#ifdef DGDEBUG
	std::cout << "NAUGHTINESS: " << naughtiness << std::endl;
#endif

	// *now* we can safely get down to the whole banning business!

	if (bannedcombi) {
		isItNaughty = true;
		// Banned combination phrase found:
		// Banned combination search term found:
		message_no = searchterms ? 452: 400;
		whatIsNaughtyLog = o.language_list.getTranslation(message_no);
		whatIsNaughtyLog += combifound;
		// Banned combination phrase found.
		// Banned combination search term found.
		whatIsNaughty = o.language_list.getTranslation(searchterms ? 453 : 401);
		whatIsNaughtyCategories = bannedcategory.toCharArray();
		return;
	}

	if (isItNaughty) {
		// Banned phrase found:
		// Banned search term found:
		message_no = searchterms ? 450: 300;
		whatIsNaughtyLog = o.language_list.getTranslation(message_no);
		whatIsNaughtyLog += bannedphrase;
		// Banned phrase found.
		// Banned search term found.
		whatIsNaughty = o.language_list.getTranslation(searchterms ? 451 : 301);
		whatIsNaughtyCategories = bannedcategory.toCharArray();
		return;
	}

	if (weighting > limit) {
		isItNaughty = true;
		// Weighted phrase limit of
		// Weighted search term limit of
		// (402, not 401: 401 is the "Banned combination phrase found." text used above)
		message_no = searchterms ? 454: 402;
		whatIsNaughtyLog = o.language_list.getTranslation(message_no);
		whatIsNaughtyLog += String(limit).toCharArray();
		whatIsNaughtyLog += " : ";
		whatIsNaughtyLog += String(weighting).toCharArray();
		if (o.show_weighted_found) {
			whatIsNaughtyLog += " (";
			whatIsNaughtyLog += weightedphrase;
			whatIsNaughtyLog += ")";
		}
		// Weighted phrase limit exceeded.
		// Weighted search term limit exceeded.
		whatIsNaughty = o.language_list.getTranslation(searchterms ? 455 : 403);
		// Generate category list, sorted with highest scoring first.
		bool nonempty = false;
		bool belowthreshold = false;
		String categories;
		std::deque<listent> sortable_listcategories;
		catcurrent = listcategories.begin();
		while (catcurrent != listcategories.end()) {
			sortable_listcategories.push_back(catcurrent->second);
			catcurrent++;
		}
		std::sort(sortable_listcategories.begin(), sortable_listcategories.end());
		std::deque<listent>::iterator k = sortable_listcategories.begin();
		while (k != sortable_listcategories.end()) {
			// if category display threshold is in use, apply it
			if (!belowthreshold && (o.fg[filtergroup]->category_threshold > 0)
				&& (k->weight < o.fg[filtergroup]->category_threshold))
			{
				whatIsNaughtyDisplayCategories = categories.toCharArray();
				belowthreshold = true;
				usedisplaycats = true;
			}
			if (k->string.length() > 0) {
				if (nonempty) categories += ", ";
				categories += k->string;
				nonempty = true;
			}
			k++;
			// if category threshold is set to show only the top category,
			// everything after the first loop is below the threshold
			if (!belowthreshold && o.fg[filtergroup]->category_threshold < 0) {
				whatIsNaughtyDisplayCategories = categories.toCharArray();
				belowthreshold = true;
				usedisplaycats = true;
			}
		}
		whatIsNaughtyCategories = categories.toCharArray();
		return;
	}
	// whatIsNaughty is what is displayed in the browser
	// whatIsNaughtyLog is what is logged in the log file if at all
}
Beispiel #5
0
// Read the ICAP server's reply from icapsock and turn it into a scan result.
//
// Parameters:
//   icapsock   - socket the reply is read from
//   docheader  - header of the original document; its return code is compared
//                with the one in the modified response when needsBody is set
//   object     - the data that was sent for scanning
//   objectsize - number of bytes in object
//   checkme    - passed on to blockFile() when an infection is reported
//
// Returns ICAP_NODATA when nothing could be read, ICAP_CONTINUE for a 100
// reply, DGCS_CLEAN / DGCS_INFECTED for a verdict, and DGCS_SCANERROR for a
// 404, an unrecognised status code, a malformed chunk size or an exception.
// lastvirusname / lastmessage are filled in accordingly.
int icapinstance::doScan(Socket & icapsock, HTTPHeader * docheader, const char* object, unsigned int objectsize, NaughtyFilter * checkme)
{
	// A stack buffer (as in init()) needs no clean-up, so none of the many
	// return paths - nor an exception thrown half way through - can leak it
	// or free it twice.
	const int bufsize = 8192;
	char data[bufsize];
	try {
		String line;
		int rc = icapsock.getLine(data, bufsize, o.content_scanner_timeout);
		if (rc == 0)
			return ICAP_NODATA;
		line = data;
#ifdef DGDEBUG
		std::cout << "reply from icap: " << line << std::endl;
#endif
		// reply is of the format:
		// ICAP/1.0 204 No Content Necessary (etc)

		String returncode(line.after(" ").before(" "));

		if (returncode == "204") {
#ifdef DGDEBUG
			std::cerr << "ICAP says clean!" << std::endl;
#endif
			return DGCS_CLEAN;
		} else if (returncode == "100") {
#ifdef DGDEBUG
			std::cerr << "ICAP says continue!" << std::endl;
#endif
			// discard rest of headers (usually just a blank line)
			// this is so we are in the right place in the data stream to
			// call doScan() again later, because people like Symantec seem
			// to think sending code 100 then code 204 one after the other
			// is not an abuse of the ICAP specification.
			while (icapsock.getLine(data, bufsize, o.content_scanner_timeout) > 0) {
				if (data[0] == 13)	// line starts with CR: blank line
					break;
			}
			return ICAP_CONTINUE;
		}
		else if (returncode == "200") {
#ifdef DGDEBUG
			std::cerr << "ICAP says maybe not clean!" << std::endl;
#endif
			while (icapsock.getLine(data, bufsize, o.content_scanner_timeout) > 0) {
				if (data[0] == 13)	// end marker
					break;
				line = data;
				// Symantec's engine gives us the virus name in the ICAP headers
				if (supportsXIF && line.startsWith("X-Infection-Found")) {
#ifdef DGDEBUG
					std::cout << "ICAP says infected! (X-Infection-Found)" << std::endl;
#endif
					lastvirusname = line.after("Threat=").before(";");
					
					blockFile(NULL,NULL,checkme);
					return DGCS_INFECTED;
				}
			}
			// AVIRA's Antivir gives us 200 in all cases, so
			// - unfortunately - we must pay attention to the encapsulated
			// header/body.
			if (needsBody) {
				// grab & compare the HTTP return code from modified response
				// if it's been modified, assume there's an infection
				icapsock.getLine(data, bufsize, o.content_scanner_timeout);
				line = data;
#ifdef DGDEBUG
				std::cout << "Comparing original return code to modified:" << std::endl << docheader->header.front() << std::endl << line << std::endl;
#endif
				int respmodReturnCode = line.after(" ").before(" ").toInteger();
				if (respmodReturnCode != docheader->returnCode()) {
#ifdef DGDEBUG
					std::cerr << "ICAP says infected! (returned header comparison)" << std::endl;
#endif
					lastvirusname = "Unknown";

					blockFile(NULL,NULL,checkme);
					return DGCS_INFECTED;
				}
				// ok - headers were identical, so look at encapsulated body
				// discard the rest of the encapsulated headers
				while (icapsock.getLine(data, bufsize, o.content_scanner_timeout) > 0) {
					if (data[0] == 13)
						break;
				}
				// grab body chunk size
#ifdef DGDEBUG
				std::cout << "Comparing original body data to modified" << std::endl;
#endif
				icapsock.getLine(data, bufsize, o.content_scanner_timeout);
				line = data;
				int bodysize = line.hexToInteger();
				// The size comes from the server. A negative value would turn into a
				// huge unsigned chunk size below and allow a read of up to objectsize
				// bytes into the fixed-size buffer, so treat it as a scan error
				// (handled by the catch block at the end).
				if (bodysize < 0)
					throw std::runtime_error("Invalid chunk size in ICAP response");
				// get, say, the first 100 bytes and compare them to what we
				// originally sent to see if it has been modified
				unsigned int chunksize = (bodysize < 100) ? bodysize : 100;
				if (chunksize > objectsize)
					chunksize = objectsize;
				icapsock.readFromSocket(data, chunksize, 0, o.content_scanner_timeout);
				if (memcmp(data, object, chunksize) == 0) {
#ifdef DGDEBUG
					std::cerr << "ICAP says clean!" << std::endl;
#endif
					return DGCS_CLEAN;
				} else {
#ifdef DGDEBUG
					std::cerr << "ICAP says infected! (body byte comparison)" << std::endl;
#endif
					lastvirusname = "Unknown";

					blockFile(NULL,NULL,checkme);
					return DGCS_INFECTED;
				}
			}
			// even if we don't find an X-Infection-Found header,
			// the file is still infected!
#ifdef DGDEBUG
			std::cerr << "ICAP says infected! (no further tests)" << std::endl;
#endif
			lastvirusname = "Unknown";

			blockFile(NULL,NULL,checkme);
			return DGCS_INFECTED;
		}
		else if (returncode == "404") {
#ifdef DGDEBUG
			std::cerr << "ICAP says no such service!" << std::endl;
#endif
			lastmessage = "ICAP reports no such service";
			syslog(LOG_ERR, "ICAP reports no such service; check your server URL");
			return DGCS_SCANERROR;
		} else {
#ifdef DGDEBUG
			std::cerr << "ICAP returned unrecognised response code: " << returncode << std::endl;
#endif
			lastmessage = "ICAP returned unrecognised response code.";
			syslog(LOG_ERR, "ICAP returned unrecognised response code: %s", returncode.toCharArray());
			return DGCS_SCANERROR;
		}
	}
	catch(std::exception & e) {
#ifdef DGDEBUG
		std::cerr << "Exception getting reply from ICAP: " << e.what() << std::endl;
#endif
		lastmessage = "Exception getting reply from ICAP.";
		syslog(LOG_ERR, "Exception getting reply from ICAP: %s", e.what());
		return DGCS_SCANERROR;
	}
	// it is generally NOT a good idea, when using virus scanning,
	// to continue as if nothing went wrong by default!
	return DGCS_SCANERROR;
}
Beispiel #6
0
// Plugin start-up: load the standard lists, derive the ICAP server's host,
// port and IP address from the "icapurl" option, then probe the server with
// an OPTIONS request to find out which features it offers (previews,
// X-Infection-Found headers, AntiVir-WebGate body handling).
// Returns DGCS_OK on success, DGCS_WARNING when the OPTIONS reply is not
// "200", and DGCS_ERROR for every other failure.
int icapinstance::init(void* args)
{
	// the standard lists are always needed
	if (!readStandardLists())
		return DGCS_ERROR;

	// expected form: icap://icapserver:1344/avscan
	icapurl = cv["icapurl"];
	if (icapurl.length() < 3) {
		// it would be far better to do a test connection
		if (!is_daemonised)
			std::cerr << "Error reading icapurl option." << std::endl;
		syslog(LOG_ERR, "Error reading icapurl option.");
		return DGCS_ERROR;
	}

	// everything after the scheme; the port sits between ':' and '/'
	icaphost = icapurl.after("//");
	icapport = icaphost.after(":").before("/").toInteger();
	if (icapport == 0)
		icapport = 1344;  // no (usable) port given

	// strip the path and, if present, the port to leave the bare host name
	icaphost = icaphost.before("/");
	if (icaphost.contains(":"))
		icaphost = icaphost.before(":");

	struct hostent *resolved = gethostbyname(icaphost.toCharArray());
	if (resolved == 0) {
		if (!is_daemonised)
			std::cerr << "Error resolving icap host address." << std::endl;
		syslog(LOG_ERR, "Error resolving icap host address.");
		return DGCS_ERROR;
	}
	icapip = inet_ntoa(*(struct in_addr *) resolved->h_addr_list[0]);

#ifdef DGDEBUG
	std::cerr << "ICAP server address:" << icapip << std::endl;
#endif

	// connect to the ICAP server and send it an OPTIONS request
	Socket icapsock;
	try {
		if (icapsock.connect(icapip.toCharArray(), icapport) < 0)
			throw std::runtime_error("Could not connect to server");

		String line("OPTIONS " + icapurl + " ICAP/1.0\r\nHost: " + icaphost + "\r\n\r\n");
		icapsock.writeString(line.toCharArray());

		// status line of the reply - anything but 200 is only a warning
		char buff[8192];
		icapsock.getLine(buff, 8192, o.content_scanner_timeout);
		line = buff;
#ifdef DGDEBUG
		std::cout << "ICAP/1.0 OPTIONS response:" << std::endl << line << std::endl;
#endif
		if (line.after(" ").before(" ") != "200") {
			if (!is_daemonised)
				std::cerr << "ICAP response not 200 OK" << std::endl;
			syslog(LOG_ERR, "ICAP response not 200 OK");
			return DGCS_WARNING;
			//throw std::runtime_error("Response not 200 OK");
		}

		// remaining headers, up to the blank line
		while (icapsock.getLine(buff, 8192, o.content_scanner_timeout) > 0) {
			line = buff;
#ifdef DGDEBUG
			std::cout << line << std::endl;
#endif
			if (line.startsWith("\r"))
				break;

			if (line.startsWith("Preview:")) {
				usepreviews = true;
				previewsize = line.after(": ").toInteger();
				continue;
			}
			if (line.startsWith("Server:")) {
				if (line.contains("AntiVir-WebGate"))
					needsBody = true;
				continue;
			}
			if (line.startsWith("X-Allow-Out:") && line.contains("X-Infection-Found"))
				supportsXIF = true;
		}
		icapsock.close();
	} catch(std::exception& e) {
		if (!is_daemonised)
			std::cerr << "ICAP server did not respond to OPTIONS request: " << e.what() << std::endl;
		syslog(LOG_ERR, "ICAP server did not respond to OPTIONS request: %s", e.what());
		return DGCS_ERROR;
	}
#ifdef DGDEBUG
	if (usepreviews)
		std::cout << "Message previews enabled; size: " << previewsize << std::endl;
	else
		std::cout << "Message previews disabled" << std::endl;
#endif
	return DGCS_OK;
}
// Default policy for deciding whether this plugin will handle a request.
// The user agent must match (unless matching is unconditional); after that,
// when the standard lists are enabled, either the document's MIME type or
// its file extension has to appear in the corresponding list.
bool DMPlugin::willHandle(HTTPHeader *requestheader, HTTPHeader *docheader)
{
	// user agent test first - it is the quick one
	if (!alwaysmatchua && !ua_match.match(requestheader->userAgent().toCharArray()))
		return false;

	// MIME type list
	String mimetype("");
	bool matchedmime = false;
	if (mimelistenabled) {
		mimetype = docheader->getContentType();
#ifdef DGDEBUG
		std::cout<<"mimetype: "<<mimetype<<std::endl;
#endif
		matchedmime = (mimetypelist.findInList(mimetype.toCharArray()) != NULL);
		// no MIME match and no extension list to fall back on: not for us
		if (!matchedmime && !extensionlistenabled)
			return false;
	}

	// the extension list only matters when it is enabled and the MIME type did not already match
	if (!extensionlistenabled || matchedmime)
		return true;

	// reduce the request URL to the part after the host
	String path(requestheader->decode(requestheader->url()));
	path.removeWhiteSpace();
	path.toLower();
	path.removePTP();
	path = path.after("/");
	path.hexDecode();
	path.realPath();

	// prefer the content disposition as the source of the extension, if there is one
	String disposition(docheader->disposition());
	String extension;
	if (disposition.length() > 2) {
		// keep taking the text after a dot until no dots remain
		for (extension = disposition; extension.contains("."); )
			extension = extension.after(".");
		extension = "." + extension;
	} else if (!path.contains("?")) {
		extension = path;
	} else {
		// URLs carrying a query string are only considered for application/* documents
		if (mimetype.length() == 0)
			mimetype = docheader->getContentType();
		if (mimetype.contains("application/")) {
			extension = path;
			if (extension.contains("?"))
				extension = extension.before("?");
		}
	}
#ifdef DGDEBUG
	std::cout<<"extension: "<<extension<<std::endl;
#endif

	// without a dot there is nothing to look up in the extension list
	if (!extension.contains("."))
		return false;
	return extensionlist.findEndsWith(extension.toCharArray()) != NULL;
}
Beispiel #8
0
	/** Read docking options and constraints from an INI file.
	 *
	 *  Every section other than "ReferenceArea*" / "PharmacophoreConstraint*" is
	 *  copied key by key into output_options ("Docking-Settings" goes to the
	 *  top level, all other sections into a subcategory named after the section).
	 *  Sections "ReferenceArea0", "ReferenceArea1", ... and
	 *  "PharmacophoreConstraint0", ... (at most 100 each, stopping at the first
	 *  missing index) are turned into constraint objects appended to
	 *  output_constraints.
	 *
	 *  @param filename            INI file to read
	 *  @param output_options      receives the key/value settings
	 *  @param output_constraints  receives newly allocated constraints (raw pointers)
	 *  @param ref_ligand          reference ligand; needed only by ReferenceArea
	 *                             sections that set use_ref_ligand
	 *  @throw BALL::Exception::GeneralException if a ReferenceArea requests the
	 *         reference ligand but ref_ligand is null
	 */
	void DockingAlgorithm::readOptionFile(String filename, Options& output_options, list<Constraint*>& output_constraints, const AtomContainer* ref_ligand)
	{
		INIFile ini(filename);
		ini.read();

		// ---- plain option sections -------------------------------------------------
		const Size section_count = ini.getNumberOfSections();
		for (Size sec = 0; sec < section_count; sec++)
		{
			String name = ini.getSection(sec)->getName();

			// constraint sections are handled separately further down
			const bool is_constraint_section = name.hasPrefix("ReferenceArea") || name.hasPrefix("PharmacophoreConstraint");
			if (is_constraint_section)
			{
				continue;
			}

			// "Docking-Settings" is stored at top level, everything else in its own subcategory
			Options* target = &output_options;
			if (name != "Docking-Settings")
			{
				target = output_options.createSubcategory(name);
			}

			Log.level(10)<<endl<<"--- Reading parameter-section '" << name << "' from file "<<"'"<<filename<<"' :  -----"<<endl;

			const bool echo_entries = (name == ScoringFunction::SUBCATEGORY_NAME || name == "IMGDock");

			INIFile::LineIterator line_it = ini.getSectionFirstLine(name);
			INIFile::LineIterator line_end = ini.getSectionLastLine(name).getSectionNextLine();

			// the iterator is advanced once before the first line is read
			for (line_it.getSectionNextLine(); line_it != line_end; line_it.getSectionNextLine())
			{
				String line = *line_it;
				if (line == "") continue;

				String key = line.before("=");
				key.trim();
				String value = line.after("=");
				value.trim();
				if (key == "" || value == "") continue;

				target->set(key, value);

				if (echo_entries)
				{
					Log.level(10)<<key<<" : "<<value<<endl;
				}
			}
		}

		// ---- ReferenceArea<N> sections ---------------------------------------------
		for (Size area_no = 0; area_no < 100; area_no++)
		{
			string sec_name = "ReferenceArea"+String(area_no);

			// sections must be numbered consecutively; stop at the first gap
			if (!ini.hasSection(sec_name)) break;

			Log.level(10)<<endl<<"--- Reading "<<sec_name<<" from file "<<"'"<<filename<<"' :  -----"<<endl;

			String name = ini.getValue(sec_name, "name");
			bool is_fraction = ini.getValue(sec_name, "is_fraction").toBool();
			double penalty = ini.getValue(sec_name, "penalty").toDouble();
			double atoms = ini.getValue(sec_name, "atoms").toDouble();
			vector<Vector3> corners(4);

			Log.level(10)<<"name = "<<name<<endl;
			Log.level(10)<<"is_fraction = "<<is_fraction<<endl;
			Log.level(10)<<"atoms = "<<atoms<<endl;
			Log.level(10)<<"penalty = "<<penalty<<endl;

			ReferenceArea* area;

			String use_ref = ini.getValue(sec_name, "use_ref_ligand");
			const bool from_ref_ligand = (use_ref != INIFile::UNDEFINED && use_ref.toBool());
			if (from_ref_ligand)
			{
				Log.level(10)<<"use_ref_ligand = true"<<endl;
				if (!ref_ligand)
				{
					throw BALL::Exception::GeneralException(__FILE__, __LINE__, "DockingAlgorithm::readOptionFile()", "Reference-ligand required but not specified!");
				}
				area = new ReferenceArea(ref_ligand, is_fraction, atoms, penalty);
				corners = area->input_points_;
			}
			else
			{
				// the four points p0..p3 are given explicitly as "x, y, z"
				for (Size pt = 0; pt <= 3; pt++)
				{
					String point_key = "p"+String(pt);
					String coords = ini.getValue(sec_name, point_key);
					if (coords == INIFile::UNDEFINED)
					{
						Log.error()<<"[Error:] 4 points must be defined for each ReferenceArea!"<<endl;
						return;
					}
					coords.trim();
					double x = coords.getField(0, ", ").toDouble();
					double y = coords.getField(1, ", ").toDouble();
					double z = coords.getField(2, ", ").toDouble();
					corners[pt] = Vector3(x, y, z);
				}
				area = new ReferenceArea(corners[0], corners[1], corners[2], corners[3], is_fraction, atoms, penalty);
			}

			// Increase size of box (e.g. bounding box around ref-ligand), if desired by the user.
			String enlarge_by = ini.getValue(sec_name, "box_size_increase");
			if (enlarge_by != INIFile::UNDEFINED)
			{
				area->enlarge(enlarge_by.toDouble());
			}

			// note: these are the points as read above, not re-read after enlarge()
			Log.level(10)<<"p0 = "<<corners[0]<<endl;
			Log.level(10)<<"p1 = "<<corners[1]<<endl;
			Log.level(10)<<"p2 = "<<corners[2]<<endl;
			Log.level(10)<<"p3 = "<<corners[3]<<endl;

			if (name != INIFile::UNDEFINED) area->setName(name);
			output_constraints.push_back(area);
		}

		// ---- PharmacophoreConstraint<N> sections -----------------------------------
		for (Size phc_no = 0; phc_no < 100; phc_no++)
		{
			string sec_name = "PharmacophoreConstraint"+String(phc_no);

			if (!ini.hasSection(sec_name)) break;

			Log.level(10)<<endl<<"--- Reading "<<sec_name<<" from file "<<"'"<<filename<<"' :  -----"<<endl;

			String name = ini.getValue(sec_name, "name");
			double penalty = ini.getValue(sec_name, "penalty").toDouble();
			double desired_energy = ini.getValue(sec_name, "desired interaction energy").toDouble();

			// both lists are split on ", "
			String residues = ini.getValue(sec_name, "residue-IDs");
			vector<String> residue_vector;
			residues.split(residue_vector, ", ");

			String types = ini.getValue(sec_name, "interaction types");
			vector<String> types_vector;
			types.split(types_vector, ", ");
			list<String> types_list(types_vector.begin(), types_vector.end());

			Log.level(10)<<"name = "<<name<<endl;
			Log.level(10)<<"residue-IDs = "<<residues<<endl;
			Log.level(10)<<"interaction types = "<<types<<endl;
			Log.level(10)<<"desired interaction energy = "<<desired_energy<<endl;
			Log.level(10)<<"penalty = "<<penalty<<endl;

			PharmacophoreConstraint* constraint = new PharmacophoreConstraint(residue_vector, types_list, desired_energy, penalty);
			if (name != INIFile::UNDEFINED) constraint->setName(name);
			output_constraints.push_back(constraint);
		}

		Log.level(10)<<endl<<"--- finished reading config-file."<<endl<<endl<<endl;
	}
Beispiel #9
0
// read in a list linking IPs, subnets & IP ranges to filter groups
bool IPList::readIPMelangeList(const char *filename)
{
	// load in the list file
	std::ifstream input ( filename );
	if (!input) {
		if (!is_daemonised) {
			std::cerr << "Error reading file (does it exist?): " << filename << std::endl;
		}
		syslog(LOG_ERR, "%s%s","Error reading file (does it exist?): ",filename);
		return false;
	}

	// compile regexps for determining whether a list entry is an IP, a subnet (IP + mask), or a range
	RegExp matchIP, matchSubnet, matchRange;
#ifdef HAVE_PCRE
	matchIP.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
	matchSubnet.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
	matchRange.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}-\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
#else
	matchIP.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$");
	matchSubnet.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}/[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$");
	matchRange.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}-[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$");
#endif

	// read in the file
	String line;
	char buffer[ 2048 ];
	while (input) {
		if (!input.getline(buffer, sizeof( buffer ))) {
			break;
		}
		// ignore comments
		if (buffer[0] == '#')
			continue;
		// ignore blank lines
		if (strlen(buffer) < 7)
			continue;
		line = buffer;
#ifdef DGDEBUG
		std::cout << "line: " << line << std::endl;
#endif
		// store the IP address (numerically, not as a string) and filter group in either the IP list, subnet list or range list
		if (matchIP.match(line.toCharArray())) {
			struct in_addr address;
			if (inet_aton(line.toCharArray(), &address)) {
				uint32_t addr = ntohl(address.s_addr);
				iplist.push_back(addr);
			}
		}
		else if (matchSubnet.match(line.toCharArray())) {
			struct in_addr address;
			struct in_addr addressmask;
			String subnet(line.before("/"));
			String mask(line.after("/"));
			if (inet_aton(subnet.toCharArray(), &address) && inet_aton(mask.toCharArray(), &addressmask)) {
				ipl_subnetstruct s;
				uint32_t addr = ntohl(address.s_addr);
				s.mask = ntohl(addressmask.s_addr);
				// pre-mask the address for quick comparison
				s.maskedaddr = addr & s.mask;
				ipsubnetlist.push_back(s);
			}
		}
		else if (matchRange.match(line.toCharArray())) {
			struct in_addr addressstart;
			struct in_addr addressend;
			String start(line.before("-"));
			String end(line.after("-"));
			if (inet_aton(start.toCharArray(), &addressstart) && inet_aton(end.toCharArray(), &addressend)) {
				ipl_rangestruct r;
				r.startaddr = ntohl(addressstart.s_addr);
				r.endaddr = ntohl(addressend.s_addr);
				iprangelist.push_back(r);
			}
		}
		// hmmm. the line didn't match any of our regular expressions.
		// assume it's a hostname.
		else {
			line.toLower();
			hostlist.push_back(line);
		}
	}
	input.close();
#ifdef DGDEBUG
	std::cout << "starting sort" << std::endl;
#endif
	std::sort(iplist.begin(), iplist.end());
	std::sort(hostlist.begin(), hostlist.end());
#ifdef DGDEBUG
	std::cout << "sort complete" << std::endl;
	std::cout << "ip list dump:" << std::endl;
	std::vector<uint32_t>::iterator i = iplist.begin();
	while (i != iplist.end()) {
		std::cout << "IP: " << *i << std::endl;
		++i;
	}
	std::cout << "subnet list dump:" << std::endl;
	std::list<ipl_subnetstruct>::iterator j = ipsubnetlist.begin();
	while (j != ipsubnetlist.end()) {
		std::cout << "Masked IP: " << j->maskedaddr << " Mask: " << j->mask << std::endl;
		++j;
	}
	std::cout << "range list dump:" << std::endl;
	std::list<ipl_rangestruct>::iterator k = iprangelist.begin();
	while (k != iprangelist.end()) {
		std::cout << "Start IP: " << k->startaddr << " End IP: " << k->endaddr << std::endl;
		++k;
	}
	std::cout << "host list dump:" << std::endl;
	std::vector<String>::iterator l = hostlist.begin();
	while (l != hostlist.end()) {
		std::cout << "Hostname: " << *l << std::endl;
		++l;
	}
#endif
	return true;
}
Beispiel #10
0
// Test whether or not a particular request's incoming/outgoing data should be scanned.
// This is a later-stage test; info is known about the actual data itself when this is called.
int CSPlugin::willScanData(const String &url, const char *user, int filtergroup, const char *ip, bool post,
    bool reconstituted, bool exception, bool bypass, const String &disposition, const String &mimetype,
    off_t size)
{
    //exceptionvirusmimetypelist
    if (mimetype.length() > 2) {
        if (exceptionvirusmimetypelist.findInList(mimetype.toCharArray()) != NULL) {
#ifdef DGDEBUG
            std::cout << "willScanData: ignoring exception MIME type (" << mimetype.c_str() << ")" << std::endl;
#endif
            return DGCS_NOSCAN; // match
        }
    }

    //exceptionvirusextensionlist
    String extension;
    if (disposition.length() > 2) {
// If we have a content-disposition, determine file extension from that
#ifdef DGDEBUG
        std::cout << "disposition: " << disposition << std::endl;
#endif
        std::string::size_type start = disposition.find("filename=");
        if (start != std::string::npos) {
            start += 9;
            char endchar = ';';
            if (disposition[start] == '"') {
                endchar = '"';
                ++start;
            }
            std::string::size_type end = disposition.find(endchar, start);
            if (end != std::string::npos)
                extension = disposition.substr(start, end - start);
            else
                extension = disposition.substr(start);
        }
        while (extension.contains(".")) {
            extension = extension.after(".");
        }
        extension = "." + extension;
#ifdef DGDEBUG
        std::cout << "extension from disposition: " << extension << std::endl;
#endif
    } else {
        // Otherwise, determine it from the URL
        String urld(HTTPHeader::decode(url)), path;
        urld.removeWhiteSpace();
        urld.toLower();
        urld.removePTP();

        if (urld.contains("/")) {
            path = urld.after("/");
            path.hexDecode();
            path.realPath();
        }

        if (!path.contains("?")) {
            extension = path;
        } else if (mimetype.contains("application/")) {
            extension = path;
            if (extension.contains("?")) {
                extension = extension.before("?");
            }
        }
#ifdef DGDEBUG
        std::cout << "extension from URL: " << extension << std::endl;
#endif
    }
    if (extension.contains(".")) {
        if (exceptionvirusextensionlist.findEndsWith(extension.toCharArray()) != NULL) {
#ifdef DGDEBUG
            std::cout << "willScanData: ignoring exception file extension (" << extension.c_str() << ")" << std::endl;
#endif
            return DGCS_NOSCAN; // match
        }
    }

#ifdef DGDEBUG
    std::cout << "willScanData: I'm interested" << std::endl;
#endif
    return DGCS_NEEDSCAN;
}
Beispiel #11
0
// NTLM auth header username extraction - also lets the connection persist long
// enough to complete the NTLM negotiation.
//
// peercon  - connection to the client
// proxycon - connection to the parent proxy (used unless in transparent mode,
//            where a separate connection to transparent_ip:transparent_port is made)
// h        - the client's request header; it is rewritten and reused for the
//            intermediate handshake messages
// string   - receives the username on DGAUTH_OK, or the URL to redirect the
//            client to on DGAUTH_REDIRECT
//
// Returns DGAUTH_OK, DGAUTH_NOMATCH or DGAUTH_REDIRECT, or a negative value on error:
//   -1  the upstream reply to step 1 was not an auth-required response
//   -2  the UTF-16LE to UTF-8 converter could not be opened
//   -3  the type 3 message has an impossible length
//   -10 (transparent mode) upstream still answered 407 after the handshake
int ntlminstance::identify(Socket& peercon, Socket& proxycon, HTTPHeader &h, std::string &string)
{
	FDTunnel fdt;
	Socket* upstreamcon;
	Socket ntlmcon;
	String url;
	if (transparent) {
		// we are actually sending to a second Squid, which just does NTLM
		ntlmcon.connect(transparent_ip, transparent_port);
		upstreamcon = &ntlmcon;
		url = h.getUrl();
		h.makeTransparent(false);
	} else {
		upstreamcon = &proxycon;
	}
	String at(h.getAuthType());
	if (transparent && (at != "NTLM")) {
		// obey forwarded-for options in what we send out
		std::string clientip;
		if (o.forwarded_for == 1) {
			if (o.use_xforwardedfor == 1) {
				// grab the X-Forwarded-For IP if available
				clientip = h.getXForwardedForIP();
				// otherwise, grab the IP directly from the client connection
				if (clientip.length() == 0)
					clientip = peercon.getPeerIP();
			} else {
				clientip = peercon.getPeerIP();
			}
			h.addXForwardedFor(clientip);  // add squid-like entry
		}
		
		// in transparent mode, we need to make the initial auth required response
		// appear to come from the smoothie itself as an origin server, not as a proxy.
		//
		// accomplish this by redirecting to a URL that results in accessing DG as if it was
		// a webserver, fudging origin-server-style NTLM auth to the client whilst actually
		// performing proper proxy-style auth to the parent proxy, then redirecting the client
		// back to the actual URL.

		if (!url.contains("sgtransntlmdest=")) {
			// user has not yet been redirected
			// get the browser to make a request to the proxy port on the relevant interface,
			// embedding the original URL they were trying to access.
			// unless they're accessing a domain for which authentication is not required,
			// in which case return a no match response straight away.
			if (no_auth_list >= 0)
			{
#ifdef DGDEBUG
				std::cout << "NTLM: Checking noauthdomains list" << std::endl;
#endif
				std::string::size_type start = url.find("://");
				if (start != std::string::npos)
				{
					start += 3;
					std::string domain;
					domain = url.getHostname();
#ifdef DGDEBUG
					std::cout << "NTLM: URL " << url << ", domain " << domain << std::endl;
#endif
					char *i;
					// try the full hostname, then each parent domain in turn
					while ((start = domain.find('.')) != std::string::npos)
					{
						i = o.lm.l[no_auth_list]->findInList(domain.c_str());
						if (i != NULL)
						{
#ifdef DGDEBUG
							std::cout << "NTLM: Found domain in noauthdomains list" << std::endl;
#endif
							return DGAUTH_NOMATCH;
						}
						domain.assign(domain.substr(start + 1));
					}
					// what is left has no dots; look it up with a leading dot
					if (!domain.empty())
					{
						domain = "." + domain;
						i = o.lm.l[no_auth_list]->findInList(domain.c_str());
						if (i != NULL)
						{
#ifdef DGDEBUG
							std::cout << "NTLM: Found domain in noauthdomains list" << std::endl;
#endif
							return DGAUTH_NOMATCH;
						}
					}
				}
			}
			string = "http://";
			string += hostname;
			string += ":";
			string += String(peercon.getPort()).toCharArray();
			string += "/?sgtransntlmdest=";
			string += url.toCharArray();
#ifdef DGDEBUG
			std::cout << "NTLM - redirecting client to " << string << std::endl;
#endif
			return DGAUTH_REDIRECT;
		}

#ifdef DGDEBUG
		std::cout << "NTLM - forging initial auth required from origin server" << std::endl;
#endif
		// obey forwarded-for options in what we send out
		if (o.forwarded_for == 1) {
			std::string clientip;
			if (o.use_xforwardedfor == 1) {
				// grab the X-Forwarded-For IP if available
				clientip = h.getXForwardedForIP();
				// otherwise, grab the IP directly from the client connection
				if (clientip.length() == 0)
					clientip = peercon.getPeerIP();
			} else {
				clientip = peercon.getPeerIP();
			}
			h.addXForwardedFor(clientip);  // add squid-like entry
		}
		// send a variant on the original request (has to be something Squid will route to the outside
		// world, and that it will require NTLM authentication for)
		String domain(url.after("?sgtransntlmdest=").after("://"));
		if (domain.contains("/")) domain = domain.before("/");
		domain = "http://" + domain + "/";
		h.setURL(domain);
		h.makePersistent();
		h.out(&peercon, upstreamcon, __DGHEADER_SENDALL);
		// grab the auth required response and make it look like it's from the origin server
		h.in(upstreamcon, true);
		h.makeTransparent(true);
		h.makePersistent();
		// send it to the client
		h.out(NULL, &peercon, __DGHEADER_SENDALL);
		if (h.contentLength() != -1)
			fdt.tunnel(*upstreamcon, peercon, false, h.contentLength(), true);
		if (h.isPersistent()) {
			// now grab the client's response to the auth request, and carry on as usual.
			h.in(&peercon, true);
			h.makeTransparent(false);
			at = h.getAuthType();
		} else
			return DGAUTH_NOMATCH;
	} else if (transparent && url.contains("?sgtransntlmdest=")) {
		// send a variant on the original request (has to be something Squid will route to the outside
		// world, and that it will require NTLM authentication for)
		String domain(url.after("?sgtransntlmdest=").after("://"));
		if (domain.contains("/")) domain = domain.before("/");
		domain = "http://" + domain + "/";
		h.setURL(domain);
	}

	if (at != "NTLM") {
		// if no auth currently underway, then...
		if (at.length() == 0) {
			// allow the initial request through so the client will get the proxy's initial auth required response.
			// advertise persistent connections so that parent proxy will agree to advertise NTLM support.
#ifdef DGDEBUG
			std::cout << "No auth negotiation currently in progress - making initial request persistent so that proxy will advertise NTLM" << std::endl;
#endif
			h.makePersistent();
		}
		return DGAUTH_NOMATCH;
	}

#ifdef DGDEBUG
	std::cout << "NTLM - sending step 1" << std::endl;
#endif
	if (o.forwarded_for) {
		std::string clientip;
		if (o.use_xforwardedfor) {
			// grab the X-Forwarded-For IP if available
			clientip = h.getXForwardedForIP();
			// otherwise, grab the IP directly from the client connection
			if (clientip.length() == 0)
				clientip = peercon.getPeerIP();
		} else {
			clientip = peercon.getPeerIP();
		}
		h.addXForwardedFor(clientip);  // add squid-like entry
	}
	h.makePersistent();
	h.out(&peercon, upstreamcon, __DGHEADER_SENDALL);
#ifdef DGDEBUG
	std::cout << "NTLM - receiving step 2" << std::endl;
#endif
	h.in(upstreamcon, true);

	if (h.authRequired()) {
#ifdef DGDEBUG
		std::cout << "NTLM - sending step 2" << std::endl;
#endif
		if (transparent)
			h.makeTransparent(true);
		h.out(NULL, &peercon, __DGHEADER_SENDALL);
		if (h.contentLength() != -1)
			fdt.tunnel(*upstreamcon, peercon, false, h.contentLength(), true);
#ifdef DGDEBUG
		std::cout << "NTLM - receiving step 3" << std::endl;
#endif
		h.in(&peercon, true);
		if (transparent) {
			h.makeTransparent(false);
			String domain(url.after("?sgtransntlmdest=").after("://"));
			if (domain.contains("/")) domain = domain.before("/");
			domain = "http://" + domain + "/";
			h.setURL(domain);
		}

#ifdef DGDEBUG
		std::cout << "NTLM - decoding type 3 message" << std::endl;
#endif

		std::string message(h.getAuthData());

		ntlm_authenticate auth;
		ntlm_auth *a = &(auth.a);
		static char username[256]; // fixed size
		static char username2[256];
		char* inptr = username;
		char* outptr = username2;
		// length and offset of the username field inside the message
		// (named so that they do not shadow the global option container "o")
		size_t userlen, useroffset;

		// copy the NTLM message into the union's buffer, simultaneously filling in the struct
		if ((message.length() > sizeof(ntlm_auth)) || (message.length() < offsetof(ntlm_auth, payload))) {
			syslog(LOG_ERR, "NTLM - Invalid message of length %zu, message was: %s", message.length(), message.c_str());
#ifdef DGDEBUG
			std::cerr << "NTLM - Invalid message of length " << message.length() << ", message was: " << message << std::endl;
#endif
			return -3;
		}
		memcpy((void *)auth.buf, (const void *)message.c_str(), message.length());

		// verify that the message is indeed a type 3
		if (strcmp("NTLMSSP",a->h.signature) == 0 && WSWAP(a->h.type) == 3) {
			// grab the length & offset of the username within the message
			// cope with the possibility we are a different byte order to Windows
			userlen = SSWAP(a->user.len);
			useroffset = WSWAP(a->user.offset);

			// The username must lie entirely inside the bytes actually received;
			// anything beyond message.length() in auth.buf was never written.
			// The message-length tests come first and are written so that
			// useroffset + userlen cannot wrap around.
			if ((userlen > 0) && (userlen <= 254)
				&& (useroffset <= message.length()) && (userlen <= message.length() - useroffset)
				&& (useroffset + userlen) <= sizeof(a->payload)) {
				// everything is in range
				// note offsets are from start of packet - not the start of the payload area
				memcpy((void *)username, (const void *)&(auth.buf[useroffset]), userlen);
				username[userlen] = '\0';
				// check flags - we may need to convert from UTF-16 to something more sensible
				int f = WSWAP(a->flags);
				if (f & WSWAP(0x0001)) {
					iconv_t ic = iconv_open("UTF-8", "UTF-16LE");
					if (ic == (iconv_t)-1) {
						syslog(LOG_ERR, "NTLM - Cannot initialise conversion from UTF-16LE to UTF-8: %s", strerror(errno));
#ifdef DGDEBUG
						std::cerr << "NTLM - Cannot initialise conversion from UTF-16LE to UTF-8: " << strerror(errno) << std::endl;
#endif
						// no iconv_close() here: there is no valid descriptor to close
						return -2;
					}
					// leave one byte spare so that the terminator always fits, even
					// when the converted name fills the rest of the buffer
					size_t outleft = sizeof(username2) - 1;
					local_iconv_adaptor(iconv, ic, &inptr, &userlen, &outptr, &outleft);
					iconv_close(ic);
					username2[sizeof(username2) - 1 - outleft] = '\0';
#ifdef DGDEBUG
					std::cout << "NTLM - got username (converted from UTF-16LE) " << username2 << std::endl;
#endif
					string = username2;
				} else {
#ifdef DGDEBUG
					std::cout << "NTLM - got username " << username << std::endl;
#endif
					string = username;
				}
				if (!transparent)
					return DGAUTH_OK;
				// if in transparent mode, send a redirect to the client's original requested URL,
				// having sent the final headers to the NTLM-only Squid to do with what it will
				std::string tmp = peercon.getPeerIP();
				h.addXForwardedFor(tmp);
				h.out(&peercon, upstreamcon, __DGHEADER_SENDALL);
				// also, the return code matters in ways it hasn't mattered before:
				// mustn't send a redirect if it is still 407, or we get a redirection loop
				h.in(upstreamcon, true);
				if (h.returnCode() == 407)
				{
					h.makeTransparent(false);
					h.out(NULL, &peercon, __DGHEADER_SENDALL);
					return -10;
				}
				url = url.after("=");
				string = url.toCharArray();
				return DGAUTH_REDIRECT;
			}
		}
		return DGAUTH_NOMATCH;
	} else {
#ifdef DGDEBUG
		std::cout << "NTLM - step 2 was not part of an auth handshake!" << std::endl;
		for (unsigned int i = 0; i < h.header.size(); i++)
			std::cout << h.header[i] << std::endl;
#endif
		syslog(LOG_ERR, "NTLM - step 2 was not part of an auth handshake! (%s)", h.header[0].toCharArray());
		return -1;
	}
}
Beispiel #12
0
// read in a list linking IPs, subnets & IP ranges to filter groups
// return 0 for success, -1 for failure, 1 for warning
int ipinstance::readIPMelangeList(const char *filename) {
	// load in the list file
	std::ifstream input ( filename );
	if (!input) {
		if (!is_daemonised) {
			std::cerr << "Error reading file (does it exist?): " << filename << std::endl;
		}
		syslog(LOG_ERR, "%s%s","Error reading file (does it exist?): ",filename);
		return -1;
	}

	// compile regexps for determining whether a list entry is an IP, a subnet (IP + mask), or a range
	RegExp matchIP, matchSubnet, matchRange;
#ifdef HAVE_PCRE
	matchIP.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
	matchSubnet.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
	matchRange.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}-\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$");
#else
	matchIP.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$");
	matchSubnet.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}/[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$");
	matchRange.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}-[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$");
#endif

	// read in the file
	String line;
	String key, value;
	char buffer[ 2048 ];
	bool warn = false;
	while (input) {
		if (!input.getline(buffer, sizeof( buffer ))) {
			break;
		}
		// ignore comments
		if (buffer[0] == '#')
			continue;
		// ignore blank lines
		if (strlen(buffer) < 10)
			continue;
		line = buffer;
		// split into key & value
		if (line.contains("=")) {
			key = line.before("=");
			key.removeWhiteSpace();
			value = line.after("filter");
		}
		else {
			if (!is_daemonised)
				std::cerr << "No filter group given; entry " << line << " in " << filename << std::endl;
			syslog(LOG_ERR, "No filter group given; entry %s in %s", line.toCharArray(), filename);
			warn = true;
			continue;
		}
#ifdef DGDEBUG
		std::cout << "key: " << key << std::endl;
		std::cout << "value: " << value.toInteger() << std::endl;
#endif
		if ((value.toInteger() < 1) || (value.toInteger() > o.filter_groups)) {
			if (!is_daemonised)
				std::cerr << "Filter group out of range; entry " << line << " in " << filename << std::endl;
			syslog(LOG_ERR, "Filter group out of range; entry %s in %s", line.toCharArray(), filename);
			warn = true;
			continue;
		}
		// store the IP address (numerically, not as a string) and filter group in either the IP list, subnet list or range list
		if (matchIP.match(key.toCharArray())) {
			struct in_addr address;
			if (inet_aton(key.toCharArray(), &address)) {
				iplist.push_back(ip(ntohl(address.s_addr),value.toInteger()-1));
			}
		}
		else if (matchSubnet.match(key.toCharArray())) {
			struct in_addr address;
			struct in_addr addressmask;
			String subnet(key.before("/"));
			String mask(key.after("/"));
			if (inet_aton(subnet.toCharArray(), &address) && inet_aton(mask.toCharArray(), &addressmask)) {
				subnetstruct s;
				int addr = ntohl(address.s_addr);
				s.mask = ntohl(addressmask.s_addr);
				// pre-mask the address for quick comparison
				s.maskedaddr = addr & s.mask;
				s.group = value.toInteger()-1;
				ipsubnetlist.push_back(s);
			}
		}
		else if (matchRange.match(key.toCharArray())) {
			struct in_addr addressstart;
			struct in_addr addressend;
			String start(key.before("-"));
			String end(key.after("-"));
			if (inet_aton(start.toCharArray(), &addressstart) && inet_aton(end.toCharArray(), &addressend)) {
				rangestruct r;
				r.startaddr = ntohl(addressstart.s_addr);
				r.endaddr = ntohl(addressend.s_addr);
				r.group = value.toInteger()-1;
				iprangelist.push_back(r);
			}
		}
		// hmmm. the key didn't match any of our regular expressions. output message & return a warning value.
		else {
			if (!is_daemonised)
				std::cerr << "Entry " << line << " in " << filename << " was not recognised as an IP address, subnet or range" << std::endl;
			syslog(LOG_ERR, "Entry %s in %s was not recognised as an IP address, subnet or range", line.toCharArray(), filename);
			warn = true;
		}
	}
	input.close();
#ifdef DGDEBUG
	std::cout << "starting sort" << std::endl;
#endif
	std::sort(iplist.begin(), iplist.end());
#ifdef DGDEBUG
	std::cout << "sort complete" << std::endl;
	std::cout << "ip list dump:" << std::endl;
	std::vector<ip>::const_iterator i = iplist.begin();
	while (i != iplist.end()) {
		std::cout << "IP: " << i->addr << " Group: " << i->group << std::endl;
		++i;
	}
	std::cout << "subnet list dump:" << std::endl;
	std::list<subnetstruct>::const_iterator j = ipsubnetlist.begin();
	while (j != ipsubnetlist.end()) {
		std::cout << "Masked IP: " << j->maskedaddr << " Mask: " << j->mask << " Group: " << j->group << std::endl;
		++j;
	}
	std::cout << "range list dump:" << std::endl;
	std::list<rangestruct>::const_iterator k = iprangelist.begin();
	while (k != iprangelist.end()) {
		std::cout << "Start IP: " << k->startaddr << " End IP: " << k->endaddr << " Group: " << k->group << std::endl;
		++k;
	}
#endif
	// return either warning or success
	return warn ? 1 : 0;
}
// Reconstruct the full URL of the request from the request line and, for
// requests that carry only a path, the Host header.
// withport: when true, a non-default port is kept in the returned URL.
// Side effects visible here: sets the member "port"; may rewrite the first
// header line into absolute-URL form; caches the result in "cachedurl" when
// withport is false.
String HTTPHeader::url(bool withport)
{
	// Only the port-less form is cached, because the vast majority of
	// the code doesn't like port numbers in URLs.
	if (!withport && cachedurl.length() > 0)
		return cachedurl;

	port = 80;
	bool https = false;
	String host;

	// start from everything after the request method, then cut off the protocol version
	String result(header.front().after(" "));
	result.removeMultiChar(' ');
	if (result.after(" ").startsWith("HTTP/"))
		result = result.before(" HTTP/");
	else
		result = result.before(" http/");  // just in case!

	if (requestType() == "CONNECT") {
		https = true;
		port = 443;
		if (!result.startsWith("https://"))
			result = "https://" + result;
	}
	// port assumed for this scheme whenever none (or a nonsensical one) is given
	const int defaultport = https ? 443 : 80;

	if (pport != NULL) {
		port = pport->after(" ").toInteger();
		if (port == 0 || port > 65535)
			port = defaultport;
	}

	if (result.length()) {
		if (result[0] != '/') {
			// request line already holds an absolute URL, e.g. GET http://foo.bar:80/ HTTP/1.0
			if (!result.after("://").contains("/"))
				result += "/";  // needed later on so correct host is extracted
			String scheme(result.before("://"));
			host = result.after("://");
			String pathpart(host.after("/"));
			pathpart.removeWhiteSpace();  // remove rubbish like ^M and blanks
			if (pathpart.length() > 0)
				pathpart = "/" + pathpart;
			host = host.before("/");  // relies on the extra / added above
			if (host.contains("@"))  // contains a username:password combo
				host = host.after("@");
			if (host.contains(":")) {
				port = host.after(":").toInteger();
				if (port == 0 || port > 65535)
					port = defaultport;
				host = host.before(":");  // chop off the port bit
			}
			while (host.endsWith("."))
				host.chop();
			if (withport && (port != defaultport))
				host += ":" + String(port);
			result = scheme + "://" + host + pathpart;
		} else {
			// request line holds only a path: take the host from the Host header, if any
			if (phost != NULL) {
				host = phost->after(" ");
				host.removeWhiteSpace();
				if (host.contains(":")) {
					port = host.after(":").toInteger();
					if (port == 0 || port > 65535)
						port = defaultport;
					host = host.before(":");
				}
				while (host.endsWith("."))
					host.chop();
				if (withport && (port != defaultport))
					host += ":" + String(port);
				host = "http://" + host;
				result = host + result;
			}
			// Squid doesn't like requests in this format. Work around the fact.
			header.front() = requestType() + " " + result + " HTTP/" + header.front().after(" HTTP/");
		}
	}

	if (result.endsWith("//"))
		result.chop();
#ifdef DGDEBUG
	std::cout << "from header url:" << result << std::endl;
#endif
	// Don't include port numbers in the URL in the cached version.
	// Most of the code only copes with URLs *without* port specifiers.
	if (!withport)
		cachedurl = result.toCharArray();
	return result;
}
// Is a URL malformed?
//
// Only the host component is examined: the text after "://", cut at the
// first "/" and with any ":port" suffix removed.
//
// Returns true (malformed) when:
//   - the host, including any port suffix, is shorter than two characters;
//   - nothing is left of the host once the port suffix has been removed;
//   - the host starts with ".", ends with ".", or contains "..";
//   - the host contains a character other than letters, digits, '.', '-'
//     or '_';
//   - the host consists solely of digits, dots and the characters
//     [a-fA-FxX], none of its dot-separated parts starts with a letter, and
//     at least one part either has a leading '0' (possible hex/octal) or
//     lies outside 0-255 (possible decimal encoding).
// Returns false otherwise.
bool HTTPHeader::malformedURL(const String& url)
{
	String host(url.after("://"));
	if (host.contains("/"))
		host = host.before("/");
	if (host.length() < 2) {
#ifdef DGDEBUG
		std::cout << "host len too small" << std::endl;
#endif
		return true;
	}
	if (host.contains(":"))
		host = host.before(":");
	// The length check above still counts the port suffix, so something like
	// "http://:80/" gets this far with no host at all. A leading dot likewise
	// yields an empty first label, which the per-part checks below would
	// index into.
	if (host.length() == 0 || host.startsWith(".")) {
#ifdef DGDEBUG
		std::cout << "empty host or leading dot in domain name" << std::endl;
#endif
		return true;
	}
	if (host.contains("..") || host.endsWith(".")) {
#ifdef DGDEBUG
		std::cout << "double dots in domain name" << std::endl;
#endif
		return true;
	}
	const int len = host.length();
	bool containsletter = false;
	for (int i = 0; i < len; i++) {
		const unsigned char c = (unsigned char) host[i];
		// If it contains something other than numbers, dots, or [a-fx] (hex encoded IPs),
		// IP obfuscation can be ruled out.
		if (!containsletter &&
				(((c < '0') || (c > '9'))
				 && (c != '.') && (c != 'x') && (c != 'X')
				 && ((c < 'a') || (c > 'f'))
				 && ((c < 'A') || (c > 'F'))))
			containsletter = true;
		// only allowed: letters, digits, hyphen, underscore, dots
		if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z')
			&& !(c >= '0' && c <= '9') && c != '.' && c != '-' && c != '_') {
#ifdef DGDEBUG
			std::cout << "bad char in hostname" << std::endl;
#endif
			return true;
		}
	}
	// no IP obfuscation going on
	if (containsletter)
		return false;
#ifdef DGDEBUG
	else
		std::cout << "Checking for IP obfuscation in " << host << std::endl;
#endif
	// Check no IP obfuscation is going on
	// This includes IPs encoded as a single decimal number,
	// fully or partly hex encoded, and octal encoded
	// (A trailing dot has already been rejected above, so there is nothing
	// to strip from the end of the host here.)
	bool first = true;
	bool obfuscation = false;
	do {
		// Every pass but the first drops the part examined previously
		if (!first)
			host = host.after(".");
		first = false;
		String hostpart(host);
		if (host.contains("."))
			hostpart = hostpart.before(".");
		// If any part of the host starts with a letter, any letter,
		// then we must have a hostname rather than an IP (obscured
		// or otherwise).  TLDs never start with a number.
		if ((hostpart[0] >= 'a' && hostpart[0] <= 'z') || (hostpart[0] >= 'A' && hostpart[0] <= 'Z'))
			return false;
		// If any part of the host begins with 0, it may be hex or octal
		if ((hostpart[0] == '0') && (hostpart.length() > 1))
		{
			obfuscation = true;
			// 'continue' in a do-while jumps to the loop condition, so the
			// remaining parts are still examined
			continue;
		}
		// Also check range, for decimal obfuscation.
		int part = hostpart.toInteger();
		if ((part < 0) || (part > 255))
			obfuscation = true;
	} while (host.contains("."));
	// If we have any obfuscated parts, and haven't proven it's a hostname, it's invalid.
	return obfuscation;
}
// Modifies the URL in all relevant header lines after a regexp search and replace.
// setURL code originally from Ton Gorter 2004
//
// url: the replacement URL. If nothing follows its host part, a trailing
//      "/" is appended to it in place (the parameter is a non-const
//      reference, so the caller sees that change).
//
// Rewrites the request line (header.front()) and, when the corresponding
// pointers are non-NULL, the lines referenced by phost and pport.
// A port outside 1-65535 (or one that does not parse to a positive number)
// is replaced by the scheme default: 443 for "https", 80 otherwise.
void HTTPHeader::setURL(String &url) {
	const bool https = (url.before("://") == "https");
	const int defaultport = (https ? 443 : 80);
	int port = defaultport;

	// Make sure a "/" terminates the host part so it can be cut out below
	if (!url.after("://").contains("/")) {
		url += "/";
	}
	String hostname(url.after("://").before("/"));
	if (hostname.contains("@")) { // Contains a username:password combo
		hostname = hostname.after("@");
	}
	if (hostname.contains(":")) {
		port = hostname.after(":").toInteger();
		// Reject zero, negative and oversized values alike; a negative
		// number must never end up in the rewritten headers.
		if (port <= 0 || port > 65535) {
			port = defaultport;
		}
		hostname = hostname.before(":");  // chop off the port bit
	}

#ifdef DGDEBUG
	std::cout << "setURL: header.front() changed from: " << header.front() << std::endl;
#endif
	// Keep the first word of the request line and everything after the
	// second space; only the middle (target) part is replaced.
	if (!https)
		header.front() = header.front().before(" ") + " " + url + " " + header.front().after(" ").after(" ");
	else
		// Should take form of "CONNECT example.com:443 HTTP/1.0" for SSL
		header.front() = header.front().before(" ") + " " + hostname + ":" + String(port) + " " + header.front().after(" ").after(" ");
#ifdef DGDEBUG
	std::cout << " to: " << header.front() << std::endl;
#endif

	if (phost != NULL) {
#ifdef DGDEBUG
		std::cout << "setURL: header[] line changed from: " << (*phost) << std::endl;
#endif
		(*phost) = String("Host: ") + hostname;
		// The port is only spelled out when it differs from the scheme default
		if (port != defaultport)
		{
			(*phost) += ":";
			(*phost) += String(port);
		}
		(*phost) += "\r";
#ifdef DGDEBUG
		std::cout << " to " << (*phost) << std::endl;
#endif
	}
	if (pport != NULL) {
#ifdef DGDEBUG
		std::cout << "setURL: header[] line changed from: " << (*pport) << std::endl;
#endif
		(*pport) = String("Port: ") + String(port) + "\r";
#ifdef DGDEBUG
		std::cout << " to " << (*pport) << std::endl;
#endif
	}
	// Don't just cache the URL we're sent - url() performs some other
	// processing, notably stripping the port part. Caching here will
	// bypass all that.
	//cachedurl = url.toCharArray();
}