MolecularSimilarity::MolecularSimilarity(String smarts_file) { Path path; String file = path.find(smarts_file); if(file=="") { throw BALL::Exception::FileNotFound(__FILE__,__LINE__,smarts_file); } std::ifstream smart_input(file.c_str()); // read SMARTS-expression and names for those SMARTS from the specified file for(Size i=0; smart_input; i++) { if(i%300==0) // prevent frequent resizing { int a = (i/300)+1; smarts_.reserve(a*300); smart_names_.reserve(a*300); } String line; getline(smart_input,line); line.trim(); if(line!="") { stringstream lstream(line); string s; lstream >> s; // read first word but ignore the following comment (name/description of functional group) if(s!="") smarts_.push_back(s); if(line.hasSubstring("\t")) smart_names_.push_back(String(line.after("\t")).trim()); else { throw BALL::Exception::GeneralException(__FILE__,__LINE__,"MolecularSimilarity error","SMARTS file has wrong format! Maybe tabs are missing."); } } }
bool Potential1210::extractSection (ForceFieldParameters& parameters, const String& section_name) { // clear the fields first clear(); // check whether the parameters are valid if (!parameters.isValid()) { return false; } // extract the basis information ParameterSection::extractSection(parameters, section_name); // check whether all variables we need are defined, terminate otherwise if (!hasVariable("A") || !hasVariable("B")) { return false; } // build a two dimensional array of the atom types // loop variable Size i; AtomTypes& atom_types = parameters.getAtomTypes(); number_of_atom_types_ = atom_types.getNumberOfTypes(); // allocate two onedimensional fields for the two parameters A_.resize(number_of_atom_types_ * number_of_atom_types_); B_.resize(number_of_atom_types_ * number_of_atom_types_); is_defined_.resize(number_of_atom_types_ * number_of_atom_types_); for (i = 0; i < number_of_atom_types_ * number_of_atom_types_; i++) { is_defined_[i] = false; } StringHashMap<Index>::Iterator it; // determine the factor to convert the parameters to the standard units used // as a default, energies are assumend to be in kJ/mol and distances in Angstrom double factor_A = 1.0; double factor_B = 1.0; if (options.has("unit_A")) { if (options["unit_A"] == "kcal/mol*A^12") { factor_A = Constants::JOULE_PER_CAL; } else { Log.warn() << "unknown unit for parameter A: " << options["unit_A"] << endl; } } if (options.has("unit_B")) { if (options["unit_B"] == "kcal/mol*A^10") { factor_B = Constants::JOULE_PER_CAL; } else { Log.warn() << "unknown unit for parameter B: " << options["unit_B"] << endl; } } Atom::Type type_I; Atom::Type type_J; String type_name_I; String type_name_J; String key; Index index = 0; for (it = section_entries_.begin(); !(it == section_entries_.end()); ++it) { key = (*it).first; if ((key.size() > 0) && (key.find_first_of(" ", 0) > 0)) { type_name_I = key.before(" ", 0); type_name_J = key.after(" ", 0); if ((atom_types.hasType(type_name_I)) && 
(atom_types.hasType(type_name_J))) { type_I = atom_types.getType(type_name_I); type_J = atom_types.getType(type_name_J); index = (Index)(type_I * number_of_atom_types_ + type_J); is_defined_[index] = true; A_ [index] = getValue(key, "A").toFloat() * factor_A; B_ [index] = getValue(key, "B").toFloat() * factor_B; index = (Index)(type_I + number_of_atom_types_ * type_J); is_defined_[index] = true; A_ [index] = getValue(key, "A").toFloat() * factor_A; B_ [index] = getValue(key, "B").toFloat() * factor_B; } } } return true; }
// the meat of the process void NaughtyFilter::checkPICSrating(std::string label, unsigned int filtergroup) { (*o.fg[filtergroup]).pics2.match(label.c_str()); if (!(*o.fg[filtergroup]).pics2.matched()) { return; } // exit if not found String lab(label.c_str()); // convert to a String for easy manip String r; String service; for (int i = 0; i < (*o.fg[filtergroup]).pics2.numberOfMatches(); i++) { r = (*o.fg[filtergroup]).pics2.result(i).c_str(); // ditto r = r.after("("); r = r.before(")"); // remove the brackets // Only check the substring of lab that is between // the start of lab (or the end of the previous match) // and the start of this rating. // It is possible to have multiple ratings in one pics-label. // This is done on e.g. http://www.jesusfilm.org/ if (i == 0) { service = lab.subString(0, (*o.fg[filtergroup]).pics2.offset(i)); } else { service = lab.subString((*o.fg[filtergroup]).pics2.offset(i - 1) + (*o.fg[filtergroup]).pics2.length(i - 1), (*o.fg[filtergroup]).pics2.offset(i)); } if (service.contains("safesurf")) { checkPICSratingSafeSurf(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("evaluweb")) { checkPICSratingevaluWEB(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("microsys")) { checkPICSratingCyberNOT(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("icra")) { checkPICSratingICRA(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("rsac")) { checkPICSratingRSAC(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("weburbia")) { checkPICSratingWeburbia(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("vancouver")) { checkPICSratingVancouver(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("icec")) { checkPICSratingICEC(r, filtergroup); if (isItNaughty) { return; } } if (service.contains("safenet")) { checkPICSratingSafeNet(r, filtergroup); if (isItNaughty) { return; } } // check label for word denoting rating 
system then pass on to the // appropriate function the rating String. } }
// check the phrase lists void NaughtyFilter::checkphrase(char *file, off_t filelen, const String *url, const String *domain, unsigned int filtergroup, unsigned int phraselist, int limit, bool searchterms) { int weighting = 0; int cat; std::string weightedphrase; // checkme: translate this? String currcat("Embedded URLs"); // found categories list & reusable iterators std::map<int, listent> listcategories; // check for embedded references to banned sites/URLs. // have regexes that check for URLs in pages (look for attributes (src, href, javascript location) // or look for protocol strings (in which case, incl. ftp)?) and extract them. // then check the extracted list against the banned site/URL lists. // ADs category lists do not want to add to the possibility of a site being banned. // Exception lists are not checked. // Do not do full-blown category retrieval/duplicate checking; simply add the // "Embedded URLs" category. // Put a warning next to the option in the config file that this will take lots of CPU. // Support phrase mode 1/2 distinction (duplicate sites/URLs). // Have weight configurable per filter group, not globally or with a list directive - // a weight of 0 will disable the option, effectively making this functionality per-FG itself. // todo: if checkphrase is passed the domain & existing URL, it can create full URLs from relative ones. // if a src/href URL starts with a /, append it to the domain; otherwise, append it to the existing URL. // chop off anything after a ?, run through realPath, then put through the URL lists. #ifdef HAVE_PCRE // if weighted phrases are enabled, and we have been passed a URL and domain, and embedded URL checking is enabled... // then check for embedded URLs! 
if (url != NULL && o.fg[filtergroup]->embedded_url_weight > 0) { std::map<int, listent>::iterator ourcat; bool catinited = false; std::map<String, unsigned int> found; std::map<String, unsigned int>::iterator founditem; String u; char* j; // check for absolute URLs if (absurl_re.match(file)) { // each match generates 2 results (because of the brackets in the regex), we're only interested in the first #ifdef DGDEBUG std::cout << "Found " << absurl_re.numberOfMatches()/2 << " absolute URLs:" << std::endl; #endif for (int i = 0; i < absurl_re.numberOfMatches(); i+=2) { // chop off quotes u = absurl_re.result(i); u = u.subString(1,u.length()-2); #ifdef DGDEBUG std::cout << u << std::endl; #endif if ((((j = o.fg[filtergroup]->inBannedSiteList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_site_list]->lastcategory.contains("ADs"))) || (((j = o.fg[filtergroup]->inBannedURLList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_url_list]->lastcategory.contains("ADs")))) { // duplicate checking // checkme: this should really be being done *before* we search the lists. // but because inBanned* methods do some cleaning up of their own, we don't know the form to check against. // we actually want these cleanups do be done before passing to inBanned*/inException* - this would // speed up ConnectionHandler a bit too. 
founditem = found.find(j); if ((o.fg[filtergroup]->weighted_phrase_mode == 2) && (founditem != found.end())) { founditem->second++; } else { // add the site to the found phrases list found[j] = 1; if (weightedphrase.length() == 0) weightedphrase = "["; else weightedphrase += " "; weightedphrase += j; if (!catinited) { listcategories[-1] = listent(o.fg[filtergroup]->embedded_url_weight,currcat); ourcat = listcategories.find(-1); catinited = true; } else ourcat->second.weight += o.fg[filtergroup]->embedded_url_weight; } } } } found.clear(); // check for relative URLs if (relurl_re.match(file)) { // we don't want any parameters on the end of the current URL, since we append to it directly // when forming absolute URLs from relative ones. we do want a / on the end, too. String currurl(*url); if (currurl.contains("?")) currurl = currurl.before("?"); if (currurl[currurl.length()-1] != '/') currurl += "/"; // each match generates 2 results (because of the brackets in the regex), we're only interested in the first #ifdef DGDEBUG std::cout << "Found " << relurl_re.numberOfMatches()/2 << " relative URLs:" << std::endl; #endif for (int i = 0; i < relurl_re.numberOfMatches(); i+=2) { u = relurl_re.result(i); // can't find a way to negate submatches in PCRE, so it is entirely possible // that some absolute URLs have made their way into this list. we don't want them. 
if (u.contains("://")) continue; #ifdef DGDEBUG std::cout << u << std::endl; #endif // remove src/href & quotes u = u.after("="); u.removeWhiteSpace(); u = u.subString(1,u.length()-2); // create absolute URL if (u[0] == '/') u = (*domain) + u; else u = currurl + u; #ifdef DGDEBUG std::cout << "absolute form: " << u << std::endl; #endif if ((((j = o.fg[filtergroup]->inBannedSiteList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_site_list]->lastcategory.contains("ADs"))) || (((j = o.fg[filtergroup]->inBannedURLList(u)) != NULL) && !(o.lm.l[o.fg[filtergroup]->banned_url_list]->lastcategory.contains("ADs")))) { // duplicate checking // checkme: this should really be being done *before* we search the lists. // but because inBanned* methods do some cleaning up of their own, we don't know the form to check against. // we actually want these cleanups do be done before passing to inBanned*/inException* - this would // speed up ConnectionHandler a bit too. founditem = found.find(j); if ((o.fg[filtergroup]->weighted_phrase_mode == 2) && (founditem != found.end())) { founditem->second++; } else { // add the site to the found phrases list found[j] = 1; if (weightedphrase.length() == 0) weightedphrase = "["; else weightedphrase += " "; weightedphrase += j; if (!catinited) { listcategories[-1] = listent(o.fg[filtergroup]->embedded_url_weight,currcat); ourcat = listcategories.find(-1); catinited = true; } else ourcat->second.weight += o.fg[filtergroup]->embedded_url_weight; } } } } if (catinited) { weighting = ourcat->second.weight; weightedphrase += "]"; #ifdef DGDEBUG std::cout << weightedphrase << std::endl; std::cout << "score from embedded URLs: " << ourcat->second.weight << std::endl; #endif } } #endif std::string bannedphrase; std::string exceptionphrase; String bannedcategory; int type, index, weight, time; bool allcmatched = true, bannedcombi = false; std::string s1; // this line here searches for phrases contained in the list - the rest of the code is all sorting // 
through it to find the categories, weightings, types etc. of what has actually been found. std::map<std::string, std::pair<unsigned int, int> > found; o.lm.l[phraselist]->graphSearch(found, file, filelen); // cache reusable iterators std::map<std::string, std::pair<unsigned int, int> >::iterator foundend = found.end(); std::map<std::string, std::pair<unsigned int, int> >::iterator foundcurrent; // look for combinations first //if banned must wait for exception later std::string combifound; std::string combisofar; std::vector<int>::iterator combicurrent = o.lm.l[phraselist]->combilist.begin(); std::map<int, listent>::iterator catcurrent; int lowest_occurrences = 0; while (combicurrent != o.lm.l[phraselist]->combilist.end()) { // Grab the current combination phrase part index = *combicurrent; // Do stuff if what we have is an end marker (end of one list of parts) if (index == -2) { // Were all the parts in this combination matched? if (allcmatched) { type = *(++combicurrent); // check this time limit against the list of time limits time = *(++combicurrent); if (not (o.lm.l[phraselist]->checkTimeAtD(time))) { // nope - so don't take any notice of it #ifdef DGDEBUG combicurrent++; cat = (*++combicurrent); std::cout << "Ignoring combi phrase based on time limits: " << combisofar << "; " << o.lm.l[phraselist]->getListCategoryAtD(cat) << std::endl; #else combicurrent += 2; #endif combisofar = ""; } else if (type == -1) { // combination exception isItNaughty = false; isException = true; // Combination exception phrase found: // Combination exception search term found: message_no = searchterms ? 
456 : 605; whatIsNaughtyLog = o.language_list.getTranslation(message_no); whatIsNaughtyLog += combisofar; whatIsNaughty = ""; ++combicurrent; cat = *(++combicurrent); whatIsNaughtyCategories = o.lm.l[phraselist]->getListCategoryAtD(cat); return; } else if (type == 1) { // combination weighting weight = *(++combicurrent); weighting += weight * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : lowest_occurrences); if (weight > 0) { cat = *(++combicurrent); //category index -1 indicates an uncategorised list if (cat >= 0) { //don't output duplicate categories catcurrent = listcategories.find(cat); if (catcurrent != listcategories.end()) { catcurrent->second.weight += weight * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : lowest_occurrences); } else { currcat = o.lm.l[phraselist]->getListCategoryAtD(cat); listcategories[cat] = listent(weight,currcat); } } } else { // skip past category for negatively weighted phrases combicurrent++; } if (weightedphrase.length() > 0) { weightedphrase += "+"; } weightedphrase += "("; if (weight < 0) { weightedphrase += "-" + combisofar; } else { weightedphrase += combisofar; } #ifdef DGDEBUG std::cout << "found combi weighted phrase ("<< o.fg[filtergroup]->weighted_phrase_mode << "): " << combisofar << " x" << lowest_occurrences << " (per phrase: " << weight << ", calculated: " << (weight * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : lowest_occurrences)) << ")" << std::endl; #endif weightedphrase += ")"; combisofar = ""; } else if (type == 0) { // combination banned bannedcombi = true; combifound += "(" + combisofar + ")"; combisofar = ""; combicurrent += 2; cat = *(combicurrent); bannedcategory = o.lm.l[phraselist]->getListCategoryAtD(cat); } } else { // We had an end marker, but not all the parts so far were matched. // Reset the match flag ready for the next chain, and advance to its first part. 
allcmatched = true; combicurrent += 4; lowest_occurrences = 0; } } else { // We didn't get an end marker - just an individual part. // If all parts in the current chain have been matched so far, look for this one as well. if (allcmatched) { s1 =o.lm.l[phraselist]->getItemAtInt(index); if ((foundcurrent = found.find(s1)) == foundend) { allcmatched = false; combisofar = ""; } else { if (combisofar.length() > 0) { combisofar += ", "; } combisofar += s1; // also track lowest number of times any one part occurs in the text // as this will correspond to the number of times the whole chain occurs if ((lowest_occurrences == 0) || (lowest_occurrences > foundcurrent->second.second)) { lowest_occurrences = foundcurrent->second.second; } } } } // Advance to the next part in the current chain combicurrent++; } // even if we already found a combi ban, we must still wait; there may be non-combi exceptions to follow // now check non-combi phrases foundcurrent = found.begin(); while (foundcurrent != foundend) { // check time for current phrase if (not o.lm.l[phraselist]->checkTimeAt(foundcurrent->second.first)) { #ifdef DGDEBUG std::cout << "Ignoring phrase based on time limits: " << foundcurrent->first << ", " << o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first) << std::endl; #endif foundcurrent++; continue; } // 0=banned, 1=weighted, -1=exception, 2=combi, 3=weightedcombi type = o.lm.l[phraselist]->getTypeAt(foundcurrent->second.first); if (type == 0) { // if we already found a combi ban, we don't need to know this stuff if (!bannedcombi) { isItNaughty = true; bannedphrase = foundcurrent->first; bannedcategory = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, &cat); } } else if (type == 1) { // found a weighted phrase - either add one lot of its score, or one lot for every occurrence, depending on phrase filtering mode weight = o.lm.l[phraselist]->getWeightAt(foundcurrent->second.first) * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 
1 : foundcurrent->second.second); weighting += weight; if (weight > 0) { currcat = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, &cat); if (cat >= 0) { //don't output duplicate categories catcurrent = listcategories.find(cat); if (catcurrent != listcategories.end()) { // add one or N times the weight to this category's score catcurrent->second.weight += weight * (o.fg[filtergroup]->weighted_phrase_mode == 2 ? 1 : foundcurrent->second.second); } else { listcategories[cat] = listent(weight,currcat); } } } if (o.show_weighted_found) { if (weightedphrase.length() > 0) { weightedphrase += "+"; } if (weight < 0) { weightedphrase += "-"; } weightedphrase += foundcurrent->first; } #ifdef DGDEBUG std::cout << "found weighted phrase ("<< o.fg[filtergroup]->weighted_phrase_mode << "): " << foundcurrent->first << " x" << foundcurrent->second.second << " (per phrase: " << o.lm.l[phraselist]->getWeightAt(foundcurrent->second.first) << ", calculated: " << weight << ")" << std::endl; #endif } else if (type == -1) { isException = true; isItNaughty = false; // Exception phrase found: // Exception search term found: message_no = searchterms ? 457 : 604; whatIsNaughtyLog = o.language_list.getTranslation(message_no); whatIsNaughtyLog += foundcurrent->first; whatIsNaughty = ""; whatIsNaughtyCategories = o.lm.l[phraselist]->getListCategoryAt(foundcurrent->second.first, NULL); return; // no point in going further } foundcurrent++; } #ifdef DGDEBUG std::cout << "WEIGHTING: " << weighting << std::endl; #endif // store the lowest negative weighting or highest positive weighting out of all filtering runs, preferring to store positive weightings. if ((weighting < 0 && naughtiness <= 0 && weighting < naughtiness) || (naughtiness >= 0 && weighting > naughtiness) || (naughtiness < 0 && weighting > 0) ) { naughtiness = weighting; } #ifdef DGDEBUG std::cout << "NAUGHTINESS: " << naughtiness << std::endl; #endif // *now* we can safely get down to the whole banning business! 
if (bannedcombi) { isItNaughty = true; // Banned combination phrase found: // Banned combination search term found: message_no = searchterms ? 452: 400; whatIsNaughtyLog = o.language_list.getTranslation(message_no); whatIsNaughtyLog += combifound; // Banned combination phrase found. // Banned combination search term found. whatIsNaughty = o.language_list.getTranslation(searchterms ? 453 : 401); whatIsNaughtyCategories = bannedcategory.toCharArray(); return; } if (isItNaughty) { // Banned phrase found: // Banned search term found: message_no = searchterms ? 450: 300; whatIsNaughtyLog = o.language_list.getTranslation(message_no); whatIsNaughtyLog += bannedphrase; // Banned phrase found. // Banned search term found. whatIsNaughty = o.language_list.getTranslation(searchterms ? 451 : 301); whatIsNaughtyCategories = bannedcategory.toCharArray(); return; } if (weighting > limit) { isItNaughty = true; // Weighted phrase limit of // Weighted search term limit of message_no = searchterms ? 454: 401; whatIsNaughtyLog = o.language_list.getTranslation(message_no); whatIsNaughtyLog += String(limit).toCharArray(); whatIsNaughtyLog += " : "; whatIsNaughtyLog += String(weighting).toCharArray(); if (o.show_weighted_found) { whatIsNaughtyLog += " ("; whatIsNaughtyLog += weightedphrase; whatIsNaughtyLog += ")"; } // Weighted phrase limit exceeded. // Weighted search term limit exceeded. whatIsNaughty = o.language_list.getTranslation(searchterms ? 455 : 403); // Generate category list, sorted with highest scoring first. 
bool nonempty = false; bool belowthreshold = false; String categories; std::deque<listent> sortable_listcategories; catcurrent = listcategories.begin(); while (catcurrent != listcategories.end()) { sortable_listcategories.push_back(catcurrent->second); catcurrent++; } std::sort(sortable_listcategories.begin(), sortable_listcategories.end()); std::deque<listent>::iterator k = sortable_listcategories.begin(); while (k != sortable_listcategories.end()) { // if category display threshold is in use, apply it if (!belowthreshold && (o.fg[filtergroup]->category_threshold > 0) && (k->weight < o.fg[filtergroup]->category_threshold)) { whatIsNaughtyDisplayCategories = categories.toCharArray(); belowthreshold = true; usedisplaycats = true; } if (k->string.length() > 0) { if (nonempty) categories += ", "; categories += k->string; nonempty = true; } k++; // if category threshold is set to show only the top category, // everything after the first loop is below the threshold if (!belowthreshold && o.fg[filtergroup]->category_threshold < 0) { whatIsNaughtyDisplayCategories = categories.toCharArray(); belowthreshold = true; usedisplaycats = true; } } whatIsNaughtyCategories = categories.toCharArray(); return; } // whatIsNaughty is what is displayed in the browser // whatIsNaughtyLog is what is logged in the log file if at all }
// check data received from ICAP server and interpret as virus name & return value int icapinstance::doScan(Socket & icapsock, HTTPHeader * docheader, const char* object, unsigned int objectsize, NaughtyFilter * checkme) { char *data = new char[8192]; try { String line; int rc = icapsock.getLine(data, 8192, o.content_scanner_timeout); if (rc == 0) return ICAP_NODATA; line = data; #ifdef DGDEBUG std::cout << "reply from icap: " << line << std::endl; #endif // reply is of the format: // ICAP/1.0 204 No Content Necessary (etc) String returncode(line.after(" ").before(" ")); if (returncode == "204") { #ifdef DGDEBUG std::cerr << "ICAP says clean!" << std::endl; #endif delete[]data; return DGCS_CLEAN; } else if (returncode == "100") { #ifdef DGDEBUG std::cerr << "ICAP says continue!" << std::endl; #endif // discard rest of headers (usually just a blank line) // this is so we are in the right place in the data stream to // call doScan() again later, because people like Symantec seem // to think sending code 100 then code 204 one after the other // is not an abuse of the ICAP specification. while (icapsock.getLine(data, 8192, o.content_scanner_timeout) > 0) { if (data[0] == 13) break; } delete[]data; return ICAP_CONTINUE; } else if (returncode == "200") { #ifdef DGDEBUG std::cerr << "ICAP says maybe not clean!" << std::endl; #endif while (icapsock.getLine(data, 8192, o.content_scanner_timeout) > 0) { if (data[0] == 13) // end marker break; line = data; // Symantec's engine gives us the virus name in the ICAP headers if (supportsXIF && line.startsWith("X-Infection-Found")) { #ifdef DGDEBUG std::cout << "ICAP says infected! (X-Infection-Found)" << std::endl; #endif lastvirusname = line.after("Threat=").before(";"); delete[]data; blockFile(NULL,NULL,checkme); return DGCS_INFECTED; } } // AVIRA's Antivir gives us 200 in all cases, so // - unfortunately - we must pay attention to the encapsulated // header/body. 
if (needsBody) { // grab & compare the HTTP return code from modified response // if it's been modified, assume there's an infection icapsock.getLine(data, 8192, o.content_scanner_timeout); line = data; #ifdef DGDEBUG std::cout << "Comparing original return code to modified:" << std::endl << docheader->header.front() << std::endl << line << std::endl; #endif int respmodReturnCode = line.after(" ").before(" ").toInteger(); if (respmodReturnCode != docheader->returnCode()) { #ifdef DGDEBUG std::cerr << "ICAP says infected! (returned header comparison)" << std::endl; #endif delete[] data; lastvirusname = "Unknown"; blockFile(NULL,NULL,checkme); return DGCS_INFECTED; } // ok - headers were identical, so look at encapsulated body // discard the rest of the encapsulated headers while (icapsock.getLine(data, 8192, o.content_scanner_timeout) > 0) { if (data[0] == 13) break; } // grab body chunk size #ifdef DGDEBUG std::cout << "Comparing original body data to modified" << std::endl; #endif icapsock.getLine(data, 8192, o.content_scanner_timeout); line = data; int bodysize = line.hexToInteger(); // get, say, the first 100 bytes and compare them to what we // originally sent to see if it has been modified unsigned int chunksize = (bodysize < 100) ? bodysize : 100; if (chunksize > objectsize) chunksize = objectsize; icapsock.readFromSocket(data, chunksize, 0, o.content_scanner_timeout); if (memcmp(data, object, chunksize) == 0) { #ifdef DGDEBUG std::cerr << "ICAP says clean!" << std::endl; #endif delete[]data; return DGCS_CLEAN; } else { #ifdef DGDEBUG std::cerr << "ICAP says infected! (body byte comparison)" << std::endl; #endif delete[] data; lastvirusname = "Unknown"; blockFile(NULL,NULL,checkme); return DGCS_INFECTED; } } // even if we don't find an X-Infection-Found header, // the file is still infected! #ifdef DGDEBUG std::cerr << "ICAP says infected! 
(no further tests)" << std::endl; #endif delete[] data; lastvirusname = "Unknown"; blockFile(NULL,NULL,checkme); return DGCS_INFECTED; } else if (returncode == "404") { #ifdef DGDEBUG std::cerr << "ICAP says no such service!" << std::endl; #endif lastmessage = "ICAP reports no such service"; syslog(LOG_ERR, "ICAP reports no such service; check your server URL"); delete[]data; return DGCS_SCANERROR; } else { #ifdef DGDEBUG std::cerr << "ICAP returned unrecognised response code: " << returncode << std::endl; #endif lastmessage = "ICAP returned unrecognised response code."; syslog(LOG_ERR, "ICAP returned unrecognised response code: %s", returncode.toCharArray()); delete[]data; return DGCS_SCANERROR; } delete[]data; } catch(std::exception & e) { #ifdef DGDEBUG std::cerr << "Exception getting reply from ICAP: " << e.what() << std::endl; #endif lastmessage = "Exception getting reply from ICAP."; syslog(LOG_ERR, "Exception getting reply from ICAP: %s", e.what()); delete[]data; return DGCS_SCANERROR; } // it is generally NOT a good idea, when using virus scanning, // to continue as if nothing went wrong by default! return DGCS_SCANERROR; }
// initialise the plugin - determine icap ip, port & url int icapinstance::init(void* args) { // always include these lists if (!readStandardLists()) { return DGCS_ERROR; } icapurl = cv["icapurl"]; // format: icap://icapserver:1344/avscan if (icapurl.length() < 3) { if (!is_daemonised) std::cerr << "Error reading icapurl option." << std::endl; syslog(LOG_ERR, "Error reading icapurl option."); return DGCS_ERROR; // it would be far better to do a test connection } icaphost = icapurl.after("//"); icapport = icaphost.after(":").before("/").toInteger(); if (icapport == 0) { icapport = 1344; } icaphost = icaphost.before("/"); if (icaphost.contains(":")) { icaphost = icaphost.before(":"); } struct hostent *host; if ((host = gethostbyname(icaphost.toCharArray())) == 0) { if (!is_daemonised) std::cerr << "Error resolving icap host address." << std::endl; syslog(LOG_ERR, "Error resolving icap host address."); return DGCS_ERROR; } icapip = inet_ntoa(*(struct in_addr *) host->h_addr_list[0]); #ifdef DGDEBUG std::cerr << "ICAP server address:" << icapip << std::endl; #endif // try to connect to the ICAP server and perform an OPTIONS request Socket icapsock; try { if (icapsock.connect(icapip.toCharArray(), icapport) < 0) { throw std::runtime_error("Could not connect to server"); } String line("OPTIONS " + icapurl + " ICAP/1.0\r\nHost: " + icaphost + "\r\n\r\n"); icapsock.writeString(line.toCharArray()); // parse the response char buff[8192]; // first line - look for 200 OK icapsock.getLine(buff, 8192, o.content_scanner_timeout); line = buff; #ifdef DGDEBUG std::cout << "ICAP/1.0 OPTIONS response:" << std::endl << line << std::endl; #endif if (line.after(" ").before(" ") != "200") { if (!is_daemonised) std::cerr << "ICAP response not 200 OK" << std::endl; syslog(LOG_ERR, "ICAP response not 200 OK"); return DGCS_WARNING; //throw std::runtime_error("Response not 200 OK"); } while (icapsock.getLine(buff, 8192, o.content_scanner_timeout) > 0) { line = buff; #ifdef DGDEBUG std::cout 
<< line << std::endl; #endif if (line.startsWith("\r")) { break; } else if (line.startsWith("Preview:")) { usepreviews = true; previewsize = line.after(": ").toInteger(); } else if (line.startsWith("Server:")) { if (line.contains("AntiVir-WebGate")) { needsBody = true; } } else if (line.startsWith("X-Allow-Out:")) { if (line.contains("X-Infection-Found")) { supportsXIF = true; } } } icapsock.close(); } catch(std::exception& e) { if (!is_daemonised) std::cerr << "ICAP server did not respond to OPTIONS request: " << e.what() << std::endl; syslog(LOG_ERR, "ICAP server did not respond to OPTIONS request: %s", e.what()); return DGCS_ERROR; } #ifdef DGDEBUG if (usepreviews) std::cout << "Message previews enabled; size: " << previewsize << std::endl; else std::cout << "Message previews disabled" << std::endl; #endif return DGCS_OK; }
// default method for deciding whether we will handle a request bool DMPlugin::willHandle(HTTPHeader *requestheader, HTTPHeader *docheader) { // match user agent first (quick) if (!(alwaysmatchua || ua_match.match(requestheader->userAgent().toCharArray()))) return false; // then check standard lists (mimetypes & extensions) // mimetypes String mimetype(""); bool matchedmime = false; if (mimelistenabled) { mimetype = docheader->getContentType(); #ifdef DGDEBUG std::cout<<"mimetype: "<<mimetype<<std::endl; #endif if (mimetypelist.findInList(mimetype.toCharArray()) == NULL) { if (!extensionlistenabled) return false; } else matchedmime = true; } if (extensionlistenabled && !matchedmime) { // determine the extension String path(requestheader->decode(requestheader->url())); path.removeWhiteSpace(); path.toLower(); path.removePTP(); path = path.after("/"); path.hexDecode(); path.realPath(); String disposition(docheader->disposition()); String extension; if (disposition.length() > 2) { extension = disposition; while (extension.contains(".")) { extension = extension.after("."); } extension = "." + extension; } else { if (!path.contains("?")) { extension = path; } else { if (mimetype.length() == 0) mimetype = docheader->getContentType(); if (mimetype.contains("application/")) { extension = path; if (extension.contains("?")) { extension = extension.before("?"); } } } } #ifdef DGDEBUG std::cout<<"extension: "<<extension<<std::endl; #endif // check the extension list if (!extension.contains(".") || (extensionlist.findEndsWith(extension.toCharArray()) == NULL)) return matchedmime; } return true; }
void DockingAlgorithm::readOptionFile(String filename, Options& output_options, list<Constraint*>& output_constraints, const AtomContainer* ref_ligand) { INIFile ini(filename); ini.read(); Size num_sections = ini.getNumberOfSections(); for(Size i = 0; i < num_sections; i++) { String name = ini.getSection(i)->getName(); if (name.hasPrefix("ReferenceArea") || name.hasPrefix("PharmacophoreConstraint")) { continue; } Options* options_category = &output_options; if (name != "Docking-Settings") { options_category = output_options.createSubcategory(name); } Log.level(10)<<endl<<"--- Reading parameter-section '" << name << "' from file "<<"'"<<filename<<"' : -----"<<endl; INIFile::LineIterator it = ini.getSectionFirstLine(name); INIFile::LineIterator it_end = ini.getSectionLastLine(name).getSectionNextLine(); it.getSectionNextLine(); for (; it != it_end; it.getSectionNextLine()) { String line = *it; if (line == "") continue; String key = line.before("="); key.trim(); String value = line.after("="); value.trim(); if (key == "" || value == "") continue; options_category->set(key, value); if (name == ScoringFunction::SUBCATEGORY_NAME || name == "IMGDock") { Log.level(10)<<key<<" : "<<value<<endl; } } } for (Size i = 0; i < 100; i++) { string sec_name = "ReferenceArea"+String(i); if (!ini.hasSection(sec_name)) break; Log.level(10)<<endl<<"--- Reading "<<sec_name<<" from file "<<"'"<<filename<<"' : -----"<<endl; String name = ini.getValue(sec_name, "name"); bool is_fraction = ini.getValue(sec_name, "is_fraction").toBool(); double penalty = ini.getValue(sec_name, "penalty").toDouble(); double atoms = ini.getValue(sec_name, "atoms").toDouble(); vector<Vector3> v(4); Log.level(10)<<"name = "<<name<<endl; Log.level(10)<<"is_fraction = "<<is_fraction<<endl; Log.level(10)<<"atoms = "<<atoms<<endl; Log.level(10)<<"penalty = "<<penalty<<endl; ReferenceArea* rf; String use_ref = ini.getValue(sec_name, "use_ref_ligand"); if (use_ref != INIFile::UNDEFINED && use_ref.toBool()) { 
Log.level(10)<<"use_ref_ligand = true"<<endl; if (!ref_ligand) { throw BALL::Exception::GeneralException(__FILE__, __LINE__, "DockingAlgorithm::readOptionFile()", "Reference-ligand required but not specified!"); } rf = new ReferenceArea(ref_ligand, is_fraction, atoms, penalty); v = rf->input_points_; } else { for (Size i = 0; i <= 3; i++) { String pn = "p"+String(i); String s = ini.getValue(sec_name, pn); if (s == INIFile::UNDEFINED) { Log.error()<<"[Error:] 4 points must be defined for each ReferenceArea!"<<endl; return; } s.trim(); double d0 = s.getField(0, ", ").toDouble(); double d1 = s.getField(1, ", ").toDouble(); double d2 = s.getField(2, ", ").toDouble(); v[i] = Vector3(d0, d1, d2); } rf = new ReferenceArea(v[0], v[1], v[2], v[3], is_fraction, atoms, penalty); } // Increase size of box (e.g. bounding box around ref-ligand), if desired by the user. String inc = ini.getValue(sec_name, "box_size_increase"); if (inc != INIFile::UNDEFINED) { rf->enlarge(inc.toDouble()); } Log.level(10)<<"p0 = "<<v[0]<<endl; Log.level(10)<<"p1 = "<<v[1]<<endl; Log.level(10)<<"p2 = "<<v[2]<<endl; Log.level(10)<<"p3 = "<<v[3]<<endl; if (name != INIFile::UNDEFINED) rf->setName(name); output_constraints.push_back(rf); } for (Size i = 0; i < 100; i++) { string sec_name = "PharmacophoreConstraint"+String(i); if (!ini.hasSection(sec_name)) break; Log.level(10)<<endl<<"--- Reading "<<sec_name<<" from file "<<"'"<<filename<<"' : -----"<<endl; String name = ini.getValue(sec_name, "name"); double penalty = ini.getValue(sec_name, "penalty").toDouble(); double desired_energy = ini.getValue(sec_name, "desired interaction energy").toDouble(); String residues = ini.getValue(sec_name, "residue-IDs"); vector<String> residue_vector; residues.split(residue_vector, ", "); String types = ini.getValue(sec_name, "interaction types"); vector<String> types_vector; types.split(types_vector, ", "); list<String> types_list; for (Size i = 0; i < types_vector.size(); i++) { 
types_list.push_back(types_vector[i]); } Log.level(10)<<"name = "<<name<<endl; Log.level(10)<<"residue-IDs = "<<residues<<endl; Log.level(10)<<"interaction types = "<<types<<endl; Log.level(10)<<"desired interaction energy = "<<desired_energy<<endl; Log.level(10)<<"penalty = "<<penalty<<endl; PharmacophoreConstraint* phc = new PharmacophoreConstraint(residue_vector, types_list, desired_energy, penalty); if (name != INIFile::UNDEFINED) phc->setName(name); output_constraints.push_back(phc); } Log.level(10)<<endl<<"--- finished reading config-file."<<endl<<endl<<endl; }
// read in a list linking IPs, subnets & IP ranges to filter groups bool IPList::readIPMelangeList(const char *filename) { // load in the list file std::ifstream input ( filename ); if (!input) { if (!is_daemonised) { std::cerr << "Error reading file (does it exist?): " << filename << std::endl; } syslog(LOG_ERR, "%s%s","Error reading file (does it exist?): ",filename); return false; } // compile regexps for determining whether a list entry is an IP, a subnet (IP + mask), or a range RegExp matchIP, matchSubnet, matchRange; #ifdef HAVE_PCRE matchIP.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"); matchSubnet.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"); matchRange.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}-\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"); #else matchIP.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$"); matchSubnet.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}/[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$"); matchRange.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}-[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$"); #endif // read in the file String line; char buffer[ 2048 ]; while (input) { if (!input.getline(buffer, sizeof( buffer ))) { break; } // ignore comments if (buffer[0] == '#') continue; // ignore blank lines if (strlen(buffer) < 7) continue; line = buffer; #ifdef DGDEBUG std::cout << "line: " << line << std::endl; #endif // store the IP address (numerically, not as a string) and filter group in either the IP list, subnet list or range list if (matchIP.match(line.toCharArray())) { struct in_addr address; if (inet_aton(line.toCharArray(), &address)) { uint32_t addr = ntohl(address.s_addr); iplist.push_back(addr); } } else if (matchSubnet.match(line.toCharArray())) { struct in_addr address; struct in_addr addressmask; String subnet(line.before("/")); String mask(line.after("/")); if (inet_aton(subnet.toCharArray(), &address) && inet_aton(mask.toCharArray(), &addressmask)) { 
ipl_subnetstruct s; uint32_t addr = ntohl(address.s_addr); s.mask = ntohl(addressmask.s_addr); // pre-mask the address for quick comparison s.maskedaddr = addr & s.mask; ipsubnetlist.push_back(s); } } else if (matchRange.match(line.toCharArray())) { struct in_addr addressstart; struct in_addr addressend; String start(line.before("-")); String end(line.after("-")); if (inet_aton(start.toCharArray(), &addressstart) && inet_aton(end.toCharArray(), &addressend)) { ipl_rangestruct r; r.startaddr = ntohl(addressstart.s_addr); r.endaddr = ntohl(addressend.s_addr); iprangelist.push_back(r); } } // hmmm. the line didn't match any of our regular expressions. // assume it's a hostname. else { line.toLower(); hostlist.push_back(line); } } input.close(); #ifdef DGDEBUG std::cout << "starting sort" << std::endl; #endif std::sort(iplist.begin(), iplist.end()); std::sort(hostlist.begin(), hostlist.end()); #ifdef DGDEBUG std::cout << "sort complete" << std::endl; std::cout << "ip list dump:" << std::endl; std::vector<uint32_t>::iterator i = iplist.begin(); while (i != iplist.end()) { std::cout << "IP: " << *i << std::endl; ++i; } std::cout << "subnet list dump:" << std::endl; std::list<ipl_subnetstruct>::iterator j = ipsubnetlist.begin(); while (j != ipsubnetlist.end()) { std::cout << "Masked IP: " << j->maskedaddr << " Mask: " << j->mask << std::endl; ++j; } std::cout << "range list dump:" << std::endl; std::list<ipl_rangestruct>::iterator k = iprangelist.begin(); while (k != iprangelist.end()) { std::cout << "Start IP: " << k->startaddr << " End IP: " << k->endaddr << std::endl; ++k; } std::cout << "host list dump:" << std::endl; std::vector<String>::iterator l = hostlist.begin(); while (l != hostlist.end()) { std::cout << "Hostname: " << *l << std::endl; ++l; } #endif return true; }
// Test whether or not a particular request's incoming/outgoing data should be scanned. // This is a later-stage test; info is known about the actual data itself when this is called. int CSPlugin::willScanData(const String &url, const char *user, int filtergroup, const char *ip, bool post, bool reconstituted, bool exception, bool bypass, const String &disposition, const String &mimetype, off_t size) { //exceptionvirusmimetypelist if (mimetype.length() > 2) { if (exceptionvirusmimetypelist.findInList(mimetype.toCharArray()) != NULL) { #ifdef DGDEBUG std::cout << "willScanData: ignoring exception MIME type (" << mimetype.c_str() << ")" << std::endl; #endif return DGCS_NOSCAN; // match } } //exceptionvirusextensionlist String extension; if (disposition.length() > 2) { // If we have a content-disposition, determine file extension from that #ifdef DGDEBUG std::cout << "disposition: " << disposition << std::endl; #endif std::string::size_type start = disposition.find("filename="); if (start != std::string::npos) { start += 9; char endchar = ';'; if (disposition[start] == '"') { endchar = '"'; ++start; } std::string::size_type end = disposition.find(endchar, start); if (end != std::string::npos) extension = disposition.substr(start, end - start); else extension = disposition.substr(start); } while (extension.contains(".")) { extension = extension.after("."); } extension = "." 
+ extension; #ifdef DGDEBUG std::cout << "extension from disposition: " << extension << std::endl; #endif } else { // Otherwise, determine it from the URL String urld(HTTPHeader::decode(url)), path; urld.removeWhiteSpace(); urld.toLower(); urld.removePTP(); if (urld.contains("/")) { path = urld.after("/"); path.hexDecode(); path.realPath(); } if (!path.contains("?")) { extension = path; } else if (mimetype.contains("application/")) { extension = path; if (extension.contains("?")) { extension = extension.before("?"); } } #ifdef DGDEBUG std::cout << "extension from URL: " << extension << std::endl; #endif } if (extension.contains(".")) { if (exceptionvirusextensionlist.findEndsWith(extension.toCharArray()) != NULL) { #ifdef DGDEBUG std::cout << "willScanData: ignoring exception file extension (" << extension.c_str() << ")" << std::endl; #endif return DGCS_NOSCAN; // match } } #ifdef DGDEBUG std::cout << "willScanData: I'm interested" << std::endl; #endif return DGCS_NEEDSCAN; }
// ntlm auth header username extraction - also lets connection persist long enough to complete NTLM negotiation int ntlminstance::identify(Socket& peercon, Socket& proxycon, HTTPHeader &h, std::string &string) { FDTunnel fdt; Socket* upstreamcon; Socket ntlmcon; String url; if (transparent) { // we are actually sending to a second Squid, which just does NTLM ntlmcon.connect(transparent_ip, transparent_port); upstreamcon = &ntlmcon; url = h.getUrl(); h.makeTransparent(false); } else { upstreamcon = &proxycon; } String at(h.getAuthType()); if (transparent && (at != "NTLM")) { // obey forwarded-for options in what we send out std::string clientip; if (o.forwarded_for == 1) { if (o.use_xforwardedfor == 1) { // grab the X-Forwarded-For IP if available clientip = h.getXForwardedForIP(); // otherwise, grab the IP directly from the client connection if (clientip.length() == 0) clientip = peercon.getPeerIP(); } else { clientip = peercon.getPeerIP(); } h.addXForwardedFor(clientip); // add squid-like entry } // in transparent mode, we need to make the initial auth required response // appear to come from the smoothie itself as an origin server, not as a proxy. // // accomplish this by redirecting to a URL that results in accessing DG as if it was // a webserver, fudging origin-server-style NTLM auth to the client whilst actually // performing proper proxy-style auth to the parent proxy, then redirecting the client // back to the actual URL. if (!url.contains("sgtransntlmdest=")) { // user has not yet been redirected // get the browser to make a request to the proxy port on the relevant interface, // embedding the original URL they were trying to access. // unless they're accessing a domain for which authentication is not required, // in which case return a no match response straight away. 
if (no_auth_list >= 0) { #ifdef DGDEBUG std::cout << "NTLM: Checking noauthdomains list" << std::endl; #endif std::string::size_type start = url.find("://"); if (start != std::string::npos) { start += 3; std::string domain; domain = url.getHostname(); #ifdef DGDEBUG std::cout << "NTLM: URL " << url << ", domain " << domain << std::endl; #endif char *i; while ((start = domain.find('.')) != std::string::npos) { i = o.lm.l[no_auth_list]->findInList(domain.c_str()); if (i != NULL) { #ifdef DGDEBUG std::cout << "NTLM: Found domain in noauthdomains list" << std::endl; #endif return DGAUTH_NOMATCH; } domain.assign(domain.substr(start + 1)); } if (!domain.empty()) { domain = "." + domain; i = o.lm.l[no_auth_list]->findInList(domain.c_str()); if (i != NULL) { #ifdef DGDEBUG std::cout << "NTLM: Found domain in noauthdomains list" << std::endl; #endif return DGAUTH_NOMATCH; } } } } string = "http://"; string += hostname; string += ":"; string += String(peercon.getPort()).toCharArray(); string += "/?sgtransntlmdest="; string += url.toCharArray(); #ifdef DGDEBUG std::cout << "NTLM - redirecting client to " << string << std::endl; #endif return DGAUTH_REDIRECT; } #ifdef DGDEBUG std::cout << "NTLM - forging initial auth required from origin server" << std::endl; #endif // obey forwarded-for options in what we send out if (o.forwarded_for == 1) { std::string clientip; if (o.use_xforwardedfor == 1) { // grab the X-Forwarded-For IP if available clientip = h.getXForwardedForIP(); // otherwise, grab the IP directly from the client connection if (clientip.length() == 0) clientip = peercon.getPeerIP(); } else { clientip = peercon.getPeerIP(); } h.addXForwardedFor(clientip); // add squid-like entry } // send a variant on the original request (has to be something Squid will route to the outside // world, and that it will require NTLM authentication for) String domain(url.after("?sgtransntlmdest=").after("://")); if (domain.contains("/")) domain = domain.before("/"); domain = "http://" + 
domain + "/"; h.setURL(domain); h.makePersistent(); h.out(&peercon, upstreamcon, __DGHEADER_SENDALL); // grab the auth required response and make it look like it's from the origin server h.in(upstreamcon, true); h.makeTransparent(true); h.makePersistent(); // send it to the client h.out(NULL, &peercon, __DGHEADER_SENDALL); if (h.contentLength() != -1) fdt.tunnel(*upstreamcon, peercon, false, h.contentLength(), true); if (h.isPersistent()) { // now grab the client's response to the auth request, and carry on as usual. h.in(&peercon, true); h.makeTransparent(false); at = h.getAuthType(); } else return DGAUTH_NOMATCH; } else if (transparent && url.contains("?sgtransntlmdest=")) { // send a variant on the original request (has to be something Squid will route to the outside // world, and that it will require NTLM authentication for) String domain(url.after("?sgtransntlmdest=").after("://")); if (domain.contains("/")) domain = domain.before("/"); domain = "http://" + domain + "/"; h.setURL(domain); } if (at != "NTLM") { // if no auth currently underway, then... if (at.length() == 0) { // allow the initial request through so the client will get the proxy's initial auth required response. // advertise persistent connections so that parent proxy will agree to advertise NTLM support. 
#ifdef DGDEBUG std::cout << "No auth negotiation currently in progress - making initial request persistent so that proxy will advertise NTLM" << std::endl; #endif h.makePersistent(); } return DGAUTH_NOMATCH; } #ifdef DGDEBUG std::cout << "NTLM - sending step 1" << std::endl; #endif if (o.forwarded_for) { std::string clientip; if (o.use_xforwardedfor) { // grab the X-Forwarded-For IP if available clientip = h.getXForwardedForIP(); // otherwise, grab the IP directly from the client connection if (clientip.length() == 0) clientip = peercon.getPeerIP(); } else { clientip = peercon.getPeerIP(); } h.addXForwardedFor(clientip); // add squid-like entry } h.makePersistent(); h.out(&peercon, upstreamcon, __DGHEADER_SENDALL); #ifdef DGDEBUG std::cout << "NTLM - receiving step 2" << std::endl; #endif h.in(upstreamcon, true); if (h.authRequired()) { #ifdef DGDEBUG std::cout << "NTLM - sending step 2" << std::endl; #endif if (transparent) h.makeTransparent(true); h.out(NULL, &peercon, __DGHEADER_SENDALL); if (h.contentLength() != -1) fdt.tunnel(*upstreamcon, peercon, false, h.contentLength(), true); #ifdef DGDEBUG std::cout << "NTLM - receiving step 3" << std::endl; #endif h.in(&peercon, true); if (transparent) { h.makeTransparent(false); String domain(url.after("?sgtransntlmdest=").after("://")); if (domain.contains("/")) domain = domain.before("/"); domain = "http://" + domain + "/"; h.setURL(domain); } #ifdef DGDEBUG std::cout << "NTLM - decoding type 3 message" << std::endl; #endif std::string message(h.getAuthData()); ntlm_authenticate auth; ntlm_auth *a = &(auth.a); static char username[256]; // fixed size static char username2[256]; char* inptr = username; char* outptr = username2; size_t l,o; // copy the NTLM message into the union's buffer, simultaneously filling in the struct if ((message.length() > sizeof(ntlm_auth)) || (message.length() < offsetof(ntlm_auth, payload))) { syslog(LOG_ERR, "NTLM - Invalid message of length %zd, message was: %s", message.length(), 
message.c_str()); #ifdef DGDEBUG std::cerr << "NTLM - Invalid message of length " << message.length() << ", message was: " << message << std::endl; #endif return -3; } memcpy((void *)auth.buf, (const void *)message.c_str(), message.length()); // verify that the message is indeed a type 3 if (strcmp("NTLMSSP",a->h.signature) == 0 && WSWAP(a->h.type) == 3) { // grab the length & offset of the username within the message // cope with the possibility we are a different byte order to Windows l = SSWAP(a->user.len); o = WSWAP(a->user.offset); if ((l > 0) && (o >= 0) && (o + l) <= sizeof(a->payload) && (l <= 254)) { // everything is in range // note offsets are from start of packet - not the start of the payload area memcpy((void *)username, (const void *)&(auth.buf[o]),l); username[l] = '\0'; // check flags - we may need to convert from UTF-16 to something more sensible int f = WSWAP(a->flags); if (f & WSWAP(0x0001)) { iconv_t ic = iconv_open("UTF-8", "UTF-16LE"); if (ic == (iconv_t)-1) { syslog(LOG_ERR, "NTLM - Cannot initialise conversion from UTF-16LE to UTF-8: %s", strerror(errno)); #ifdef DGDEBUG std::cerr << "NTLM - Cannot initialise conversion from UTF-16LE to UTF-8: " << strerror(errno) << std::endl; #endif iconv_close(ic); return -2; } size_t l2 = 256; local_iconv_adaptor(iconv, ic, &inptr, &l, &outptr, &l2); iconv_close(ic); username2[256 - l2] = '\0'; #ifdef DGDEBUG std::cout << "NTLM - got username (converted from UTF-16LE) " << username2 << std::endl; #endif string = username2; } else { #ifdef DGDEBUG std::cout << "NTLM - got username " << username << std::endl; #endif string = username; } if (!transparent) return DGAUTH_OK; // if in transparent mode, send a redirect to the client's original requested URL, // having sent the final headers to the NTLM-only Squid to do with what it will std::string tmp = peercon.getPeerIP(); h.addXForwardedFor(tmp); h.out(&peercon, upstreamcon, __DGHEADER_SENDALL); // also, the return code matters in ways it hasn't mattered 
before: // mustn't send a redirect if it is still 407, or we get a redirection loop h.in(upstreamcon, true); if (h.returnCode() == 407) { h.makeTransparent(false); h.out(NULL, &peercon, __DGHEADER_SENDALL); return -10; } url = url.after("="); string = url.toCharArray(); return DGAUTH_REDIRECT; } } return DGAUTH_NOMATCH; } else { #ifdef DGDEBUG std::cout << "NTLM - step 2 was not part of an auth handshake!" << std::endl; for (unsigned int i = 0; i < h.header.size(); i++) std::cout << h.header[i] << std::endl; #endif syslog(LOG_ERR, "NTLM - step 2 was not part of an auth handshake! (%s)", h.header[0].toCharArray()); return -1; } }
// read in a list linking IPs, subnets & IP ranges to filter groups // return 0 for success, -1 for failure, 1 for warning int ipinstance::readIPMelangeList(const char *filename) { // load in the list file std::ifstream input ( filename ); if (!input) { if (!is_daemonised) { std::cerr << "Error reading file (does it exist?): " << filename << std::endl; } syslog(LOG_ERR, "%s%s","Error reading file (does it exist?): ",filename); return -1; } // compile regexps for determining whether a list entry is an IP, a subnet (IP + mask), or a range RegExp matchIP, matchSubnet, matchRange; #ifdef HAVE_PCRE matchIP.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"); matchSubnet.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"); matchRange.comp("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}-\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"); #else matchIP.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$"); matchSubnet.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}/[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$"); matchRange.comp("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}-[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$"); #endif // read in the file String line; String key, value; char buffer[ 2048 ]; bool warn = false; while (input) { if (!input.getline(buffer, sizeof( buffer ))) { break; } // ignore comments if (buffer[0] == '#') continue; // ignore blank lines if (strlen(buffer) < 10) continue; line = buffer; // split into key & value if (line.contains("=")) { key = line.before("="); key.removeWhiteSpace(); value = line.after("filter"); } else { if (!is_daemonised) std::cerr << "No filter group given; entry " << line << " in " << filename << std::endl; syslog(LOG_ERR, "No filter group given; entry %s in %s", line.toCharArray(), filename); warn = true; continue; } #ifdef DGDEBUG std::cout << "key: " << key << std::endl; std::cout << "value: " << value.toInteger() << std::endl; #endif if ((value.toInteger() < 1) || (value.toInteger() > 
o.filter_groups)) { if (!is_daemonised) std::cerr << "Filter group out of range; entry " << line << " in " << filename << std::endl; syslog(LOG_ERR, "Filter group out of range; entry %s in %s", line.toCharArray(), filename); warn = true; continue; } // store the IP address (numerically, not as a string) and filter group in either the IP list, subnet list or range list if (matchIP.match(key.toCharArray())) { struct in_addr address; if (inet_aton(key.toCharArray(), &address)) { iplist.push_back(ip(ntohl(address.s_addr),value.toInteger()-1)); } } else if (matchSubnet.match(key.toCharArray())) { struct in_addr address; struct in_addr addressmask; String subnet(key.before("/")); String mask(key.after("/")); if (inet_aton(subnet.toCharArray(), &address) && inet_aton(mask.toCharArray(), &addressmask)) { subnetstruct s; int addr = ntohl(address.s_addr); s.mask = ntohl(addressmask.s_addr); // pre-mask the address for quick comparison s.maskedaddr = addr & s.mask; s.group = value.toInteger()-1; ipsubnetlist.push_back(s); } } else if (matchRange.match(key.toCharArray())) { struct in_addr addressstart; struct in_addr addressend; String start(key.before("-")); String end(key.after("-")); if (inet_aton(start.toCharArray(), &addressstart) && inet_aton(end.toCharArray(), &addressend)) { rangestruct r; r.startaddr = ntohl(addressstart.s_addr); r.endaddr = ntohl(addressend.s_addr); r.group = value.toInteger()-1; iprangelist.push_back(r); } } // hmmm. the key didn't match any of our regular expressions. output message & return a warning value. 
else { if (!is_daemonised) std::cerr << "Entry " << line << " in " << filename << " was not recognised as an IP address, subnet or range" << std::endl; syslog(LOG_ERR, "Entry %s in %s was not recognised as an IP address, subnet or range", line.toCharArray(), filename); warn = true; } } input.close(); #ifdef DGDEBUG std::cout << "starting sort" << std::endl; #endif std::sort(iplist.begin(), iplist.end()); #ifdef DGDEBUG std::cout << "sort complete" << std::endl; std::cout << "ip list dump:" << std::endl; std::vector<ip>::const_iterator i = iplist.begin(); while (i != iplist.end()) { std::cout << "IP: " << i->addr << " Group: " << i->group << std::endl; ++i; } std::cout << "subnet list dump:" << std::endl; std::list<subnetstruct>::const_iterator j = ipsubnetlist.begin(); while (j != ipsubnetlist.end()) { std::cout << "Masked IP: " << j->maskedaddr << " Mask: " << j->mask << " Group: " << j->group << std::endl; ++j; } std::cout << "range list dump:" << std::endl; std::list<rangestruct>::const_iterator k = iprangelist.begin(); while (k != iprangelist.end()) { std::cout << "Start IP: " << k->startaddr << " End IP: " << k->endaddr << " Group: " << k->group << std::endl; ++k; } #endif // return either warning or success return warn ? 1 : 0; }
// Builds the absolute URL of the request from the request line and, for
// origin-form requests ("GET /path"), the Host header.
//
// withport - when true, a non-default port is kept in the returned URL
//
// Side effects visible in this function: the `port` member is updated; an
// origin-form request line is rewritten in place to the absolute form; and the
// port-less result is stored in `cachedurl`.
String HTTPHeader::url(bool withport)
{
    // Version of URL *with* port is not cached,
    // as vast majority of our code doesn't like
    // port numbers in URLs.
    if (!withport && cachedurl.length() > 0)
        return cachedurl;

    port = 80;
    bool is_https = false;
    String host;

    // take everything after the method and strip the trailing protocol version
    String result(header.front().after(" "));
    result.removeMultiChar(' ');
    if (result.after(" ").startsWith("HTTP/")) {
        result = result.before(" HTTP/");
    } else {
        result = result.before(" http/");  // just in case!
    }

    if (requestType() == "CONNECT") {
        is_https = true;
        port = 443;
        if (!result.startsWith("https://")) {
            result = "https://" + result;
        }
    }
    const int default_port = (is_https ? 443 : 80);

    // an explicit port header, when present, overrides the default
    if (pport != NULL) {
        port = pport->after(" ").toInteger();
        if (port == 0 || port > 65535)
            port = default_port;
    }

    if (result.length() && result[0] == '/') {
        // origin form: the host has to come from the Host header
        if (phost != NULL) {
            host = phost->after(" ");
            host.removeWhiteSpace();
            if (host.contains(":")) {
                port = host.after(":").toInteger();
                if (port == 0 || port > 65535) {
                    port = default_port;
                }
                host = host.before(":");
            }
            while (host.endsWith("."))
                host.chop();
            if (withport && (port != default_port))
                host += ":" + String(port);
            host = "http://" + host;
            result = host + result;
        }
        // Squid doesn't like requests in this format. Work around the fact.
        header.front() = requestType() + " " + result + " HTTP/" + header.front().after(" HTTP/");
    } else if (result.length()) {
        // must be in the form GET http://foo.bar:80/ HTML/1.0
        if (!result.after("://").contains("/")) {
            result += "/";  // needed later on so correct host is extracted
        }
        String scheme(result.before("://"));
        host = result.after("://");
        String path(host.after("/"));
        path.removeWhiteSpace();  // remove rubbish like ^M and blanks
        if (path.length() > 0) {
            path = "/" + path;
        }
        host = host.before("/");  // extra / was added 4 here
        if (host.contains("@")) {  // Contains a username:password combo
            host = host.after("@");
        }
        if (host.contains(":")) {
            port = host.after(":").toInteger();
            if (port == 0 || port > 65535) {
                port = default_port;
            }
            host = host.before(":");  // chop off the port bit
        }
        while (host.endsWith("."))
            host.chop();
        if (withport && (port != default_port))
            host += ":" + String(port);
        result = scheme + "://" + host + path;
    }

    if (result.endsWith("//")) {
        result.chop();
    }

#ifdef DGDEBUG
    std::cout << "from header url:" << result << std::endl;
#endif
    // Don't include port numbers in the URL in the cached version.
    // Most of the code only copes with URLs *without* port specifiers.
    if (!withport)
        cachedurl = result.toCharArray();
    return result;
}
// is a URL malformed? bool HTTPHeader::malformedURL(const String& url) { String host(url.after("://")); if (host.contains("/")) host = host.before("/"); if (host.length() < 2) { #ifdef DGDEBUG std::cout << "host len too small" << std::endl; #endif return true; } if (host.contains(":")) host = host.before(":"); if (host.contains("..") || host.endsWith(".")) { #ifdef DGDEBUG std::cout << "double dots in domain name" << std::endl; #endif return true; } int i, len; unsigned char c; len = host.length(); bool containsletter = false; for (i = 0; i < len; i++) { c = (unsigned char) host[i]; // If it contains something other than numbers, dots, or [a-fx] (hex encoded IPs), // IP obfuscation can be ruled out. if (!containsletter && (((c < '0') || (c > '9')) && (c != '.') && (c != 'x') && (c != 'X') && ((c < 'a') || (c > 'f')) && ((c < 'A') || (c > 'F')))) containsletter = true; if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9') && c != '.' && c != '-' && c != '_') { #ifdef DGDEBUG std::cout << "bad char in hostname" << std::endl; #endif return true; // only allowed letters, digits, hiphen, dots } } // no IP obfuscation going on if (containsletter) return false; #ifdef DGDEBUG else std::cout << "Checking for IP obfuscation in " << host << std::endl; #endif // Check no IP obfuscation is going on // This includes IPs encoded as a single decimal number, // fully or partly hex encoded, and octal encoded bool first = true; bool obfuscation = false; if (host.endsWith(".")) host.chop(); do { if (!first) host = host.after("."); first = false; String hostpart(host); if (host.contains(".")) hostpart = hostpart.before("."); // If any part of the host starts with a letter, any letter, // then we must have a hostname rather than an IP (obscured // or otherwise). TLDs never start with a number. 
if ((hostpart[0] >= 'a' && hostpart[0] <= 'z') || (hostpart[0] >= 'A' && hostpart[0] <= 'Z')) return false; // If any part of the host begins with 0, it may be hex or octal if ((hostpart[0] == '0') && (hostpart.length() > 1)) { obfuscation = true; continue; } // Also check range, for decimal obfuscation. int part = hostpart.toInteger(); if ((part < 0) || (part > 255)) obfuscation = true; } while (host.contains(".")); // If we have any obfuscated parts, and haven't proven it's a hostname, it's invalid. return obfuscation; }
// modifies the URL in all relevant header lines after a regexp search and replace // setURL Code originally from from Ton Gorter 2004 void HTTPHeader::setURL(String &url) { String hostname; bool https = (url.before("://") == "https"); int port = (https ? 443 : 80); if (!url.after("://").contains("/")) { url += "/"; } hostname = url.after("://").before("/"); if (hostname.contains("@")) { // Contains a username:password combo hostname = hostname.after("@"); } if (hostname.contains(":")) { port = hostname.after(":").toInteger(); if (port == 0 || port > 65535) { port = (https ? 443 : 80); } hostname = hostname.before(":"); // chop off the port bit } #ifdef DGDEBUG std::cout << "setURL: header.front() changed from: " << header.front() << std::endl; #endif if (!https) header.front() = header.front().before(" ") + " " + url + " " + header.front().after(" ").after(" "); else // Should take form of "CONNECT example.com:443 HTTP/1.0" for SSL header.front() = header.front().before(" ") + " " + hostname + ":" + String(port) + " " + header.front().after(" ").after(" "); #ifdef DGDEBUG std::cout << " to: " << header.front() << std::endl; #endif if (phost != NULL) { #ifdef DGDEBUG std::cout << "setURL: header[] line changed from: " << (*phost) << std::endl; #endif (*phost) = String("Host: ") + hostname; if (port != (https ? 443 : 80)) { (*phost) += ":"; (*phost) += String(port); } (*phost) += "\r"; #ifdef DGDEBUG std::cout << " to " << (*phost) << std::endl; #endif } if (pport != NULL) { #ifdef DGDEBUG std::cout << "setURL: header[] line changed from: " << (*pport) << std::endl; #endif (*pport) = String("Port: ") + String(port) + "\r"; #ifdef DGDEBUG std::cout << " to " << (*pport) << std::endl; #endif } // Don't just cache the URL we're sent - url() performs some other // processing, notably stripping the port part. Caching here will // bypass all that. //cachedurl = url.toCharArray(); }