Пример #1
0
void ScoringOptimizer::evaluate(double* correlation, double* R2, double* RMSE, Size* no_valid_complexes)
{
	StructurePreparer sp;
	if (scoring_function_name_.hasSubstring("PLP"))
	{
		sp.setScoringType("PLP");
	}

	result_.predictions.clear();
	result_.target_names.clear();
	result_.expected_affinities.clear();
	result_.score_contributions.clear();
	result_.score_contribution_names.clear();

	Size skipped_complexes = 0;
	Size atom_overlaps = 0;
	Size ignored_outliers = 0;
	Size i = 0;
	double sum_squared_errors = 0;

	for (list < Complex > ::iterator it = complexes_.begin(); it != complexes_.end(); it++, i++)
	{
		cout<<"==== evaluating complex "<<i<<", "<<it->name<<" ========="<<endl;

		ScoringFunction* scoring_function = NULL;

		try
		{
			System ligand;
			Molecule* mol = 0;
			System receptor;
			PDBFile p(it->receptor_file);
			p >> receptor;
			String par_file = options_.get("filename");
			sp.prepare(&receptor, par_file);
			SDFile* sdf = 0;

			if (it->ligand_file.hasSubstring(".mol2"))
			{
				MOL2File m(it->ligand_file);
				m >> ligand;
			}
			else if (it->ligand_file.hasSubstring(".mol"))
			{
				MOLFile m(it->ligand_file);
				m >> ligand;
			}
			else if (it->ligand_file.hasSubstring(".sdf"))
Пример #2
0
	/**
	 * Docks every molecule of a multi-molecule input file and writes the accepted results to an output file.
	 *
	 * Molecules are read one by one from the input file, prepared with a StructurePreparer, docked via
	 * dockLigand() and written to the output file if their score is below the cutoff and, if requested,
	 * enough atoms lie within each reference area.
	 *
	 * @param input_filename          file to read the ligand candidates from
	 * @param output_filename         file to write the docked ligands to
	 * @param score_cutoff            only ligands with a score smaller than this value are written
	 * @param min_atoms_in_ref_areas  optional (may be NULL); entry i is the minimal number of atoms required
	 *                                for the i-th constraint of the scoring function
	 * @param toolinfo                passed to DockResultFile::setToolInfo() if the output is a DockResultFile
	 * @param timestamp               passed to DockResultFile::setToolInfo() if the output is a DockResultFile
	 * @throws BALL::Exception::GeneralException if the input or output file could not be opened by MolFileFactory
	 */
	void DockingAlgorithm::processMultiMoleculeFile(string input_filename, string output_filename, double score_cutoff, vector<double>* min_atoms_in_ref_areas, String toolinfo, String timestamp)
	{
		GenericMolFile* input = MolFileFactory::open(input_filename);
		if (!input)
		{
			String m = "Format of input file '"+input_filename+"' is not supported!";
			throw BALL::Exception::GeneralException(__FILE__, __LINE__, "DockingAlgorithm::processMuliMoleculeFile() error", m);
		}

		GenericMolFile* output = MolFileFactory::open(output_filename, ios::out, input);
		if (!output)
		{
			// Do not leak the input file that has already been opened.
			delete input;
			String m = "Format of output file '"+output_filename+"' is not supported!";
			throw BALL::Exception::GeneralException(__FILE__, __LINE__, "DockingAlgorithm::processMuliMoleculeFile() error", m);
		}

		DockResultFile* drf_output = dynamic_cast<DockResultFile*>(output);
		if (drf_output)
		{
			String dummy = "0";
			drf_output->setOutputParameters(Result::DOCKING, "score", dummy, name_+"+"+scoring_function_->getName());
			drf_output->setToolInfo(toolinfo, timestamp);
		}

		// Failed dockings are only written if the option is set AND effectively no score cutoff is used.
		bool output_failed_dockings = (options.setDefaultBool("output_failed_dockings", false) && score_cutoff>=1e10);

		StructurePreparer sp;
		if (scoring_type_.hasSubstring("PLP"))
		{
			sp.setScoringType("PLP");
		}

		Timer timer;
		timer.start();
		int no_dockings = 0;

		ScoringFunction* scoring = getScoringFunction();

		// The loop is left via 'break' as soon as the input file yields no further molecule.
		for (int mol_no = 1; ; mol_no++)
		{
			double score = 1e100;

			Log.level(20)<<"====== ligand candidate "<<mol_no<<flush;
			Molecule* ligand_i = NULL;
			try
			{
				ligand_i = input->read();
				if (ligand_i == NULL) break;

				String name = ligand_i->getName();
				if (name != "") Log.level(20)<<", "<<name<<" ";
				Log.level(20)<<"============"<<endl<<flush;

				if (ligand_i->hasProperty("score_ligcheck"))
				{
					double score_ligcheck = ((String)ligand_i->getProperty("score_ligcheck").toString()).toDouble();
					if (score_ligcheck < 0.95) // 0 = error, 1 = check passed
					{
						Log.level(20)<<"Skipping compound because it has been marked as containing errors by LigCheck."<<endl;

						if (output_failed_dockings)
						{
							ligand_i->setProperty("score", 1e12);
							ligand_i->setProperty("docking-error", "molecule ignored because it did not pass LigCheck test");
							*output << *ligand_i;
							output->flush();
						}
						delete ligand_i;
						continue;
					}
				}

				sp.prepare(ligand_i, parameter_filename_);
				score = dockLigand(*ligand_i);
				Log.level(20)<<endl;
				scoring->printResult();

				if (score < score_cutoff)
				{
					list<Constraint*>& refs = scoring->constraints;
					vector<double> atoms_in_ref_areas(refs.size(), 0);

					// Store the number of atoms within each reference area as a property of the ligand.
					// Constraints that are no ReferenceArea keep the value 0.
					int ref_no = 0;
					for (list < Constraint* > ::iterator it = refs.begin(); it != refs.end(); it++, ref_no++)
					{
						ReferenceArea* ref = dynamic_cast<ReferenceArea*>(*it);
						if (!ref) continue;
						atoms_in_ref_areas[ref_no] = ref->getContainedAtoms();
						String property_name = "atoms in ";
						String ref_name = ref->getName();
						// Use the name of the reference area if it has one, else fall back to its index.
						if (ref_name != "")
						{
							property_name += ref_name;
						}
						else
						{
							property_name += "ReferenceArea "+String(ref_no);
						}

						ligand_i->setProperty(property_name, atoms_in_ref_areas[ref_no]);
					}

					ligand_i->setProperty("score", score);

					bool ok = true;
					if (min_atoms_in_ref_areas != NULL)
					{
						for (Size i = 0; i < min_atoms_in_ref_areas->size() && i < atoms_in_ref_areas.size(); i++)
						{
							if ((*min_atoms_in_ref_areas)[i] > atoms_in_ref_areas[i])
							{
								ok = false;
								break;
							}
						}
					}
					if (ok)
					{
						*output << *ligand_i;
						output->flush();
					}
				}
			}
			catch(BALL::Exception::GeneralException& e)
			{
				Log.level(20)<<"Error for compound "<<mol_no<<" ! Skipping this compound."<<endl;

				// ligand_i is still NULL if reading the molecule itself raised the exception.
				if (output_failed_dockings && ligand_i != NULL)
				{
					ligand_i->setProperty("score", 1e12);
					ligand_i->setProperty("docking-error", e.getMessage());
					*output << *ligand_i;
					output->flush();
				}

				delete ligand_i;
				continue;
			}
			no_dockings++;
			Log.level(20)<<endl;
			delete ligand_i;
		}
		timer.stop();
		Log.level(20)<<"\nDocking "<<no_dockings<<" compounds: "<<scoring->convertTime(timer.getClockTime())<<endl;

		delete input;
		delete output;
	}
Пример #3
0
int runRescoring(CommandlineParser& par, bool simple_rescoring, bool train)
{
	// just to make sure ...
	if (simple_rescoring) train = false;


	/** If desired, write ini-file with default parameters and abort */

	String default_inifile = par.get("write_ini");
	if (default_inifile != CommandlineParser::NOT_FOUND)
	{
		// if ini-file already exists, read its entries first
		Options default_options;
		ScoringFunction::getDefaultOptions(default_options);
		list<Constraint*> clist;
		if (ifstream(default_inifile.c_str()))
		{
			DockingAlgorithm::readOptionFile(default_inifile, default_options, clist);
		}

		Options* scoring_options = default_options.getSubcategory("Scoring Function");
		scoring_options->setDefault("scoring_type", "MM");
		scoring_options->setDefault("nonbonded_cutoff_precalculation", scoring_options->get("nonbonded_cutoff"));
		scoring_options->set("nonbonded_cutoff", 3.0);
		DockingAlgorithm::writeOptionFile(par.get("write_ini"), default_options, clist);
		Log << "Ini-file w/ default values has been written to file '"<<default_inifile<<"'. Goodbye!"<<endl;
		return 0;
	}


	/** Fetch information about specification of desired rescoring approach */

	String method = "";
	String free_energy_label = "";
	String model_file = "";
	String scoring_type = "MM";
	String grid_file = "";
	Options option;
	par.copyAdvancedParametersToOptions(option);
	Options* option_category = option.getSubcategory("Scoring Function");
	if (!option_category) option_category = &option;

	list<Constraint*> constraints;
	if (par.get(DockingAlgorithm::OPTION_FILE_PARAMETER_NAME) != CommandlineParser::NOT_FOUND)
	{
		DockingAlgorithm::readOptionFile(par.get(DockingAlgorithm::OPTION_FILE_PARAMETER_NAME), option, constraints);
		scoring_type = option_category->setDefault("scoring_function", "MM");
		grid_file = option_category->setDefault("grid_file", "");
	}
	// Overload ini-file settings with values taken from command-line (if any)
	if (par.get("function") != CommandlineParser::NOT_FOUND)
	{
		scoring_type = par.get("function");
	}

	method = par.get("method");
	free_energy_label = par.get("exp");
	if (par.get("mod") != CommandlineParser::NOT_FOUND)
	{
		model_file = par.get("mod");
	}

	if (!simple_rescoring && method != "Rescoring3D" && method != "Rescoring4D" && method != "Rescoring1D")
	{
		cerr << "[Error:] Type of desired rescoring method unknown: available are 'Rescoring3D', 'Rescoring4D' and 'Rescoring1D'." << endl;
		exit(1);
	}


	/** Setup StructurePreparer, Rescoring and ScoringFunction  */

	//Log.setMinLevel(cout, 39);

	GenericMolFile* ref_ligand_file = MolFileFactory::open(par.get("rl"));
	Molecule* ref_ligand = ref_ligand_file->read();
	ref_ligand_file->close();
	delete ref_ligand_file;

	StructurePreparer* sp;
	if (scoring_type.hasSubstring("PLP"))
	{
		sp = new StructurePreparer("PLP");
	}
	else
	{
		sp = new StructurePreparer;
	}

	String par_file = option_category->get("filename");
	if (par_file == "") par_file="Amber/amber96-docking.ini";
	System receptor;
	GenericMolFile* receptor_file = MolFileFactory::open(par.get("rec"));
	if (!receptor_file)
	{
		cerr<<"Format of receptor-file not supported!"<<endl;
		return 1;
	}

	*receptor_file >> receptor;
	delete receptor_file;
	sp->prepare(&receptor, par_file);
	sp->prepare(ref_ligand, par_file);

	ScoringFunction* scoring_function;
	if (scoring_type == "MM" || scoring_type == "PB")
	{
		// Support for using one and the same config-file for docking and rescoring
		String precalc_nonbonded_cuttoff = option_category->get("nonbonded_cutoff_precalculation");
		if (precalc_nonbonded_cuttoff != "")
		{
			option_category->set("nonbonded_cutoff", precalc_nonbonded_cuttoff);
		}
	}
	if (scoring_type == "MM")
	{
		scoring_function = new MMScoring(receptor, *ref_ligand, option);
	}
	else if (scoring_type == "GridedMM")
	{
		scoring_function = new GridedMM(receptor, *ref_ligand, option);
	}
	else if (scoring_type == "GridedPLP")
	{
		scoring_function = new GridedPLP(receptor, *ref_ligand, option);
	}
	else if (scoring_type == "PLP")
	{
		scoring_function = new PLPScoring(receptor, *ref_ligand, option);
	}
	else if (scoring_type == "PB")
	{
		scoring_function = new PBScoring(receptor, *ref_ligand, option);
	}
	else
	{
		String mess="ScoringFunction type \'"+scoring_type+"\' unknown/unsupported!";
		cerr<<"[Error:] "<<mess<<endl;
		return 1;
	}

	GridBasedScoring* gbs = dynamic_cast<GridBasedScoring*>(scoring_function);
	for (list < Constraint* > ::iterator it = constraints.begin(); it != constraints.end(); it++)
	{
		scoring_function->constraints.push_back(*it);
		(*it)->setScoringFunction(scoring_function);
	}

	cout<<endl<<"-----------------------------------------"<<endl;
	cout<<"Scores will be calculated as : "<<scoring_function->getEquation()<<endl;
	cout<<"-----------------------------------------"<<endl<<endl;

	if (gbs != NULL)
	{
		gbs->replaceGridSetFromFile(grid_file);
	}

	Rescoring* rescoring = 0;

	if (!simple_rescoring)
	{
		if (!train && free_energy_label == "")
		{
			cerr<<"[Error:] free-energy label must be specified !"<<endl;
			return 1;
		}
		if (method == "Rescoring3D")
		{
			rescoring = new Rescoring3D(receptor, *ref_ligand, option, free_energy_label, scoring_function);
		}
		else if (method == "Rescoring4D")
		{
			rescoring = new Rescoring4D(receptor, *ref_ligand, option, free_energy_label, scoring_function);
		}
		else if (method == "Rescoring1D")
		{
			rescoring = new Rescoring1D(receptor, *ref_ligand, option, free_energy_label, scoring_function);
		}
		else
		{
			cerr<<"[Error:] Rescoring-method unknown !"<<endl;
			return 1;
		}
		if (!train)
		{
			rescoring->loadModel(model_file);
		}
		scoring_function->enableStoreInteractions();
	}

	scoring_function->setLigand(ref_ligand);
	scoring_function->update();
	scoring_function->updateScore();
	Log<<"====== Reference ligand ============"<<endl;
	scoring_function->printResult();


	/** If top fraction of docking results should not be rescored, then fetch scores and compute threshold for this fraction */

	bool ignore_top = false;
	double ignore_top_fraction = 0.0;
	if (par.has("tf"))
	{
		ignore_top_fraction = par.get("tf").toDouble();
		if (ignore_top_fraction < 1e-14 || ignore_top_fraction < 0 || ignore_top_fraction > 1)
		{
			ignore_top_fraction = 0.0;
			ignore_top = false;
		}
		else
		{
			ignore_top = true;
		}
	}
	double min_dock_score = -1e100;
	list<pair<double, bool> > rescore_list;
	if (ignore_top)
	{
		set<double> scores;
		GenericMolFile* input = MolFileFactory::open(par.get("i"));
		for (Molecule* mol = input->read(); mol; delete mol, mol = input->read())
		{
			if (mol->hasProperty("score"))
			{
				scores.insert(mol->getProperty("score").toString().toDouble());
			}
		}
		Size i = 0;
		Size max = scores.size()*ignore_top_fraction;
		set<double>::iterator s_it = scores.begin();
		for (; i < max; s_it++)
		{
			i++;
		}
		min_dock_score = *s_it;
		delete input;
	}


	/** Now, Rescore entire sd-/mol2-file   */

	double threshold = option.setDefaultReal("output_score_threshold", 1e100);

	GenericMolFile* input = MolFileFactory::open(par.get("i"));
	GenericMolFile* output = 0;

	if (simple_rescoring || !train)
	{
		output = MolFileFactory::open(par.get("o"), ios::out, input);

		DockResultFile* drf_output = dynamic_cast<DockResultFile*>(output);
		if (drf_output)
		{
			String dummy = "0";
			BALL::Docking::Result::Method method = Result::getMethod(3);
			String description = "";
			description = rescoring->getName()+"+"+scoring_function->getName();
			drf_output->setOutputParameters(method, "re-score", dummy, description);
		}
	}

	// Do the actual work ..
	processMultiMoleculeFile(scoring_function, sp, par_file, rescoring, train, min_dock_score, "score", input, output, threshold, ignore_top);

	if(!simple_rescoring)
	{
		if (train)
		{
			rescoring->recalibrate();
			rescoring->saveModel(par.get("o"));
		}
		else
		{
			double correlation, q2, stderr;
			rescoring->calculateQuality(correlation, q2, stderr);
			cout<<"Correlation = "<<String(correlation)<<endl;
			cout<<"Q2="<<String(q2)<<endl;
			cout<<"Standard error="<<String(stderr)<<endl;
		}
	}

	for (list<Constraint*>::iterator it = constraints.begin(); it != constraints.end(); it++)
	{
		delete *it;
	}

	delete rescoring;
	delete sp;
	delete ref_ligand;

	return 0;
}