Example #1
0
File: UCK.C Project: PierFio/ball
	/** Constructor (originally included in CADDSuite).
	 *  Stores the trimmed name of the molecule as id and then builds the key via makeUCK().
	 *  @param mol the molecule the key is computed for
	 *  @param ignore_hydrogens value stored in ignore_hydrogens_ before makeUCK() runs
	 *  @param d value stored in depth_
	 */
	UCK::UCK(const Molecule& mol, bool ignore_hydrogens, Size d)
		:	depth_(d),
			weight_(0.0),
			ignore_hydrogens_(ignore_hydrogens)
	{
		// All members are set in the initializer list (same order as the sibling
		// constructor); the former second assignment of depth_ was redundant.
		id_ = mol.getName();
		id_.trim();
		makeUCK(mol);
	}
Example #2
0
File: UCK.C Project: PierFio/ball
	/** Constructor.
	 *  Stores the trimmed name of the molecule as id and then builds the key via makeUCK().
	 *  ignore_hydrogens_ is set to false by this overload.
	 *  @param mol the molecule the key is computed for
	 *  @param d value stored in depth_
	 */
	UCK::UCK(const Molecule& mol, Size d)
		:	depth_(d),
			weight_(0.0),
			ignore_hydrogens_(false)
	{
		// depth_ is already set by the initializer list; no second assignment needed.
		id_ = mol.getName();
		id_.trim();
		makeUCK(mol);
	}
Example #3
0
int main(int argc, char* argv[])
{
    CommandlineParser parpars("RMSDCalculator", "calculate RMSD between ligand poses", VERSION, String(__DATE__), "Analysis");
    parpars.registerMandatoryInputFile("i", "input molecule file");
    parpars.registerMandatoryInputFile("org", "molecule file containing the original ('true') poses");
    parpars.registerOptionalOutputFile("o", "output molecule file");
    parpars.registerFlag("quiet", "by quiet, i.e. do not print progress information");
    String man = "This tool calculates the RMSD between different conformations of the same molecule.\n\nThis tool can be used to evaluate the differences between ligand poses taken from co-crystal structures, e.g. generated by a docking run.\nNote:Molecules may be sorted differently in the two input files; a topology hashkey will be used to match molecules to each other.\n\nOutput of this tool is a molecule file which will for each molecule contain a property-tag 'RMSD' holding the calculated RMSD value.";
    parpars.setToolManual(man);
    parpars.setSupportedFormats("i",MolFileFactory::getSupportedFormats());
    parpars.setSupportedFormats("org",MolFileFactory::getSupportedFormats());
    parpars.setSupportedFormats("o","mol2,sdf,drf");
    parpars.parse(argc, argv);

    // Retrieve coordinates of original poses
    GenericMolFile* original = MolFileFactory::open(parpars.get("org"));
    HashMap<String, list<Vector3> > original_poses;
    for (Molecule* mol = original->read(); mol; delete mol, mol = original->read())
    {
        String topology_hash;
        FlexibleMolecule::generateTopologyHash(mol, topology_hash, true);
        if (original_poses.find(topology_hash) != original_poses.end())
        {
            Log<<"[Warning:] more than one 'original' conformation for a molecule detected. Will use only the first conformation and ignore all other."<<endl;
        }
        else
        {
            list<Vector3> l;
            HashMap<String, list<Vector3> >::iterator map_it = original_poses.insert(make_pair(topology_hash, l)).first;

            for (AtomConstIterator it = mol->beginAtom(); +it; it++)
            {
                if (it->getElement().getSymbol() != "H")
                {
                    map_it->second.push_back(it->getPosition());
                }
            }
        }
    }
    delete original;

    // Retrieve coordinates of input poses and calculate RMSDs
    GenericMolFile* input = MolFileFactory::open(parpars.get("i"));
    GenericMolFile* output = 0;
    String filename = parpars.get("o");
    if (filename != CommandlineParser::NOT_FOUND)
    {
        output = MolFileFactory::open(filename, ios::out, input);
    }

    double average_RMSD = 0;
    int no_mols = 0;
    int no_valid_rmsds = 0;
    bool quiet = (parpars.get("quiet")!=CommandlineParser::NOT_FOUND);

    for (Molecule* mol = input->read(); mol; delete mol, mol = input->read())
    {
        no_mols++;
        String topology_hash;
        FlexibleMolecule::generateTopologyHash(mol, topology_hash, true);

        HashMap<String, list<Vector3> >::iterator map_it = original_poses.find(topology_hash);
        if (map_it == original_poses.end())
        {
            Log<<"[Warning:] no original pose for molecule '"<<mol->getName()<<"' found, its RMSD can thus not be computed."<<endl;
            mol->setProperty("RMSD", "N/A");
        }
        else
        {
            double RMSD = 0;
            list<Vector3>::iterator list_it = map_it->second.begin();
            int no_heavy_atoms = 0;
            AtomConstIterator it = mol->beginAtom();
            for (; +it ; it++)
            {
                if (it->getElement().getSymbol() != "H" && list_it != map_it->second.end())
                {
                    RMSD += pow(it->getPosition().getDistance(*list_it), 2);
                    no_heavy_atoms++;
                    list_it++;
                }
            }
            if (it != mol->endAtom() || list_it != map_it->second.end())
            {
                Log.error()<<"[Error:] Number of heavy atoms of input pose do not match number of heavy atoms of original pose!!"<<endl;
                return 1;
            }
            RMSD = sqrt(RMSD/no_heavy_atoms);
            mol->setProperty("RMSD", RMSD);
            average_RMSD += RMSD;
            no_valid_rmsds++;

            if (!quiet) Log << "RMSD for molecule "<<no_mols<<", '"<<mol->getName()<<"' = "<<RMSD<<endl;
        }

        if (output) *output << *mol;
    }

    average_RMSD /= no_valid_rmsds;

    Log <<endl<<"average RMSD = "<<average_RMSD<<endl<<endl;

    delete input;
    delete output;
    return 0;
}
Example #4
0
/**
 * Merge several dock-result files into one output file.
 *
 * First the Result sections of all input files are combined (one Result per
 * method). If e_property is empty, all molecules are then copied to the output
 * in input order. Otherwise the molecules are ranked by their score property,
 * only those with score_cuton <= score <= score_cutoff are considered, at most
 * best_k with the lowest scores are kept, and the Result objects are adapted
 * accordingly before being written.
 *
 * @param names        input file names
 * @param output_file  name of the dock-result file to create
 * @param best_k       maximal number of compounds kept when sorting
 * @param e_property   score property name; empty disables sorting. When sorting
 *                     and at least one Result was read, it is overwritten with
 *                     "score_" + method string of the last Result.
 * @param score_cutoff compounds with a larger score are dropped
 * @param score_cuton  compounds with a smaller score are dropped
 */
void mergeDRFiles(vector<String>& names, string& output_file, Size& best_k, string& e_property, double& score_cutoff, double& score_cuton)
{

	DockResultFile* output = new DockResultFile(output_file, ios::out);
	const bool sort_by_scores = (e_property != "");

	vector<Result*> new_results;

	/// First of all, copy Result data
	map<Result::Method, Result*> result_map;
	for (Size file = 0; file < names.size(); file++)
	{
		DockResultFile* input = new DockResultFile(names[file]);

		const vector<Result*>* results = input->getResults();
		for (Size i = 0; i < results->size(); i++)
		{
			map<Result::Method, Result*>::iterator it = result_map.find((*results)[i]->getMethod());
			if (it == result_map.end())
			{
				Result* result_copy = new Result(*(*results)[i]);
				if (!sort_by_scores) output->addResult(result_copy);
				else new_results.push_back(result_copy);
				result_map.insert(make_pair(result_copy->getMethod(), result_copy));
			}
			else
			{
				// A Result for this method already exists: merge into it.
				*it->second += *(*results)[i];
			}
		}

		input->close();
		delete input;
	}


	// new_results is empty if none of the input files contained a Result;
	// calling back() on it would be undefined, so keep the given property name then.
	if (sort_by_scores && !new_results.empty())
	{
		e_property = "score_"+new_results.back()->getMethodString();
	}

	/// If no sorting is desired, iterate over all input-files and write each input-molecules to output-file
	if (!sort_by_scores)
	{
		output->disableAutomaticResultCreation();

		for (Size file = 0; file < names.size(); file++)
		{
			GenericMolFile* input = MolFileFactory::open(names[file]);

			int mol_no = 0;
			for (Molecule* mol = input->read(); mol; mol = input->read(), mol_no++)
			{
				*output << *mol;
				delete mol;
				Log.level(20) << "\r" << names[file] << " : " << mol_no+1;
				Log.flush();
			}
			Log.level(20)<<endl;
			Log.flush();

			input->close();
			delete input;
		}
	}

	/// If sorting is desired, iterate over all input-files and save each input-molecules to a map.
	/// Then write all FlexibleMolecules in this map to the output file and adapt the Result objects.
	else
	{
		multimap < double, FlexibleMolecule* > compounds; // map containing score and conformation-ID
		set < String > IDs; // IDs of the base-conformations

		for (Size file = 0; file < names.size(); file++)
		{
			DockResultFile* input = new DockResultFile(names[file]);
			int mol_no = 0;
			for (Molecule* mol = input->read(); mol; mol = input->read(), mol_no++)
			{
				if (!mol->hasProperty(e_property))
				{
					Log.level(10) << "Compound " << mol->getName() << " in file " << names[file] << " has no score property. Skipping this compound." << endl;
					for (Size i = 0; i < new_results.size(); i++)
					{
						new_results[i]->erase(input->getCurrentLigand());
					}
					delete mol;
					continue;
				}

				double score = ((String)mol->getProperty(e_property).toString()).toFloat();

				// Outside the accepted score window: drop the compound.
				if (score > score_cutoff || score < score_cuton)
				{
					for (Size i = 0; i < new_results.size(); i++)
					{
						new_results[i]->erase(input->getCurrentLigand());
					}
					delete mol;
					continue;
				}

				// Keep the compound if there is still room or if it beats the currently
				// worst kept score. The emptiness check protects rbegin() when best_k is 0.
				const bool has_room = compounds.size() < best_k;
				if (has_room || (!compounds.empty() && score < compounds.rbegin()->first))
				{
					FlexibleMolecule* flexmol_copy = new FlexibleMolecule(*input->getCurrentLigand());
					compounds.insert(make_pair(score, flexmol_copy));
					IDs.insert(flexmol_copy->getId());
					// NOTE(review): mol itself is not deleted on this path although every
					// other path deletes it - looks like a leak; confirm ownership in
					// DockResultFile::read() before adding a delete here.

					if (compounds.size() > best_k)
					{
						// Evict the entry with the highest score (last element of the map).
						multimap<double, FlexibleMolecule*>::iterator worst = compounds.end();
						--worst;
						for (Size i = 0; i < new_results.size(); i++)
						{
							new_results[i]->erase(worst->second);
						}
						IDs.erase(worst->second->getId());
						delete worst->second;
						compounds.erase(worst);
					}
				}
				else
				{
					for (Size i = 0; i < new_results.size(); i++)
					{
						new_results[i]->erase(input->getCurrentLigand());
					}
					delete mol;
				}
				Log.level(20) <<"\r"<<names[file]<<" : "<<mol_no+1<<flush;
			}
			Log.level(20)<<endl;

			input->close();
			delete input;
		}

		if (compounds.size() < best_k)
		{
			Log.level(20)<<"found "<<compounds.size()<<" compounds matching the given criteria."<<endl;
		}

		list<String> score_list;

		// Write the kept compounds in ascending score order and remember that order.
		for (multimap < double, FlexibleMolecule* > ::iterator it = compounds.begin();
			it!=compounds.end(); ++it)
		{
			output->writeLigand(it->second);
			score_list.push_back(it->second->getId());
			delete it->second;
		}

		// Remove those ligands from results for which no final result is available (e.g. due to missing atom parameters)
		// Guarded: new_results[0] must not be accessed when no Result was read.
		if (!new_results.empty())
		{
			vector < String > import_IDs = *new_results[0]->getInputConformations();
			for (Size i = 0; i < import_IDs.size(); i++)
			{
				if (IDs.find(import_IDs[i]) == IDs.end())
				{
					// Follow the conformation IDs from one Result to the next and
					// erase every entry derived from the dropped input conformation.
					vector<vector<Result::ResultData> > data_list;
					for (Size k = 0; k < new_results.size(); k++)
					{
						if (k == 0)
						{
							data_list.push_back(*new_results[k]->get(import_IDs[i]));
						}

						vector<vector<Result::ResultData> > new_data_list;
						for (Size j = 0; j < data_list.size(); j++)
						{
							for (Size l = 0; l < data_list[j].size(); l++)
							{
								String ID = data_list[j][l].getLigandConformationId();
								new_data_list.push_back(*new_results[k]->get(ID));
								new_results[k]->erase(ID);
							}
						}
						data_list = new_data_list;
					}
				}
			}
		}

		// Bring each Result into score order; the output list of one Result
		// is the input list for the next.
		for (Size i = 0; i < new_results.size(); i++)
		{
			list<String> new_list;
			new_results[i]->sort(score_list, new_list);
			score_list = new_list;
		}

		output->writeResults(new_results);
	}

	output->close();
	delete output;
}
Example #5
0
/**
 * Read all molecules from the given files, keep at most best_k of them with the
 * lowest score and write them, sorted by ascending score, to output_file.
 *
 * Molecules without the score property, or with a score above score_cutoff or
 * below score_cuton, are skipped.
 *
 * @param names        input file names
 * @param output_file  name of the file to write ("mol2.gz" is passed to the
 *                     factory as third argument)
 * @param best_k       maximal number of molecules kept
 * @param e_property   name of the property holding the score
 * @param score_cutoff molecules with a larger score are dropped
 * @param score_cuton  molecules with a smaller score are dropped
 */
void sortMolecules(vector<String>& names, string& output_file, Size& best_k, string& e_property, double& score_cutoff, double& score_cuton)
{
	// Score -> molecule; the map owns the molecules it holds.
	multimap<double, Molecule*> compounds;

	for (Size file = 0; file < names.size(); file++)
	{
		GenericMolFile* input = MolFileFactory::open(names[file]);

		int mol_no = 0;
		for (Molecule* mol = input->read(); mol; mol = input->read(), mol_no++)
		{
			if (!mol->hasProperty(e_property))
			{
				Log.level(10) << "Compound " << mol->getName() << " in file " << names[file] << " has no score property. Skipping this compound." << endl;
				delete mol;
				continue;
			}

			double score = ((String)mol->getProperty(e_property).toString()).toFloat();

			if (score > score_cutoff || score < score_cuton)
			{
				delete mol;
				continue;
			}

			// Keep the molecule if there is still room or if it beats the currently
			// worst kept score. The emptiness check protects rbegin() when best_k is 0.
			const bool has_room = compounds.size() < best_k;
			if (has_room || (!compounds.empty() && score < compounds.rbegin()->first))
			{
				compounds.insert(make_pair(score, mol));

				if (compounds.size() > best_k)
				{
					// Evict the entry with the highest score (last element of the map).
					multimap<double, Molecule*>::iterator worst = compounds.end();
					--worst;
					delete worst->second;
					compounds.erase(worst);
				}
			}
			else
			{
				delete mol;
			}
			Log.level(20) << "\r" << names[file] << " : " << mol_no+1 << flush;
		}
		Log.level(20) << endl;
		Log.flush();

		input->close();
		delete input;
	}

	if (compounds.size() < best_k)
	{
		Log.level(20) << "found " << compounds.size() << " compounds matching the given criteria." << endl;
	}

	GenericMolFile* output = MolFileFactory::open(output_file, ios::out, "mol2.gz");

	// multimap iteration is ordered by key, i.e. by ascending score.
	for (multimap < double, Molecule* > ::iterator it = compounds.begin();
		it!=compounds.end(); ++it)
	{
		*output << *it->second;
		delete it->second;
	}

	output->close();
	delete output;
}
Example #6
0
// Default constructor: allocate a Molecule on the heap and verify that the
// returned pointer is not null. b is declared outside this section.
CHECK(Molecule() throw())
	b = new Molecule;
	TEST_NOT_EQUAL(b, 0)
RESULT											

// Destructor: destroy the Molecule that b points to (allocated in the
// default-constructor section); the section passes if delete does not fail.
CHECK(~Molecule() throw())
	delete b;
RESULT

// Copy constructor: a molecule named "a" holding one atom is copied; the copy
// (assigned to m2) must carry the same name and the same number of atoms.
CHECK(Molecule(const Molecule& molecule, bool deep = true) throw())
	Atom a1;
	Molecule m("a"), m2;
	m.append(a1);
	// Builds a temporary through the copy constructor, then assigns it to m2.
	m2 = Molecule(m);
	TEST_EQUAL(m2.getName(), "a")
	TEST_EQUAL(m2.countAtoms(), 1)
RESULT

// Name constructor: the name passed in must be returned by getName(),
// both for a non-empty and for an empty string.
CHECK(Molecule(const String& name) throw())
	Molecule m("a");
	TEST_EQUAL(m.getName(), "a")
	Molecule m2("");
	TEST_EQUAL(m2.getName(), "")
RESULT

CHECK([EXTRA] clear())
	System s;
	Molecule m("a");
	Atom a1;
	m.append(a1);
Example #7
0
	/** Write a molecule as one KCF record (ENTRY, NODE, EDGE and delimiter lines).
	 *  The exact column layout is not specified by the KCF format description;
	 *  the blanks used here were deduced from example files.
	 *  @param molecule the molecule to write
	 *  @return true if the stream is still in a good state after writing, false otherwise
	 *  @exception File::CannotWrite if the file is not open or was not opened with std::ios::out
	 */
	bool KCFFile::write(const Molecule& molecule)
		throw(File::CannotWrite)
	{
		if (!isOpen() || getOpenMode() != std::ios::out)
		{
			throw File::CannotWrite(__FILE__, __LINE__, name_);
		}

		// An alias for simplicity's sake...
		std::ostream& os(getFileStream());
		
		// Write ENTRY block
		// number of blanks????  properties are not read, written??? Which ones are there?
		os << ENTRY_TAG << "      " << molecule.getName() << std::endl;
		
		// Shared formatting buffer; being static, it makes this method non-reentrant.
		static char buffer[BALL_MAX_LINE_LENGTH];

		// Write NODE block
		// How to create the KEGG atom types? How many blanks?
		// This is not specified in the KCF format description, so we use what we can
		// deduce from example files.
		// First line gets the NODE tag
		os << NODE_TAG << "      " << molecule.countAtoms() << "\n";
		Size count = 1;
		AtomConstIterator ai(molecule.beginAtom());
		std::map<const Atom*, Position> atom_to_index;
		for (; +ai; ++ai, ++count)
		{
			// Write the atom line.
			// Blanks????
			String type = ai->getTypeName();
			String comment;
			
			// Make sure the type is in the set of KEGG types????
			// Blanks?
			// The counter is cast explicitly so that the argument type matches
			// the conversion specifier (count is a Size, not an int).
			sprintf(buffer, "             %u %s %s %6.4f %6.4f %s\n", 
							static_cast<unsigned int>(count), type.c_str(), ai->getElement().getSymbol().c_str(), 
							ai->getPosition().x, ai->getPosition().y, comment.c_str());
			os << buffer;
			
			// Remember the index of the current atom to map atom
			// pointers back to indices for the EDGE section.
			atom_to_index[&*ai] = count;
		}
		
		// Write EDGE block. Walk over all bonds to do so.
		// Blanks????
		os << "EDGE    " << molecule.countBonds() << "\n";
		count = 1;
		for (ai = molecule.beginAtom(); +ai; ++ai)
		{
			for (Atom::BondConstIterator bi(ai->beginBond()); +bi; ++bi)
			{
				// Write every bond just once: only when visited from its first atom.
				if (bi->getFirstAtom() != &*ai)
				{
					continue;
				}

				// Indices are looked up only for bonds that are actually written.
				// An atom that was not numbered above yields index 0.
				const Position index1 = atom_to_index[bi->getFirstAtom()];
				const Position index2 = atom_to_index[bi->getSecondAtom()];
				String comment;

				sprintf(buffer, "          %4u %4u %4u %1d%s\n", 
								static_cast<unsigned int>(count), 
								static_cast<unsigned int>(index1), 
								static_cast<unsigned int>(index2), 
								static_cast<int>(bi->getOrder()), comment.c_str());
				os << buffer;
				++count;
			}
		}
		
		// Write the DELIMITER block
		os << DELIMITER_TAG << std::endl;
		
		// Report stream errors instead of claiming success unconditionally.
		return !os.fail();
	}
Example #8
0
/**
 * Score or rescore every molecule of a multi-molecule file.
 *
 * For each molecule read from input: molecules flagged by LigCheck
 * (property "score_ligcheck" < 0.95) are skipped; the others are prepared with
 * sp and scored with scoring_function and/or rescoring. Depending on train and
 * ignore_top, the score is stored as property "re-score" and the molecule is
 * written to output if its score is below output_score_threshold.
 *
 * If ignore_top is set (and train is not), the input is read a second time and
 * rescored compounds are shifted by an offset before being written, so that
 * they rank behind the compounds whose docking score was kept.
 *
 * This function deletes both input and output before returning.
 *
 * @param scoring_function        used when no rescoring is given or when training
 * @param sp                      prepares each molecule (together with par_file)
 * @param par_file                passed through to sp->prepare()
 * @param rescoring               optional rescoring object (may be null)
 * @param train                   if true, score contributions are collected instead of rescoring
 * @param min_dock_score          docking scores below this value are kept when ignore_top is set
 * @param dock_score_label        name of the property holding the docking score
 * @param input                   source of molecules; deleted by this function
 * @param output                  target file (may be null); deleted by this function
 * @param output_score_threshold  only molecules with a smaller score are written
 * @param ignore_top              keep docking scores of top compounds instead of rescoring them
 */
void processMultiMoleculeFile(ScoringFunction* scoring_function, StructurePreparer* sp, String par_file, Rescoring* rescoring, bool train, double min_dock_score, String dock_score_label, GenericMolFile* input, GenericMolFile* output, double output_score_threshold, bool ignore_top)
{
	// One entry per molecule that passed the LigCheck filter, in input order:
	// (score, true if the score comes from rescoring).
	list<pair<double, bool> > rescore_list;
	double min_rescore = 1e12;
	int i = 1;

	for (Molecule* mol = input->read(); mol; mol = input->read(), i++)
	{
		String name = mol->getName();
		Log<<"====== Ligand "<<i;
		if (name != "") Log<<", "<<name;
		Log<<" ============"<<endl;
		Log.flush();

		if (mol->hasProperty("score_ligcheck"))
		{
			double score_ligcheck = mol->getProperty("score_ligcheck").toString().toDouble();
			if (score_ligcheck < 0.95) // 0 = error, 1 = check passed
			{
				cout<<"Skipping compound because it has been marked as containing errors by LigCheck."<<endl;
				delete mol;
				continue;
			}
		}

		double score = 0;
		bool failed = false;   // preparation or scoring threw an exception
		bool recorded = false; // an entry for this molecule was added to rescore_list
		try
		{
			sp->prepare(mol, par_file);
			if (!rescoring || train)
			{
				scoring_function->setLigand(mol);
				scoring_function->update();
				score = scoring_function->updateScore();
				scoring_function->printResult();
			}
			if (rescoring)
			{
				if (train)
				{
					if (score > 1000)
					{
						Log.level(10)<<"score>1000, thus current compound is not added to training data set."<<endl;
						delete mol;
						continue;
					}
					rescoring->addScoreContributions(mol);
				}
				else score = rescoring->rescore(mol);
			}

			if (score < output_score_threshold)
			{
				if (ignore_top)
				{
					bool keep_dock_score = false;
					// Check for the very property that is read below; the former
					// test for "score" was wrong whenever dock_score_label differs.
					if (mol->hasProperty(dock_score_label))
					{
						double docking_score = mol->getProperty(dock_score_label).toString().toDouble();
						if (docking_score < min_dock_score)
						{
							rescore_list.push_back(make_pair(docking_score, false));
							keep_dock_score = true;
							recorded = true;
						}
					}
					if (!keep_dock_score)
					{
						rescore_list.push_back(make_pair(score, true));
						recorded = true;
						if (score < min_rescore)
						{
							min_rescore = score;
						}
					}
				}
				else
				{
					mol->setProperty("re-score", score);
				}
			}
		}
		catch (BALL::Exception::GeneralException& e)
		{
			cout<<e.getMessage()<<endl;
			cout<<"Error! Skipping this molecule!"<<endl;
			failed = true;
		}

		// The second pass advances through rescore_list once per molecule that
		// passed the LigCheck filter. Molecules that failed or scored too high
		// therefore need a placeholder; its value equals the threshold, so the
		// "< output_score_threshold" test of the second pass never writes it.
		if (ignore_top && !recorded)
		{
			rescore_list.push_back(make_pair(output_score_threshold, false));
		}

		// A molecule whose processing failed is really skipped, as announced above.
		if (!failed && output && score < output_score_threshold && (!rescoring || !train) && !ignore_top)
		{
			*output << *mol;
		}

		delete mol;
	}

	/** If ignoring (i.e. not rescoring) the top fraction of docking-results, we need to make sure
	 that all rescored compounds get a rescore-value larger than that of the former. */
	if (ignore_top && !train)
	{
		input->reopen();
		double offset = min_dock_score-min_rescore+0.25;

		list<pair<double, bool> >::iterator rescore_it = rescore_list.begin();
		for (Molecule* mol = input->read(); mol; mol = input->read())
		{
			if (mol->hasProperty("score_ligcheck"))
			{
				double score_ligcheck = mol->getProperty("score_ligcheck").toString().toDouble();
				if (score_ligcheck < 0.95) // 0 = error, 1 = check passed
				{
					delete mol;
					continue;
				}
			}

			// Defensive: never step past the recorded scores.
			if (rescore_it == rescore_list.end())
			{
				delete mol;
				break;
			}

			double rescore = rescore_it->first;
			if (rescore_it->second) // if compound was rescored
			{
				rescore += offset;
			}
			if (rescore < output_score_threshold)
			{
				mol->setProperty("re-score", rescore);
				if (output) *output << *mol;
			}
			delete mol;
			++rescore_it;
		}
	}

	delete input;
	if (output) delete output;
}
Example #9
0
// getFragment() must return the address of fragment for atomx
// (atomx and fragment are set up outside this section).
CHECK(const Fragment* getFragment() const throw())
	TEST_EQUAL((atomx.getFragment() == &fragment), true)
RESULT

// The pointer returned by getFragment() is mutable: renaming through it
// must be visible on fragment itself.
CHECK(Fragment* getFragment() throw())
	atomx.getFragment()->setName("1234");
	TEST_EQUAL(fragment.getName(), "1234")
RESULT

// getMolecule() called on acx must return the address of molecule
// (acx and molecule are set up outside this section).
CHECK(const Molecule* getMolecule() const throw())
	TEST_EQUAL(acx.getMolecule(), &molecule)
RESULT

// The pointer returned by getMolecule() is mutable: renaming through it
// must be visible on molecule itself.
CHECK(Molecule* getMolecule() throw())
	atomx.getMolecule()->setName("asdff");
	TEST_EQUAL(molecule.getName(), "asdff")
RESULT

// Receives the return value of createBond() in the section below.
Bond*	bond;
CHECK(Bond* createBond(Atom& atom) throw())
	atom->createBond(*atom3);
	atom3->getBond(*atom);
	TEST_EQUAL(atom->countBonds(), 1)
	TEST_EQUAL(atom3->countBonds(), 1)
	TEST_EQUAL(atom->getBond(*atom3), atom3->getBond(*atom))
	atom->createBond(*atom4);
	atom->createBond(*atom4);
	bond = atom->createBond(*atom);
	TEST_EQUAL(bond, 0);

	TEST_EQUAL(atom4->countBonds(), 1)