//constructor (originally included in CADDSuite) UCK::UCK(const Molecule& mol, bool ignore_hydrogens, Size d) : depth_(d), weight_(0.0) { id_ = mol.getName(); id_.trim(); depth_ = d; ignore_hydrogens_ = ignore_hydrogens; makeUCK(mol); }
//constructor UCK::UCK(const Molecule& mol, Size d) : depth_(d), weight_(0.0), ignore_hydrogens_(false) { id_ = mol.getName(); id_.trim(); depth_ = d; makeUCK(mol); }
int main(int argc, char* argv[]) { CommandlineParser parpars("RMSDCalculator", "calculate RMSD between ligand poses", VERSION, String(__DATE__), "Analysis"); parpars.registerMandatoryInputFile("i", "input molecule file"); parpars.registerMandatoryInputFile("org", "molecule file containing the original ('true') poses"); parpars.registerOptionalOutputFile("o", "output molecule file"); parpars.registerFlag("quiet", "by quiet, i.e. do not print progress information"); String man = "This tool calculates the RMSD between different conformations of the same molecule.\n\nThis tool can be used to evaluate the differences between ligand poses taken from co-crystal structures, e.g. generated by a docking run.\nNote:Molecules may be sorted differently in the two input files; a topology hashkey will be used to match molecules to each other.\n\nOutput of this tool is a molecule file which will for each molecule contain a property-tag 'RMSD' holding the calculated RMSD value."; parpars.setToolManual(man); parpars.setSupportedFormats("i",MolFileFactory::getSupportedFormats()); parpars.setSupportedFormats("org",MolFileFactory::getSupportedFormats()); parpars.setSupportedFormats("o","mol2,sdf,drf"); parpars.parse(argc, argv); // Retrieve coordinates of original poses GenericMolFile* original = MolFileFactory::open(parpars.get("org")); HashMap<String, list<Vector3> > original_poses; for (Molecule* mol = original->read(); mol; delete mol, mol = original->read()) { String topology_hash; FlexibleMolecule::generateTopologyHash(mol, topology_hash, true); if (original_poses.find(topology_hash) != original_poses.end()) { Log<<"[Warning:] more than one 'original' conformation for a molecule detected. 
Will use only the first conformation and ignore all other."<<endl; } else { list<Vector3> l; HashMap<String, list<Vector3> >::iterator map_it = original_poses.insert(make_pair(topology_hash, l)).first; for (AtomConstIterator it = mol->beginAtom(); +it; it++) { if (it->getElement().getSymbol() != "H") { map_it->second.push_back(it->getPosition()); } } } } delete original; // Retrieve coordinates of input poses and calculate RMSDs GenericMolFile* input = MolFileFactory::open(parpars.get("i")); GenericMolFile* output = 0; String filename = parpars.get("o"); if (filename != CommandlineParser::NOT_FOUND) { output = MolFileFactory::open(filename, ios::out, input); } double average_RMSD = 0; int no_mols = 0; int no_valid_rmsds = 0; bool quiet = (parpars.get("quiet")!=CommandlineParser::NOT_FOUND); for (Molecule* mol = input->read(); mol; delete mol, mol = input->read()) { no_mols++; String topology_hash; FlexibleMolecule::generateTopologyHash(mol, topology_hash, true); HashMap<String, list<Vector3> >::iterator map_it = original_poses.find(topology_hash); if (map_it == original_poses.end()) { Log<<"[Warning:] no original pose for molecule '"<<mol->getName()<<"' found, its RMSD can thus not be computed."<<endl; mol->setProperty("RMSD", "N/A"); } else { double RMSD = 0; list<Vector3>::iterator list_it = map_it->second.begin(); int no_heavy_atoms = 0; AtomConstIterator it = mol->beginAtom(); for (; +it ; it++) { if (it->getElement().getSymbol() != "H" && list_it != map_it->second.end()) { RMSD += pow(it->getPosition().getDistance(*list_it), 2); no_heavy_atoms++; list_it++; } } if (it != mol->endAtom() || list_it != map_it->second.end()) { Log.error()<<"[Error:] Number of heavy atoms of input pose do not match number of heavy atoms of original pose!!"<<endl; return 1; } RMSD = sqrt(RMSD/no_heavy_atoms); mol->setProperty("RMSD", RMSD); average_RMSD += RMSD; no_valid_rmsds++; if (!quiet) Log << "RMSD for molecule "<<no_mols<<", '"<<mol->getName()<<"' = "<<RMSD<<endl; } if 
(output) *output << *mol; } average_RMSD /= no_valid_rmsds; Log <<endl<<"average RMSD = "<<average_RMSD<<endl<<endl; delete input; delete output; return 0; }
void mergeDRFiles(vector<String>& names, string& output_file, Size& best_k, string& e_property, double& score_cutoff, double& score_cuton) { DockResultFile* output = new DockResultFile(output_file, ios::out); bool sort_by_scores = 1; if (e_property == "") sort_by_scores = 0; vector<Result*> new_results; /// First of all, copy Result data map<Result::Method, Result*> result_map; for (Size file = 0; file < names.size(); file++) { DockResultFile* input = new DockResultFile(names[file]); const vector<Result*>* results = input->getResults(); for (Size i = 0; i < results->size(); i++) { map<Result::Method, Result*>::iterator it = result_map.find((*results)[i]->getMethod()); if (it == result_map.end()) { Result* result_copy = new Result(*(*results)[i]); if (!sort_by_scores) output->addResult(result_copy); else new_results.push_back(result_copy); result_map.insert(make_pair(result_copy->getMethod(), result_copy)); } else { *it->second += *(*results)[i]; } } input->close(); delete input; } if (e_property != "") { e_property = "score_"+new_results.back()->getMethodString(); } /// If no sorting is desired, iterate over all input-files and write each input-molecules to output-file if (!sort_by_scores) { output->disableAutomaticResultCreation(); for (Size file = 0; file < names.size(); file++) { GenericMolFile* input = MolFileFactory::open(names[file]); int mol_no = 0; for (Molecule* mol = input->read(); mol; mol = input->read(), mol_no++) { *output << *mol; delete mol; Log.level(20) << "\r" << names[file] << " : " << mol_no+1; Log.flush(); } Log.level(20)<<endl; Log.flush(); input->close(); delete input; } } /// If sorting is desired, iterate over all input-files and save each input-molecules to a map. /// Then write all FlexibleMolecules in this map to the output file and adapt the Result objects. 
else { multimap < double, FlexibleMolecule* > compounds; // map containing score and conformation-ID set < String > IDs; // IDs of the base-conformations for (Size file = 0; file < names.size(); file++) { DockResultFile* input = new DockResultFile(names[file]); int mol_no = 0; for (Molecule* mol = input->read(); mol; mol = input->read(), mol_no++) { if (!mol->hasProperty(e_property)) { Log.level(10) << "Compound " << mol->getName() << " in file " << names[file] << " has no score property. Skipping this compound." << endl; for (Size i = 0; i < new_results.size(); i++) { new_results[i]->erase(input->getCurrentLigand()); } delete mol; continue; } double score = ((String)mol->getProperty(e_property).toString()).toFloat(); if (score > score_cutoff || score < score_cuton) { for (Size i = 0; i < new_results.size(); i++) { new_results[i]->erase(input->getCurrentLigand()); } delete mol; continue; } if ((compounds.size() < best_k || score < compounds.rbegin()->first)) { FlexibleMolecule* flexmol_copy = new FlexibleMolecule(*input->getCurrentLigand()); compounds.insert(make_pair(score, flexmol_copy)); IDs.insert(flexmol_copy->getId()); if (compounds.size() > best_k) { for (Size i = 0; i < new_results.size(); i++) { new_results[i]->erase(compounds.rbegin()->second); } IDs.erase(compounds.rbegin()->second->getId()); delete compounds.rbegin()->second; multimap<double, FlexibleMolecule*>::iterator it = compounds.end(); it--; compounds.erase(it); } } else { for (Size i = 0; i < new_results.size(); i++) { new_results[i]->erase(input->getCurrentLigand()); } delete mol; } Log.level(20) <<"\r"<<names[file]<<" : "<<mol_no+1<<flush; } Log.level(20)<<endl; input->close(); delete input; } if (compounds.size() < best_k) { Log.level(20)<<"found "<<compounds.size()<<" compounds matching the given criteria."<<endl; } list<String> score_list; for (multimap < double, FlexibleMolecule* > ::iterator it = compounds.begin(); it!=compounds.end(); it++) { output->writeLigand(it->second); 
score_list.push_back(it->second->getId()); delete it->second; } // Remove those ligands from results for which no final result is available (e.g. due to missing atom parameters) vector < String > import_IDs = *new_results[0]->getInputConformations(); for (Size i = 0; i < import_IDs.size(); i++) { if (IDs.find(import_IDs[i]) == IDs.end()) { vector<vector<Result::ResultData> > data_list; for (Size k = 0; k < new_results.size(); k++) { if (k == 0) { data_list.push_back(*new_results[k]->get(import_IDs[i])); } vector<vector<Result::ResultData> > new_data_list; for (Size j = 0; j < data_list.size(); j++) { for (Size l = 0; l < data_list[j].size(); l++) { String ID = data_list[j][l].getLigandConformationId(); new_data_list.push_back(*new_results[k]->get(ID)); new_results[k]->erase(ID); } } data_list = new_data_list; } } } for (Size i = 0; i < new_results.size(); i++) { list<String> new_list; new_results[i]->sort(score_list, new_list); score_list = new_list; } output->writeResults(new_results); } output->close(); delete output; }
void sortMolecules(vector<String>& names, string& output_file, Size& best_k, string& e_property, double& score_cutoff, double& score_cuton) { multimap<double, Molecule*> compounds; for (Size file = 0; file < names.size(); file++) { GenericMolFile* input = MolFileFactory::open(names[file]); int mol_no = 0; for (Molecule* mol = input->read(); mol; mol = input->read(), mol_no++) { if (!mol->hasProperty(e_property)) { Log.level(10) << "Compound " << mol->getName() << " in file " << names[file] << " has no score property. Skipping this compound." << endl; delete mol; continue; } double score = ((String)mol->getProperty(e_property).toString()).toFloat(); if (score > score_cutoff || score < score_cuton) { delete mol; continue; } if ((compounds.size() < best_k || score < compounds.rbegin()->first)) { compounds.insert(make_pair(score, mol)); if (compounds.size() > best_k) { delete compounds.rbegin()->second; multimap<double, Molecule*>::iterator it = compounds.end(); it--; compounds.erase(it); } } else { delete mol; } Log.level(20) << "\r" << names[file] << " : " << mol_no+1 << flush; } Log.level(20) << endl; Log.flush(); input->close(); delete input; } if (compounds.size() < best_k) { Log.level(20) << "found " << compounds.size() << " compounds matching the given criteria." << endl; } GenericMolFile* output = MolFileFactory::open(output_file, ios::out, "mol2.gz"); for (multimap < double, Molecule* > ::iterator it = compounds.begin(); it!=compounds.end(); it++) { *output << *it->second; delete it->second; } output->close(); delete output; }
// Fragment of a test file written with CHECK(...) / RESULT test macros.
// NOTE(review): `b` is declared outside this fragment — presumably a Molecule*.

// Default constructor: allocating a Molecule must yield a non-null pointer.
CHECK(Molecule() throw())
	b = new Molecule;
	TEST_NOT_EQUAL(b, 0)
RESULT

// Destructor: deletes the instance created in the previous test case.
CHECK(~Molecule() throw())
	delete b;
RESULT

// Copy constructor: name and atom count are expected to be carried over.
CHECK(Molecule(const Molecule& molecule, bool deep = true) throw())
	Atom a1;
	Molecule m("a"), m2;
	m.append(a1);
	// The copy is created as a temporary and then assigned to m2.
	m2 = Molecule(m);
	TEST_EQUAL(m2.getName(), "a")
	TEST_EQUAL(m2.countAtoms(), 1)
RESULT

// Name constructor: the given name (including the empty string) is stored.
CHECK(Molecule(const String& name) throw())
	Molecule m("a");
	TEST_EQUAL(m.getName(), "a")
	Molecule m2("");
	TEST_EQUAL(m2.getName(), "")
RESULT

// Extra test for clear(); only its setup is visible here, the remainder of
// this test case lies outside this fragment.
CHECK([EXTRA] clear())
	System s;
	Molecule m("a");
	Atom a1;
	m.append(a1);
bool KCFFile::write(const Molecule& molecule) throw(File::CannotWrite) { if (!isOpen() || getOpenMode() != std::ios::out) { throw File::CannotWrite(__FILE__, __LINE__, name_); } // An alias for simplicity's sake... std::ostream& os(getFileStream()); // Write ENTRY block // number of blanks???? properties are not read, written??? Which ones are there? os << ENTRY_TAG << " " << molecule.getName() << std::endl; static char buffer[BALL_MAX_LINE_LENGTH]; // Write NODE block // How to create the KEGG atom types? How many blanks? // This is not specified in the KCF format description, so we use what we can // deduce from example files. // First line gets the NODE tag os << NODE_TAG << " " << molecule.countAtoms() << "\n"; Size count = 1; AtomConstIterator ai(molecule.beginAtom()); std::map<const Atom*, Position> atom_to_index; for (; +ai; ++ai, ++count) { // Write the atom line. // Blanks???? String type = ai->getTypeName(); String comment; // Make sure the type is in the set of KEGG types???? // Blanks? sprintf(buffer, " %d %s %s %6.4f %6.4f %s\n", count, type.c_str(), ai->getElement().getSymbol().c_str(), ai->getPosition().x, ai->getPosition().y, comment.c_str()); os << buffer; // Remember the index of the current atom to map atom // pointers back to indices for the EDGE section. atom_to_index[&*ai] = count; } // Write EDGE block. Walk over all bonds to do so. // Blanks???? os << "EDGE " << molecule.countBonds() << "\n"; count = 1; for (ai = molecule.beginAtom(); +ai; ++ai) { for (Atom::BondConstIterator bi(ai->beginBond()); +bi; ++bi) { Position index1 = atom_to_index[bi->getFirstAtom()]; Position index2 = atom_to_index[bi->getSecondAtom()]; String comment; // Write every bond just once if (bi->getFirstAtom() == &*ai) { sprintf(buffer, " %4d %4d %4d %1d%s\n", count, index1, index2, bi->getOrder(), comment.c_str()); os << buffer; ++count; } } } // Write the DELIMITER block os << DELIMITER_TAG << std::endl; return true; }
void processMultiMoleculeFile(ScoringFunction* scoring_function, StructurePreparer* sp, String par_file, Rescoring* rescoring, bool train, double min_dock_score, String dock_score_label, GenericMolFile* input, GenericMolFile* output, double output_score_threshold, bool ignore_top) { list<pair<double, bool> > rescore_list; double min_rescore = 1e12; int i = 1; for (Molecule* mol = input->read(); mol; mol = input->read(), i++) { String name = mol->getName(); Log<<"====== Ligand "<<i; if (name != "") Log<<", "<<name; Log<<" ============"<<endl; Log.flush(); if (mol->hasProperty("score_ligcheck")) { double score_ligcheck = ((String)mol->getProperty("score_ligcheck").toString()).toDouble(); if (score_ligcheck < 0.95) // 0 = error, 1 = check passed { cout<<"Skipping compound because it has been marked as containing errors by LigCheck."<<endl; delete mol; continue; } } double score = 0; try { sp->prepare(mol, par_file); if (!rescoring || train) { scoring_function->setLigand(mol); scoring_function->update(); score = scoring_function->updateScore(); scoring_function->printResult(); } if (rescoring) { if (train) { if (score > 1000) { Log.level(10)<<"score>1000, thus current compound is not added to training data set."<<endl; delete mol; continue; } rescoring->addScoreContributions(mol); } else score = rescoring->rescore(mol); } if (score < output_score_threshold) { if (ignore_top) { bool keep_dock_score = 0; if (mol->hasProperty("score")) { double docking_score = mol->getProperty(dock_score_label).toString().toDouble(); if (docking_score < min_dock_score) { rescore_list.push_back(make_pair(docking_score, false)); keep_dock_score = 1; } } if (!keep_dock_score) { rescore_list.push_back(make_pair(score, true)); if (score < min_rescore) { min_rescore = score; } } } else { mol->setProperty("re-score", score); } } } catch (BALL::Exception::GeneralException e) { cout<<e.getMessage()<<endl; cout<<"Error! 
Skipping this molecule!"<<endl; } if (score < output_score_threshold && (!rescoring || !train) && !ignore_top) { *output << *mol; } delete mol; } /** If ignoring (i.e. not rescoring) the top fraction of docking-results, we need to make sure that all rescored compounds get a rescore-value larger than that of the former. */ if (ignore_top && !train) { input->reopen(); int i = 1; double offset = min_dock_score-min_rescore+0.25; list<pair<double, bool> >::iterator rescore_it = rescore_list.begin(); for (Molecule* mol = input->read(); mol; mol = input->read(), i++) { String name = mol->getName(); if (mol->hasProperty("score_ligcheck")) { double score_ligcheck = ((String)mol->getProperty("score_ligcheck").toString()).toDouble(); if (score_ligcheck < 0.95) // 0 = error, 1 = check passed { delete mol; continue; } } double rescore = rescore_it->first; if (rescore_it->second) // if compound was rescored { rescore += offset; } if (rescore < output_score_threshold) { mol->setProperty("re-score", rescore); *output << *mol; } delete mol; rescore_it++; } } delete input; if (output) delete output; }
// Fragment of a test file written with CHECK(...) / RESULT test macros.
// NOTE(review): atomx, acx, fragment, molecule, atom, atom3 and atom4 are
// declared outside this fragment.

// getFragment() (const): expected to return the address of `fragment`.
CHECK(const Fragment* getFragment() const throw())
	TEST_EQUAL((atomx.getFragment() == &fragment), true)
RESULT

// getFragment() (mutable): a rename through the returned pointer must be
// visible on `fragment`.
CHECK(Fragment* getFragment() throw())
	atomx.getFragment()->setName("1234");
	TEST_EQUAL(fragment.getName(), "1234")
RESULT

// getMolecule() (const): expected to return the address of `molecule`.
CHECK(const Molecule* getMolecule() const throw())
	TEST_EQUAL(acx.getMolecule(), &molecule)
RESULT

// getMolecule() (mutable): a rename through the returned pointer must be
// visible on `molecule`.
CHECK(Molecule* getMolecule() throw())
	atomx.getMolecule()->setName("asdff");
	TEST_EQUAL(molecule.getName(), "asdff")
RESULT

Bond* bond;
// createBond(); the end of this test case lies outside this fragment.
CHECK(Bond* createBond(Atom& atom) throw())
	atom->createBond(*atom3);
	atom3->getBond(*atom);
	// The new bond is expected to be counted on, and reachable from, both atoms.
	TEST_EQUAL(atom->countBonds(), 1)
	TEST_EQUAL(atom3->countBonds(), 1)
	TEST_EQUAL(atom->getBond(*atom3), atom3->getBond(*atom))
	// Bonding the same pair twice: atom4 is expected to end up with one bond (checked below).
	atom->createBond(*atom4);
	atom->createBond(*atom4);
	// Bonding an atom to itself is expected to return 0.
	bond = atom->createBond(*atom);
	TEST_EQUAL(bond, 0);
	TEST_EQUAL(atom4->countBonds(), 1)