void fragmentOnSomeBonds( const ROMol &mol, const std::vector<unsigned int> &bondIndices, std::vector<ROMOL_SPTR> &resMols, unsigned int maxToCut, bool addDummies, const std::vector<std::pair<unsigned int, unsigned int>> *dummyLabels, const std::vector<Bond::BondType> *bondTypes, std::vector<std::vector<unsigned int>> *nCutsPerAtom) { PRECONDITION((!dummyLabels || dummyLabels->size() == bondIndices.size()), "bad dummyLabel vector"); PRECONDITION((!bondTypes || bondTypes->size() == bondIndices.size()), "bad bondType vector"); if (bondIndices.size() > 63) throw ValueErrorException("currently can only fragment on up to 63 bonds"); if (!maxToCut || !mol.getNumAtoms() || !bondIndices.size()) return; boost::uint64_t state = (0x1L << maxToCut) - 1; boost::uint64_t stop = 0x1L << bondIndices.size(); std::vector<unsigned int> fragmentHere(maxToCut); std::vector<std::pair<unsigned int, unsigned int>> *dummyLabelsHere = nullptr; if (dummyLabels) { dummyLabelsHere = new std::vector<std::pair<unsigned int, unsigned int>>(maxToCut); } std::vector<Bond::BondType> *bondTypesHere = nullptr; if (bondTypes) { bondTypesHere = new std::vector<Bond::BondType>(maxToCut); } while (state < stop) { unsigned int nSeen = 0; for (unsigned int i = 0; i < bondIndices.size() && nSeen < maxToCut; ++i) { if (state & (0x1L << i)) { fragmentHere[nSeen] = bondIndices[i]; if (dummyLabelsHere) (*dummyLabelsHere)[nSeen] = (*dummyLabels)[i]; if (bondTypesHere) (*bondTypesHere)[nSeen] = (*bondTypes)[i]; ++nSeen; } } std::vector<unsigned int> *lCutsPerAtom = nullptr; if (nCutsPerAtom) { nCutsPerAtom->push_back(std::vector<unsigned int>(mol.getNumAtoms())); lCutsPerAtom = &(nCutsPerAtom->back()); } ROMol *nm = fragmentOnBonds(mol, fragmentHere, addDummies, dummyLabelsHere, bondTypesHere, lCutsPerAtom); resMols.push_back(ROMOL_SPTR(nm)); state = nextBitCombo(state); } delete dummyLabelsHere; delete bondTypesHere; }
// Copy constructor.
//
// Copies the type string, the fragment-length bounds and the tolerance from
// `other` through its accessors, then copy-constructs a new ROMol for every
// functional group held by `other` so this object stores its own molecules
// rather than sharing the source's pointers.
FragCatParams::FragCatParams(const FragCatParams &other) {
  d_funcGroups.clear();

  d_typeStr = other.getTypeStr();
  d_lowerFragLen = other.getLowerFragLength();
  d_upperFragLen = other.getUpperFragLength();
  d_tolerance = other.getTolerance();

  const MOL_SPTR_VECT &sourceGroups = other.getFuncGroups();
  for (const auto &sourceGroup : sourceGroups) {
    const ROMol &original = *(sourceGroup.get());
    d_funcGroups.push_back(ROMOL_SPTR(new ROMol(original)));
  }
}
// Reads functional-group definitions from `inStream`, one candidate per line.
//
// Each line is handed to getSmarts(); lines for which it returns a null
// pointer are skipped and do not count towards `nToRead`.
//
// \param inStream  stream to read from
// \param nToRead   maximum number of molecules to collect; a negative value
//                  means "read until the stream is exhausted"
// \return the molecules that were successfully constructed, in input order
// \throws BadFileException if the stream is already in the `bad` state.
MOL_SPTR_VECT readFuncGroups(std::istream &inStream, int nToRead) {
  MOL_SPTR_VECT funcGroups;
  if (inStream.bad()) {
    throw BadFileException("Bad stream contents.");
  }

  std::string line;
  int nRead = 0;
  // Lines are read into a std::string rather than a fixed-size char buffer:
  // with a fixed buffer an over-long line puts the stream into the fail state
  // without ever reaching EOF, so a loop keyed only on eof() never terminates.
  // Testing good() also stops the loop on any other stream failure.
  while (inStream.good() && (nToRead < 0 || nRead < nToRead)) {
    std::getline(inStream, line);
    // parse the molecule on this line (if there is one)
    ROMol *mol = getSmarts(line);
    if (mol) {
      funcGroups.push_back(ROMOL_SPTR(mol));
      ++nRead;
    }
  }
  return funcGroups;
}
void constructFragmenterBondTypes(std::istream *inStream, const std::map<unsigned int,std::string> &atomTypes, std::vector<FragmenterBondType> &defs, std::string comment,bool validate,bool labelByConnector){ PRECONDITION(inStream,"no stream"); defs.clear(); defs.resize(0); unsigned int line=0; while(!inStream->eof()){ ++line; std::string tempStr=getLine(inStream); if(tempStr=="" || tempStr.find(comment)==0 ) continue; std::vector<std::string> tokens; boost::split(tokens,tempStr,boost::is_any_of(" \t"),boost::token_compress_on); if(tokens.size()<3){ BOOST_LOG(rdWarningLog)<<"line "<<line<<" is too short"<<std::endl; continue; } unsigned int idx1=boost::lexical_cast<unsigned int>(tokens[0]); if(atomTypes.find(idx1)==atomTypes.end()){ BOOST_LOG(rdWarningLog)<<"atom type #"<<idx1<<" not recognized."<<std::endl; continue; } unsigned int idx2=boost::lexical_cast<unsigned int>(tokens[1]); if(atomTypes.find(idx2)==atomTypes.end()){ BOOST_LOG(rdWarningLog)<<"atom type #"<<idx2<<" not recognized."<<std::endl; continue; } std::string sma1=atomTypes.find(idx1)->second; std::string sma2=atomTypes.find(idx2)->second; std::string smarts="[$("+ sma1 +")]"+tokens[2]+"[$("+ sma2 +")]"; ROMol *p=SmartsToMol(smarts); if(validate){ if(!p){ BOOST_LOG(rdWarningLog)<<"cannot convert SMARTS "<<smarts<<" to molecule at line "<<line<<std::endl; continue; } } FragmenterBondType fbt; fbt.atom1Type=idx1; fbt.atom2Type=idx2; if(labelByConnector){ fbt.atom1Label=idx1; fbt.atom2Label=idx2; } else { fbt.atom1Label=idx2; fbt.atom2Label=idx1; } if(p){ // for the purposes of replacing the bond, we'll use just the first // character to set the bond type (if we recognize it): switch(tokens[2][0]){ case '-': fbt.bondType = Bond::SINGLE;break; case '=': fbt.bondType = Bond::DOUBLE;break; case '#': fbt.bondType = Bond::TRIPLE;break; case ':': fbt.bondType = Bond::AROMATIC;break; default: fbt.bondType = p->getBondWithIdx(0)->getBondType(); } fbt.query = ROMOL_SPTR(p); } else { 
fbt.bondType=Bond::UNSPECIFIED; fbt.query=ROMOL_SPTR(); } defs.push_back(fbt); } }
// Builds a ChemicalReaction from reaction SMARTS (or SMILES) text of the form
// "reactants>agents>products"; the agents section may be empty.
//
// \param text          the reaction string
// \param replacements  forwarded unchanged to
//                      DaylightParserUtils::constructMolFromString()
// \param useSmiles     forwarded unchanged to
//                      DaylightParserUtils::constructMolFromString()
// \return a newly allocated reaction; the caller takes ownership
// \throws ChemicalReactionParserException if the text contains no '>', if
//         the second '>' is not also the last one (this includes text with
//         only a single '>'), or if a reactant, product or agent cannot be
//         constructed.
//
// The partially built reaction is released if anything throws while it is
// being populated.
ChemicalReaction *RxnSmartsToChemicalReaction(
    const std::string &text, std::map<std::string, std::string> *replacements,
    bool useSmiles) {
  std::size_t pos1 = text.find('>');
  std::size_t pos2 = text.rfind('>');
  if (pos1 == std::string::npos) {
    throw ChemicalReactionParserException(
        "a reaction requires at least one reactant and one product");
  }
  if (text.find('>', pos1 + 1) != pos2) {
    throw ChemicalReactionParserException("multi-step reactions not supported");
  }

  std::string reactText = text.substr(0, pos1);
  std::string agentText;
  if (pos2 != pos1 + 1) {
    agentText = text.substr(pos1 + 1, (pos2 - pos1) - 1);
  }
  std::string productText = text.substr(pos2 + 1);

  // recognize changes within the same molecules, e.g., intra molecular bond
  // formation
  // therefore we need to correctly interpret parenthesis and dots in the
  // reaction smarts
  std::vector<std::string> reactSmarts =
      DaylightParserUtils::splitSmartsIntoComponents(reactText);
  std::vector<std::string> productSmarts =
      DaylightParserUtils::splitSmartsIntoComponents(productText);

  auto *rxn = new ChemicalReaction();
  // A single handler owns the cleanup of `rxn`, so it is released both for
  // the parse errors raised here and for exceptions thrown by any callee.
  try {
    for (const auto &txt : reactSmarts) {
      ROMol *mol = DaylightParserUtils::constructMolFromString(
          txt, replacements, useSmiles);
      if (!mol) {
        std::string errMsg = "Problems constructing reactant from SMARTS: ";
        errMsg += txt;
        throw ChemicalReactionParserException(errMsg);
      }
      rxn->addReactantTemplate(ROMOL_SPTR(mol));
    }

    for (const auto &txt : productSmarts) {
      ROMol *mol = DaylightParserUtils::constructMolFromString(
          txt, replacements, useSmiles);
      if (!mol) {
        std::string errMsg = "Problems constructing product from SMARTS: ";
        errMsg += txt;
        throw ChemicalReactionParserException(errMsg);
      }
      rxn->addProductTemplate(ROMOL_SPTR(mol));
    }
    updateProductsStereochem(rxn);

    // allow a reaction template to have no agent specified
    if (agentText.size() != 0) {
      // Held in a smart pointer so the combined agent molecule is freed even
      // if splitting it into fragments throws.
      ROMOL_SPTR agentMol(DaylightParserUtils::constructMolFromString(
          agentText, replacements, useSmiles));
      if (!agentMol) {
        std::string errMsg = "Problems constructing agent from SMARTS: ";
        errMsg += agentText;
        throw ChemicalReactionParserException(errMsg);
      }
      std::vector<ROMOL_SPTR> agents = MolOps::getMolFrags(*agentMol, false);
      for (auto &agent : agents) {
        rxn->addAgentTemplate(agent);
      }
    }

    // "SMARTS"-based reactions have implicit properties
    rxn->setImplicitPropertiesFlag(true);
  } catch (...) {
    delete rxn;
    throw;
  }
  return rxn;
}
void ReactionPickler::_depickle(std::istream &ss, ChemicalReaction *rxn, int version) { PRECONDITION(rxn, "empty reaction"); Tags tag; uint32_t numReactants, numProducts, numAgents = 0; streamRead(ss, numReactants); streamRead(ss, numProducts); if (version > 1000) { streamRead(ss, numAgents); } // we use this here and below to set df_needsInit, so don't re-use the // variable uint32_t flag = 0; streamRead(ss, flag); rxn->setImplicitPropertiesFlag(flag & 0x1); // ------------------- // // Read Reactants // // ------------------- streamRead(ss, tag); if (tag != BEGINREACTANTS) { throw ReactionPicklerException( "Bad pickle format: BEGINREACTANTS tag not found."); } for (unsigned int i = 0; i < numReactants; ++i) { ROMol *mol = new ROMol(); MolPickler::molFromPickle(ss, mol); rxn->addReactantTemplate(ROMOL_SPTR(mol)); } streamRead(ss, tag); if (tag != ENDREACTANTS) { throw ReactionPicklerException( "Bad pickle format: ENDREACTANTS tag not found."); } streamRead(ss, tag); if (tag != BEGINPRODUCTS) { throw ReactionPicklerException( "Bad pickle format: BEGINPRODUCTS tag not found."); } for (unsigned int i = 0; i < numProducts; ++i) { ROMol *mol = new ROMol(); MolPickler::molFromPickle(ss, mol); rxn->addProductTemplate(ROMOL_SPTR(mol)); } streamRead(ss, tag); if (tag != ENDPRODUCTS) { throw ReactionPicklerException( "Bad pickle format: ENDPRODUCTS tag not found."); } if (numAgents != 0) { streamRead(ss, tag); if (tag != BEGINAGENTS) { throw ReactionPicklerException( "Bad pickle format: BEGINAGENTS tag not found."); } for (unsigned int i = 0; i < numAgents; ++i) { ROMol *mol = new ROMol(); MolPickler::molFromPickle(ss, mol); rxn->addAgentTemplate(ROMOL_SPTR(mol)); } streamRead(ss, tag); if (tag != ENDAGENTS) { throw ReactionPicklerException( "Bad pickle format: ENDAGENTS tag not found."); } } // need to do this after we add reactants and products rxn->df_needsInit = flag & 0x2; } // end of _depickle