double getAlignmentTransform(const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans, int prbCid, int refCid, const MatchVectType *atomMap, const RDNumeric::DoubleVector *weights, bool reflect, unsigned int maxIterations) { RDGeom::Point3DConstPtrVect refPoints, prbPoints; const Conformer &prbCnf = prbMol.getConformer(prbCid); const Conformer &refCnf = refMol.getConformer(refCid); if (atomMap == 0) { // we have to figure out the mapping between the two molecule MatchVectType match; if (SubstructMatch(refMol, prbMol, match)) { MatchVectType::const_iterator mi; for (mi = match.begin(); mi != match.end(); mi++) { prbPoints.push_back(&prbCnf.getAtomPos(mi->first)); refPoints.push_back(&refCnf.getAtomPos(mi->second)); } } else { throw MolAlignException("No sub-structure match found between the probe and query mol"); } } else { MatchVectType::const_iterator mi; for (mi = atomMap->begin(); mi != atomMap->end(); mi++) { prbPoints.push_back(&prbCnf.getAtomPos(mi->first)); refPoints.push_back(&refCnf.getAtomPos(mi->second)); } } double ssr = RDNumeric::Alignments::AlignPoints(refPoints, prbPoints, trans, weights, reflect, maxIterations); ssr /= (prbPoints.size()); return sqrt(ssr); }
//************************************************************************************* // // Adds 2D coordinates to a molecule using the Depict.dll // // ARGUMENTS: // mol: the molecule to be altered // tempFilename: (OPTIONAL) the name of the temporary file // // RETURNS: // 1 on success, 0 otherwise // // Here's the process by which this works (it's kind of contorted): // 1) convert the mol to SMILES // 2) use the DLL to convert the SMILES to a mol file (on disk) // 3) parse the mol file into a temporary molecule // 4) do a substructure match from the old molecule to the // temp one (which may have a different atom numbering or additional // atoms added). // 5) Update the positions of the atoms on the old molecule. // 6) Free the temp molecule. // // NOTES: // - *FIX:* at the moment we're not doing anything to clear up the // temp file created in this process. It'll always have the same // name unless the user explicitly asks that we do something different. // - To use the DLL, it's essential that the COMBICHEM_ROOT and COMBICHEM_RELEASE // environment variables be set. If this isn't done, this whole process // will fail. // - See the notes above about failures when opening the DLL. 
// //************************************************************************************* int Add2DCoordsToMolDLL(ROMol &mol,std::string tempFilename){ std::string smi=MolToSmiles(mol,true); int tmp = SmilesToMolFileDLL(smi,tempFilename); int res = -1; if(tmp){ // build another mol from that mol file: RWMol *tmpMol = MolFileToMol(tempFilename,false); // match it up with the starting mol: // (We need to do this because the depict.dll conversion // to a mol file may have added Hs) MatchVectType matchVect; bool hasMatch=SubstructMatch(tmpMol,&mol,matchVect); if(hasMatch){ const Conformer &conf = tmpMol->getCoformer(0); Coformer *nconf = new Coformer(mol.getNumAtoms()); for(MatchVectType::const_iterator mvi=matchVect.begin(); mvi!=matchVect.end();mvi++){ nconf->setAtomPos(conf.getAtomPos(mvi->first)); } confId = (int)mol.addConformer(nconf, true); } delete tmpMol; } return res; }
bool StructCheckTautomer::applyTautomer(unsigned it) { if (Options.FromTautomer.size() <= it || Options.ToTautomer.size() <= it) { if (Options.Verbose) BOOST_LOG(rdInfoLog) << "ERROR: incorrect Tautomer index it=" << it << "\n"; return false; } const ROMol &fromTautomer = *Options.FromTautomer[it]; const ROMol &toTautomer = *Options.ToTautomer[it]; if (toTautomer.getNumAtoms() != fromTautomer.getNumAtoms()) { if (Options.Verbose) BOOST_LOG(rdInfoLog) << "ERROR: incorrect data toTautomer.getNumAtoms() " "!= fromTautomer.getNumAtoms()\n"; // incorrect data // throw(.....); return false; } const unsigned nta = toTautomer.getNumAtoms(); const unsigned ntb = toTautomer.getNumBonds(); MatchVectType match; // The format is (queryAtomIdx, molAtomIdx) std::vector<unsigned> atomIdxMap( Mol.getNumAtoms()); // matched tau atom indeces if (!SubstructMatch(Mol, *Options.FromTautomer[it], match)) // SSMatch(mp, from_tautomer, SINGLE_MATCH); return false; if (Options.Verbose) BOOST_LOG(rdInfoLog) << "found match for from_tautomer with " << nta << " atoms\n"; // init for (unsigned i = 0; i < Mol.getNumAtoms(); i++) atomIdxMap[i] = -1; for (MatchVectType::const_iterator mit = match.begin(); mit != match.end(); mit++) { unsigned tai = mit->first; // From and To Tautomer Atom index unsigned mai = mit->second; // Mol Atom index atomIdxMap[mai] = tai; } // scan for completely mapped bonds and replace bond order with mapped bond // from to_tautomer for (RDKit::BondIterator_ bond = Mol.beginBonds(); bond != Mol.endBonds(); bond++) { unsigned ti = atomIdxMap[(*bond)->getBeginAtomIdx()]; unsigned tj = atomIdxMap[(*bond)->getEndAtomIdx()]; if (-1 == ti || -1 == tj) continue; const Bond *tb = toTautomer.getBondBetweenAtoms(ti, tj); if (tb && (*bond)->getBondType() != tb->getBondType()) { (*bond)->setBondType(tb->getBondType()); } } // apply charge/radical fixes if any for (unsigned i = 0; i < match.size(); i++) { Atom &atom = *Mol.getAtomWithIdx(match[i].second); const Atom &ta = 
*toTautomer.getAtomWithIdx(match[i].first); atom.setFormalCharge(ta.getFormalCharge()); atom.setNumRadicalElectrons(ta.getNumRadicalElectrons()); } return true; }
void getCrippenAtomContribs(const ROMol &mol, std::vector< double > &logpContribs, std::vector< double > &mrContribs, bool force, std::vector<unsigned int> *atomTypes, std::vector<std::string> *atomTypeLabels ){ PRECONDITION(logpContribs.size()==mol.getNumAtoms() && mrContribs.size()==mol.getNumAtoms(), "bad result vector size"); PRECONDITION((!atomTypes || atomTypes->size()==mol.getNumAtoms()), "bad atomTypes vector"); PRECONDITION((!atomTypeLabels || atomTypeLabels->size()==mol.getNumAtoms()), "bad atomTypeLabels vector"); if(!force && mol.hasProp("_crippenLogPContribs")){ std::vector<double> tmpVect1,tmpVect2; mol.getProp("_crippenLogPContribs",tmpVect1); mol.getProp("_crippenMRContribs",tmpVect2); if(tmpVect1.size()==mol.getNumAtoms() && tmpVect2.size()==mol.getNumAtoms() ){ logpContribs=tmpVect1; mrContribs=tmpVect2; return; } } boost::dynamic_bitset<> atomNeeded(mol.getNumAtoms()); atomNeeded.set(); const CrippenParamCollection *params=CrippenParamCollection::getParams(); for(CrippenParamCollection::ParamsVect::const_iterator it=params->begin(); it!=params->end(); ++it){ std::vector<MatchVectType> matches; SubstructMatch(mol,*(it->dp_pattern.get()),matches, false,true); for(std::vector<MatchVectType>::const_iterator matchIt=matches.begin(); matchIt!=matches.end();++matchIt){ int idx=(*matchIt)[0].second; if(atomNeeded[idx]){ atomNeeded[idx]=0; logpContribs[idx] = it->logp; mrContribs[idx] = it->mr; if(atomTypes) (*atomTypes)[idx]=it->idx; if(atomTypeLabels) (*atomTypeLabels)[idx]=it->label; } } // no need to keep matching stuff if we already found all the atoms: if(atomNeeded.none()) break; } mol.setProp("_crippenLogPContribs",logpContribs,true); mol.setProp("_crippenMRContribs",mrContribs,true); }
bool SmartsMatcher::hasMatch(const ROMol &mol) const { PRECONDITION(d_pattern.get(), "bad on pattern"); if (d_min_count == 1 && d_max_count == UINT_MAX) { RDKit::MatchVectType matches; return SubstructMatch(mol, *d_pattern.get(), matches); } else { // need to count const bool uniquify = true; std::vector<RDKit::MatchVectType> matches; unsigned int count = RDKit::SubstructMatch(mol, *d_pattern.get(), matches, uniquify); return (count >= d_min_count && (d_max_count == UINT_MAX || count <= d_max_count)); } }
void addTorsions(const ROMol &mol, MMFFMolProperties *mmffMolProperties, ForceFields::ForceField *field, const std::string &torsionBondSmarts) { PRECONDITION(field, "bad ForceField"); PRECONDITION(mmffMolProperties, "bad MMFFMolProperties"); PRECONDITION(mmffMolProperties->isValid(), "missing atom types - invalid force-field"); std::ostream &oStream = mmffMolProperties->getMMFFOStream(); ROMol::ADJ_ITER nbr1Idx; ROMol::ADJ_ITER end1Nbrs; ROMol::ADJ_ITER nbr2Idx; ROMol::ADJ_ITER end2Nbrs; double totalTorsionEnergy = 0.0; RDGeom::PointPtrVect points; if (mmffMolProperties->getMMFFVerbosity()) { if (mmffMolProperties->getMMFFVerbosity() == MMFF_VERBOSITY_HIGH) { oStream << "\n" "T O R S I O N A L\n\n" "--------------ATOMS--------------- ---ATOM TYPES--- " "FF TORSION -----FORCE Params-----\n" " I J K L I J K L " "CLASS ANGLE ENERGY V1 V2 V3\n" "-------------------------------------------------------------" "-----------------------------------------------------" << std::endl; } points = field->positions(); } std::vector<MatchVectType> matchVect; const ROMol *defaultQuery = DefaultTorsionBondSmarts::query(); const ROMol *query = (torsionBondSmarts == DefaultTorsionBondSmarts::string()) ? 
defaultQuery : SmartsToMol(torsionBondSmarts); TEST_ASSERT(query); unsigned int nHits = SubstructMatch(mol, *query, matchVect); if (query != defaultQuery) delete query; for (unsigned int i = 0; i < nHits; ++i) { MatchVectType match = matchVect[i]; TEST_ASSERT(match.size() == 2); int idx2 = match[0].second; int idx3 = match[1].second; const Bond *bond = mol.getBondBetweenAtoms(idx2, idx3); TEST_ASSERT(bond); const Atom *jAtom = mol.getAtomWithIdx(idx2); const Atom *kAtom = mol.getAtomWithIdx(idx3); if (((jAtom->getHybridization() == Atom::SP2) || (jAtom->getHybridization() == Atom::SP3)) && ((kAtom->getHybridization() == Atom::SP2) || (kAtom->getHybridization() == Atom::SP3))) { ROMol::OEDGE_ITER beg1, end1; boost::tie(beg1, end1) = mol.getAtomBonds(jAtom); while (beg1 != end1) { const Bond *tBond1 = mol[*beg1].get(); if (tBond1 != bond) { int idx1 = tBond1->getOtherAtomIdx(idx2); ROMol::OEDGE_ITER beg2, end2; boost::tie(beg2, end2) = mol.getAtomBonds(kAtom); while (beg2 != end2) { const Bond *tBond2 = mol[*beg2].get(); if ((tBond2 != bond) && (tBond2 != tBond1)) { int idx4 = tBond2->getOtherAtomIdx(idx3); // make sure this isn't a three-membered ring: if (idx4 != idx1) { // we now have a torsion involving atoms (bonds): // bIdx - (tBond1) - idx1 - (bond) - idx2 - (tBond2) - eIdx unsigned int torType; MMFFTor mmffTorParams; if (mmffMolProperties->getMMFFTorsionParams( mol, idx1, idx2, idx3, idx4, torType, mmffTorParams)) { TorsionAngleContrib *contrib = new TorsionAngleContrib( field, idx1, idx2, idx3, idx4, &mmffTorParams); field->contribs().push_back(ForceFields::ContribPtr(contrib)); if (mmffMolProperties->getMMFFVerbosity()) { const Atom *iAtom = mol.getAtomWithIdx(idx1); const Atom *lAtom = mol.getAtomWithIdx(idx4); unsigned int iAtomType = mmffMolProperties->getMMFFAtomType(idx1); unsigned int jAtomType = mmffMolProperties->getMMFFAtomType(idx2); unsigned int kAtomType = mmffMolProperties->getMMFFAtomType(idx3); unsigned int lAtomType = 
mmffMolProperties->getMMFFAtomType(idx4); const RDGeom::Point3D p1((*(points[idx1]))[0], (*(points[idx1]))[1], (*(points[idx1]))[2]); const RDGeom::Point3D p2((*(points[idx2]))[0], (*(points[idx2]))[1], (*(points[idx2]))[2]); const RDGeom::Point3D p3((*(points[idx3]))[0], (*(points[idx3]))[1], (*(points[idx3]))[2]); const RDGeom::Point3D p4((*(points[idx4]))[0], (*(points[idx4]))[1], (*(points[idx4]))[2]); const double cosPhi = MMFF::Utils::calcTorsionCosPhi(p1, p2, p3, p4); const double torsionEnergy = MMFF::Utils::calcTorsionEnergy( mmffTorParams.V1, mmffTorParams.V2, mmffTorParams.V3, cosPhi); if (mmffMolProperties->getMMFFVerbosity() == MMFF_VERBOSITY_HIGH) { oStream << std::left << std::setw(2) << iAtom->getSymbol() << " #" << std::setw(5) << idx1 + 1 << std::setw(2) << jAtom->getSymbol() << " #" << std::setw(5) << idx2 + 1 << std::setw(2) << kAtom->getSymbol() << " #" << std::setw(5) << idx3 + 1 << std::setw(2) << lAtom->getSymbol() << " #" << std::setw(5) << idx4 + 1 << std::right << std::setw(5) << iAtomType << std::setw(5) << jAtomType << std::setw(5) << kAtomType << std::setw(5) << lAtomType << std::setw(6) << torType << " " << std::fixed << std::setprecision(3) << std::setw(10) << RAD2DEG * acos(cosPhi) << std::setw(10) << torsionEnergy << std::setw(10) << mmffTorParams.V1 << std::setw(10) << mmffTorParams.V2 << std::setw(10) << mmffTorParams.V3 << std::endl; } totalTorsionEnergy += torsionEnergy; } } } } beg2++; } } beg1++; } } } if (mmffMolProperties->getMMFFVerbosity()) { if (mmffMolProperties->getMMFFVerbosity() == MMFF_VERBOSITY_HIGH) { oStream << std::endl; } oStream << "TOTAL TORSIONAL ENERGY =" << std::right << std::setw(16) << std::fixed << std::setprecision(4) << totalTorsionEnergy << std::endl; } }
// ------------------------------------------------------------------------ // // // // ------------------------------------------------------------------------ void addTorsions(const ROMol &mol,const AtomicParamVect ¶ms, ForceFields::ForceField *field, std::string torsionBondSmarts){ PRECONDITION(mol.getNumAtoms()==params.size(),"bad parameters"); PRECONDITION(field,"bad forcefield"); // find all of the torsion bonds: std::vector<MatchVectType> matchVect; ROMol *query=SmartsToMol(torsionBondSmarts); TEST_ASSERT(query); unsigned int nHits=SubstructMatch(mol,*query,matchVect); delete query; for(unsigned int i=0; i<nHits; i++){ MatchVectType match=matchVect[i]; TEST_ASSERT(match.size()==2); int idx1=match[0].second; int idx2=match[1].second; if(!params[idx1]||!params[idx2]) continue; const Bond *bond=mol.getBondBetweenAtoms(idx1,idx2); std::vector<TorsionAngleContrib *> contribsHere; TEST_ASSERT(bond); const Atom *atom1=mol.getAtomWithIdx(idx1); const Atom *atom2=mol.getAtomWithIdx(idx2); if( (atom1->getHybridization()==Atom::SP2||atom1->getHybridization()==Atom::SP3) && (atom2->getHybridization()==Atom::SP2||atom2->getHybridization()==Atom::SP3) ){ ROMol::OEDGE_ITER beg1,end1; boost::tie(beg1,end1) = mol.getAtomBonds(atom1); while(beg1!=end1){ const Bond *tBond1=mol[*beg1].get(); if(tBond1!=bond){ int bIdx = tBond1->getOtherAtomIdx(idx1); ROMol::OEDGE_ITER beg2,end2; boost::tie(beg2,end2) = mol.getAtomBonds(atom2); while(beg2 != end2){ const Bond *tBond2=mol[*beg2].get(); if(tBond2!=bond && tBond2!=tBond1){ int eIdx=tBond2->getOtherAtomIdx(idx2); // make sure this isn't a three-membered ring: if(eIdx != bIdx){ // we now have a torsion involving atoms (bonds): // bIdx - (tBond1) - idx1 - (bond) - idx2 - (tBond2) - eIdx TorsionAngleContrib *contrib; // if either of the end atoms is SP2 hybridized, set a flag // here. 
bool hasSP2=false; if(mol.getAtomWithIdx(bIdx)->getHybridization()==Atom::SP2 || mol.getAtomWithIdx(bIdx)->getHybridization()==Atom::SP2) { hasSP2 = true; } //std::cout << "Torsion: " << bIdx << "-" << idx1 << "-" << idx2 << "-" << eIdx << std::endl; //if(okToIncludeTorsion(mol,bond,bIdx,idx1,idx2,eIdx)){ //std::cout << " INCLUDED" << std::endl; contrib = new TorsionAngleContrib(field,bIdx,idx1,idx2,eIdx, bond->getBondTypeAsDouble(), atom1->getAtomicNum(), atom2->getAtomicNum(), atom1->getHybridization(), atom2->getHybridization(), params[idx1],params[idx2], hasSP2); field->contribs().push_back(ForceFields::ContribPtr(contrib)); contribsHere.push_back(contrib); //} } } beg2++; } } beg1++; } } // now divide the force constant for each contribution to the torsion energy // about this bond by the number of contribs about this bond: for(std::vector<TorsionAngleContrib *>::iterator chI=contribsHere.begin(); chI!=contribsHere.end();++chI){ (*chI)->scaleForceConstant(contribsHere.size()); } } }
// Locates the functional groups from params on mol.
//
//  mol     - molecule to search
//  params  - supplies the functional-group queries (must be non-null)
//  fgBonds - cleared, then filled with the indices of all mol bonds covered
//            by the functional-group matches
//
// Returns a list of (atom idx in mol, functional-group idx) pairs. The atom
// is the one matched by the first entry of the match.
//
// A match is recorded only if it contributes at least one bond that is not
// yet in fgBonds; a match whose bonds were all seen before (or which has no
// bonds) is ignored.
//
// FIX (kept from the original): this assumes the functional groups in
// params come in decreasing order of size, and that the first atom of each
// group is its connection atom.
MatchVectType findFuncGroupsOnMol(const ROMol &mol,
                                  const FragCatParams *params,
                                  INT_VECT &fgBonds) {
  PRECONDITION(params,"bad params");
  fgBonds.clear();

  MatchVectType attachments;
  std::vector<MatchVectType> groupMatches;
  const MOL_SPTR_VECT &funcGroups = params->getFuncGroups();

  int groupId = 0;
  for (MOL_SPTR_VECT::const_iterator groupIt = funcGroups.begin();
       groupIt != funcGroups.end(); ++groupIt, ++groupId) {
    const ROMol *group = groupIt->get();
    // the name is not used below; the lookup is kept because it is part of
    // the original behaviour (presumably it fails for unnamed groups -
    // TODO confirm)
    std::string groupName;
    (*groupIt)->getProp(common_properties::_Name, groupName);

    // match this functional group onto the molecule
    SubstructMatch(mol, *group, groupMatches);

    // loop over all the matches we get for this fgroup
    for (std::vector<MatchVectType>::const_iterator matchIt =
             groupMatches.begin();
         matchIt != groupMatches.end(); ++matchIt) {
      // the second entry of each pair is the atom ID from mol
      const int attachAtom = matchIt->front().second;

      // atom IDs in mol covered by this match
      INT_VECT matchedAtoms;
      for (MatchVectType::const_iterator pairIt = matchIt->begin();
           pairIt != matchIt->end(); ++pairIt) {
        matchedAtoms.push_back(pairIt->second);
      }

      // bonds in mol that lie inside the matched portion
      INT_VECT matchedBonds;
      matchedBonds = Subgraphs::bondListFromAtomList(mol, matchedAtoms);

      // register every bond not seen before, remembering whether any was new
      bool addedNewBond = false;
      for (INT_VECT_CI bondIt = matchedBonds.begin();
           bondIt != matchedBonds.end(); ++bondIt) {
        const bool alreadyCovered =
            std::find(fgBonds.begin(), fgBonds.end(), (*bondIt)) !=
            fgBonds.end();
        if (!alreadyCovered) {
          addedNewBond = true;
          fgBonds.push_back(*bondIt);
        }
      }

      // this mapping is not part of a larger func group mapping, so record it
      if (addedNewBond) {
        attachments.push_back(std::pair<int, int>(attachAtom, groupId));
      }
    }
  }
  return attachments;
}
bool fragmentMol(const ROMol& mol, std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res, unsigned int minCuts, unsigned int maxCuts, unsigned int maxCutBonds, const std::string& pattern) { #ifdef _DEBUG for (size_t i = 0; i < mol.getNumAtoms(); i++) { std::string symbol = mol.getAtomWithIdx(i)->getSymbol(); int label = 0; mol.getAtomWithIdx(i)->getPropIfPresent(common_properties::molAtomMapNumber, label); char a1[32]; if (0 == label) sprintf(a1, "\'%s\'", symbol.c_str(), label); else sprintf(a1, "\'%s:%u\'", symbol.c_str(), label); std::cout << "Atom " << i << ": " << a1; //<<" Bonds:"; std::cout << "\n"; } #endif res.clear(); std::auto_ptr<const ROMol> smarts((const ROMol*)SmartsToMol(pattern)); std::vector<MatchVectType> matching_atoms; // one bond per match ! with default pattern unsigned int total = SubstructMatch(mol, *smarts, matching_atoms); #ifdef _DEBUG std::cout << "total substructs =" << total << "\nmatching bonds (atom1, atom2):\n"; #endif if (0 == total) // Not found. Return empty set of molecules return false; #ifdef _DEBUG for (size_t i = 0; i < matching_atoms.size(); i++) { std::string symbol = mol.getAtomWithIdx(matching_atoms[i][0].second)->getSymbol(); int label = 0; mol.getAtomWithIdx(matching_atoms[i][0].second) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a1[32]; if (0 == label) sprintf(a1, "\'%s\'", symbol.c_str(), label); else sprintf(a1, "\'%s:%u\'", symbol.c_str(), label); symbol = mol.getAtomWithIdx(matching_atoms[i][1].second)->getSymbol(); label = 0; mol.getAtomWithIdx(matching_atoms[i][1].second) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a2[32]; if (0 == label) sprintf(a2, "\'%s\'", symbol.c_str(), label); else sprintf(a2, "\'%s:%u\'", symbol.c_str(), label); std::cout << i << ": (" << matching_atoms[i][0].second << a1 << "," << matching_atoms[i][1].second << a2 << ") \n"; } #endif std::vector<BondVector_t> matching_bonds; // List of matched query's bonds 
convertMatchingToBondVect(matching_bonds, matching_atoms, mol); if (matching_bonds.size() > maxCutBonds) return false; #ifdef _DEBUG std::cout << "total matching_bonds = " << matching_bonds.size() << "\n"; #endif // loop to generate every cut in the molecule BondVector_t bonds_selected; processCuts(0, minCuts, maxCuts, bonds_selected, matching_bonds, mol, res); return true; }
FeatSPtrList MolChemicalFeatureFactory::getFeaturesForMol(const ROMol &mol, const char* includeOnly) const { PRECONDITION(includeOnly,"bad limits"); std::string limits(includeOnly); #ifdef USE_VFLIB AR_MOLGRAPH *molG=getMolGraph(mol); #endif FeatSPtrList res; int idx = 1; typedef std::vector< std::pair< std::string,std::set<int> > > MatchSetCollection; MatchSetCollection matchSets; for(MolChemicalFeatureDef::CollectionType::const_iterator featDefIt=beginFeatureDefs(); featDefIt!=endFeatureDefs();featDefIt++){ MolChemicalFeatureDef::CollectionType::value_type featDef=*featDefIt; if(limits=="" || limits==featDef->getFamily()){ std::vector< MatchVectType > matches; #ifdef USE_VFLIB unsigned int numMatches=SubstructMatch(molG,*featDef->getPattern(),matches); #else unsigned int numMatches=SubstructMatch(mol,*featDef->getPattern(),matches); #endif for(unsigned int i=0;i<numMatches;i++){ const MatchVectType &match=matches[i]; std::set<int> matchSet; for(MatchVectType::const_iterator mIt=match.begin(); mIt!=match.end(); ++mIt){ matchSet.insert(mIt->second); } // loop over the matches we've already found and see if this one // is unique: bool unique=true; for(MatchSetCollection::const_iterator vsiCI=matchSets.begin(); vsiCI!=matchSets.end(); ++vsiCI){ if(vsiCI->first==featDef->getFamily() && std::includes(vsiCI->second.begin(),vsiCI->second.end(), matchSet.begin(),matchSet.end())){ unique=false; break; } } if(unique){ matchSets.push_back(std::make_pair(featDef->getFamily(),matchSet)); // Set up the feature: MolChemicalFeature *newFeat=new MolChemicalFeature(&mol,this,featDef.get(),idx++); MolChemicalFeature::AtomPtrContainer &atoms=newFeat->d_atoms; atoms.resize(match.size()); // set up the atoms: for(MatchVectType::const_iterator matchIt=match.begin(); matchIt!=match.end();matchIt++){ int atomIdx=matchIt->second; int queryIdx=matchIt->first; atoms[queryIdx]=mol.getAtomWithIdx(atomIdx); } // finally, add this to our result: res.push_back(FeatSPtrList::value_type(newFeat)); 
} } } } #ifdef USE_VFLIB #ifndef CACHE_ARMOLGRAPHS delete molG; #endif #endif return res; }
// caller owns the result, it must be deleted ExplicitBitVect *PatternFingerprintMol(const ROMol &mol, unsigned int fpSize, std::vector<unsigned int> *atomCounts, ExplicitBitVect *setOnlyBits) { PRECONDITION(fpSize != 0, "fpSize==0"); PRECONDITION(!atomCounts || atomCounts->size() >= mol.getNumAtoms(), "bad atomCounts size"); PRECONDITION(!setOnlyBits || setOnlyBits->getNumBits() == fpSize, "bad setOnlyBits size"); std::vector<const ROMol *> patts; patts.reserve(10); unsigned int idx = 0; while (1) { std::string pq = pqs[idx]; if (pq == "") break; ++idx; const ROMol *matcher = pattern_flyweight(pq).get().getMatcher(); CHECK_INVARIANT(matcher, "bad smarts"); patts.push_back(matcher); } if (!mol.getRingInfo()->isInitialized()) { MolOps::fastFindRings(mol); } boost::dynamic_bitset<> isQueryAtom(mol.getNumAtoms()), isQueryBond(mol.getNumBonds()); ROMol::VERTEX_ITER firstA, lastA; boost::tie(firstA, lastA) = mol.getVertices(); while (firstA != lastA) { const Atom *at = mol[*firstA].get(); if (isComplexQuery(at)) { isQueryAtom.set(at->getIdx()); // std::cerr<<" complex atom: "<<at->getIdx()<<std::endl; } ++firstA; } ROMol::EDGE_ITER firstB, lastB; boost::tie(firstB, lastB) = mol.getEdges(); while (firstB != lastB) { const Bond *bond = mol[*firstB].get(); // if( isComplexQuery(bond) ){ if (isPatternComplexQuery(bond)) { isQueryBond.set(bond->getIdx()); // std::cerr<<" complex bond: "<<bond->getIdx()<<std::endl; } ++firstB; } ExplicitBitVect *res = new ExplicitBitVect(fpSize); unsigned int pIdx = 0; BOOST_FOREACH (const ROMol *patt, patts) { ++pIdx; std::vector<MatchVectType> matches; // uniquify matches? 
// time for 10K molecules w/ uniquify: 5.24s // time for 10K molecules w/o uniquify: 4.87s SubstructMatch(mol, *patt, matches, false); boost::uint32_t mIdx = pIdx + patt->getNumAtoms() + patt->getNumBonds(); BOOST_FOREACH (MatchVectType &mv, matches) { #ifdef VERBOSE_FINGERPRINTING std::cerr << "\nPatt: " << pIdx << " | "; #endif // collect bits counting the number of occurances of the pattern: gboost::hash_combine(mIdx, 0xBEEF); res->setBit(mIdx % fpSize); #ifdef VERBOSE_FINGERPRINTING std::cerr << "count: " << mIdx % fpSize << " | "; #endif bool isQuery = false; boost::uint32_t bitId = pIdx; std::vector<unsigned int> amap(mv.size(), 0); BOOST_FOREACH (MatchVectType::value_type &p, mv) { #ifdef VERBOSE_FINGERPRINTING std::cerr << p.second << " "; #endif if (isQueryAtom[p.second]) { isQuery = true; #ifdef VERBOSE_FINGERPRINTING std::cerr << "atom query."; #endif break; } gboost::hash_combine(bitId, mol.getAtomWithIdx(p.second)->getAtomicNum()); amap[p.first] = p.second; }