Example #1
0
 double getAlignmentTransform(const ROMol &prbMol, const ROMol &refMol,
                                      RDGeom::Transform3D &trans,
                                      int prbCid, int refCid, 
                                      const MatchVectType *atomMap, 
                                      const RDNumeric::DoubleVector *weights, bool reflect,
                                      unsigned int maxIterations) {
   RDGeom::Point3DConstPtrVect refPoints, prbPoints;
   const Conformer &prbCnf = prbMol.getConformer(prbCid);
   const Conformer &refCnf = refMol.getConformer(refCid);
   if (atomMap == 0) {
     // we have to figure out the mapping between the two molecule
     MatchVectType match;
     if (SubstructMatch(refMol, prbMol, match)) {
       MatchVectType::const_iterator mi;
       for (mi = match.begin(); mi != match.end(); mi++) {
         prbPoints.push_back(&prbCnf.getAtomPos(mi->first));
         refPoints.push_back(&refCnf.getAtomPos(mi->second));
       }
     } else {
       throw MolAlignException("No sub-structure match found between the probe and query mol");
     } 
   } else {
     MatchVectType::const_iterator mi;
     for (mi = atomMap->begin(); mi != atomMap->end(); mi++) {
       prbPoints.push_back(&prbCnf.getAtomPos(mi->first));
       refPoints.push_back(&refCnf.getAtomPos(mi->second));
     }
   }
   double ssr = RDNumeric::Alignments::AlignPoints(refPoints, prbPoints, 
                                                   trans, weights, reflect, maxIterations);
   ssr /= (prbPoints.size());
   return sqrt(ssr);
 }
Example #2
0
  //*************************************************************************************
  //
  //  Adds 2D coordinates to a molecule using the Depict.dll
  //
  //  ARGUMENTS:
  //    mol:          the molecule to be altered
  //    tempFilename: (OPTIONAL) the name of the temporary file 
  //
  //  RETURNS:
  //   the ID of the conformer that was added to mol on success, -1 otherwise
  //   (the DLL conversion failed, the mol file could not be parsed, or the
  //   substructure match failed)
  //
  //  Here's the process by which this works (it's kind of contorted):
  //   1) convert the mol to SMILES
  //   2) use the DLL to convert the SMILES to a mol file (on disk)
  //   3) parse the mol file into a temporary molecule
  //   4) do a substructure match from the old molecule to the
  //      temp one (which may have a different atom numbering or additional
  //      atoms added).
  //   5) Copy the positions of the matched atoms into a new conformer and
  //      add that conformer to the old molecule.
  //   6) Free the temp molecule.
  //
  //  NOTES:
  //   - *FIX:* at the moment we're not doing anything to clear up the
  //     temp file created in this process.  It'll always have the same
  //     name unless the user explicitly asks that we do something different.
  //   - To use the DLL, it's essential that the COMBICHEM_ROOT and COMBICHEM_RELEASE
  //     environment variables be set.  If this isn't done, this whole process
  //     will fail.
  //   - See the notes above about failures when opening the DLL.
  //
  //*************************************************************************************
  int Add2DCoordsToMolDLL(ROMol &mol,std::string tempFilename){
    std::string smi=MolToSmiles(mol,true);
    if(!SmilesToMolFileDLL(smi,tempFilename)){
      // the DLL could not produce a mol file
      return -1;
    }

    // build another mol from that mol file:
    RWMol *tmpMol = MolFileToMol(tempFilename,false);
    if(!tmpMol){
      // nothing could be parsed from the temp file
      return -1;
    }

    int res = -1;
    // match it up with the starting mol:
    //  (We need to do this because the depict.dll conversion
    //   to a mol file may have added Hs)
    MatchVectType matchVect;
    bool hasMatch=SubstructMatch(tmpMol,&mol,matchVect);
    if(hasMatch){
      const Conformer &conf = tmpMol->getConformer(0);
      Conformer *nconf = new Conformer(mol.getNumAtoms());
      for(MatchVectType::const_iterator mvi=matchVect.begin();
          mvi!=matchVect.end();++mvi){
        // mol is the query in the match above, so ->first indexes mol and
        // ->second indexes tmpMol (assumes the usual
        // (queryAtomIdx, molAtomIdx) pair layout -- TODO confirm)
        nconf->setAtomPos(mvi->first,conf.getAtomPos(mvi->second));
      }
      res = static_cast<int>(mol.addConformer(nconf, true));
    }
    delete tmpMol;
    return res;
  }
Example #3
0
// Applies the tautomer transformation with index `it` to Mol.
//
// Mol is searched for Options.FromTautomer[it]; on a hit, every Mol bond whose
// two atoms are both part of the match takes the bond type of the
// corresponding bond in Options.ToTautomer[it] (when such a bond exists), and
// every matched atom takes the formal charge and radical-electron count of its
// counterpart in the "to" tautomer.
//
// Returns false when `it` is out of range, when the two tautomer definitions
// have different atom counts, or when Mol does not match the "from" tautomer;
// returns true otherwise.
bool StructCheckTautomer::applyTautomer(unsigned it) {
  if (Options.FromTautomer.size() <= it || Options.ToTautomer.size() <= it) {
    if (Options.Verbose)
      BOOST_LOG(rdInfoLog) << "ERROR: incorrect Tautomer index it=" << it
                           << "\n";
    return false;
  }
  const ROMol &fromTautomer = *Options.FromTautomer[it];
  const ROMol &toTautomer = *Options.ToTautomer[it];
  if (toTautomer.getNumAtoms() != fromTautomer.getNumAtoms()) {
    if (Options.Verbose)
      BOOST_LOG(rdInfoLog) << "ERROR: incorrect data toTautomer.getNumAtoms() "
                              "!= fromTautomer.getNumAtoms()\n";
    // incorrect data
    return false;
  }
  const unsigned nta = toTautomer.getNumAtoms();

  MatchVectType match;  // The format is (queryAtomIdx, molAtomIdx)
  if (!SubstructMatch(Mol, fromTautomer, match)) return false;
  if (Options.Verbose)
    BOOST_LOG(rdInfoLog) << "found match for from_tautomer with " << nta
                         << " atoms\n";

  // Mol atom index -> tautomer atom index; atoms outside the match keep the
  // sentinel value.
  const unsigned unmapped = static_cast<unsigned>(-1);
  std::vector<unsigned> atomIdxMap(Mol.getNumAtoms(), unmapped);
  for (MatchVectType::const_iterator mit = match.begin(); mit != match.end();
       ++mit) {
    const unsigned tai = mit->first;   // From and To Tautomer Atom index
    const unsigned mai = mit->second;  // Mol Atom index
    atomIdxMap[mai] = tai;
  }

  // scan for completely mapped bonds and replace bond order with mapped bond
  // from to_tautomer
  for (RDKit::BondIterator_ bond = Mol.beginBonds(); bond != Mol.endBonds();
       bond++) {
    const unsigned ti = atomIdxMap[(*bond)->getBeginAtomIdx()];
    const unsigned tj = atomIdxMap[(*bond)->getEndAtomIdx()];
    if (unmapped == ti || unmapped == tj) continue;
    const Bond *tb = toTautomer.getBondBetweenAtoms(ti, tj);
    if (tb && (*bond)->getBondType() != tb->getBondType()) {
      (*bond)->setBondType(tb->getBondType());
    }
  }

  // apply charge/radical fixes if any
  for (unsigned i = 0; i < match.size(); i++) {
    Atom &atom = *Mol.getAtomWithIdx(match[i].second);
    const Atom &ta = *toTautomer.getAtomWithIdx(match[i].first);
    atom.setFormalCharge(ta.getFormalCharge());
    atom.setNumRadicalElectrons(ta.getNumRadicalElectrons());
  }

  return true;
}
Example #4
0
    // Fills per-atom Crippen logP and MR contributions for a molecule.
    //
    //  mol            : the molecule; the results are also stored on it as the
    //                   "_crippenLogPContribs" / "_crippenMRContribs" properties
    //  logpContribs   : output, must already have mol.getNumAtoms() entries
    //  mrContribs     : output, must already have mol.getNumAtoms() entries
    //  force          : when true, ignore any values stored on the molecule
    //  atomTypes      : optional output (same size requirement), receives the
    //                   idx of the parameter entry assigned to each atom
    //  atomTypeLabels : optional output (same size requirement), receives the
    //                   label of the parameter entry assigned to each atom
    //
    // Each atom is assigned by the first parameter entry (in collection order)
    // that has a match whose first query atom maps to it.  Entries for atoms
    // that no parameter pattern claims are left untouched.
    void getCrippenAtomContribs(const ROMol &mol,
                                std::vector< double > &logpContribs,
                                std::vector< double > &mrContribs,
                                bool force,
                                std::vector<unsigned int> *atomTypes,
                                std::vector<std::string> *atomTypeLabels
                                ){
      PRECONDITION(logpContribs.size()==mol.getNumAtoms() &&
                   mrContribs.size()==mol.getNumAtoms(),
                   "bad result vector size");
      PRECONDITION((!atomTypes || atomTypes->size()==mol.getNumAtoms()),
                   "bad atomTypes vector");
      PRECONDITION((!atomTypeLabels || atomTypeLabels->size()==mol.getNumAtoms()),
                   "bad atomTypeLabels vector");

      // The stored properties only hold the numeric contributions, so they
      // cannot satisfy a request for atom types or labels: in that case we
      // have to redo the typing.  Both properties must be present before
      // either is read.
      const bool wantsTypeInfo = atomTypes || atomTypeLabels;
      if(!force && !wantsTypeInfo &&
         mol.hasProp("_crippenLogPContribs") &&
         mol.hasProp("_crippenMRContribs")){
        std::vector<double> tmpVect1,tmpVect2;
        mol.getProp("_crippenLogPContribs",tmpVect1);
        mol.getProp("_crippenMRContribs",tmpVect2);
        if(tmpVect1.size()==mol.getNumAtoms() &&
           tmpVect2.size()==mol.getNumAtoms() ){
          logpContribs=tmpVect1;
          mrContribs=tmpVect2;
          return;
        }
      }

      // one bit per atom, cleared once the atom has been assigned a type
      boost::dynamic_bitset<> atomNeeded(mol.getNumAtoms());
      atomNeeded.set();
      const CrippenParamCollection *params=CrippenParamCollection::getParams();
      for(CrippenParamCollection::ParamsVect::const_iterator it=params->begin();
          it!=params->end(); ++it){
        std::vector<MatchVectType> matches;
        SubstructMatch(mol,*(it->dp_pattern.get()),matches,
                       false,true);
        for(std::vector<MatchVectType>::const_iterator matchIt=matches.begin();
            matchIt!=matches.end();++matchIt){
          // the atom being typed is the one matched by the first query atom
          int idx=(*matchIt)[0].second;
          if(atomNeeded[idx]){
            atomNeeded[idx]=0;
            logpContribs[idx] = it->logp;
            mrContribs[idx] = it->mr;
            if(atomTypes) (*atomTypes)[idx]=it->idx;
            if(atomTypeLabels) (*atomTypeLabels)[idx]=it->label;
          }
        }
        // no need to keep matching stuff if we already found all the atoms:
        if(atomNeeded.none()) break;
      }
      mol.setProp("_crippenLogPContribs",logpContribs,true);
      mol.setProp("_crippenMRContribs",mrContribs,true);
    }
Example #5
0
bool SmartsMatcher::hasMatch(const ROMol &mol) const {
    PRECONDITION(d_pattern.get(), "bad on pattern");

    if (d_min_count == 1 && d_max_count == UINT_MAX) {
        RDKit::MatchVectType matches;
        return SubstructMatch(mol, *d_pattern.get(), matches);
    } else {  // need to count
        const bool uniquify = true;
        std::vector<RDKit::MatchVectType> matches;
        unsigned int count =
            RDKit::SubstructMatch(mol, *d_pattern.get(), matches, uniquify);
        return (count >= d_min_count &&
                (d_max_count == UINT_MAX || count <= d_max_count));
    }
}
Example #6
0
// Adds MMFF torsion contributions to a force field.
//
//  mol               : the molecule the torsions are enumerated on
//  mmffMolProperties : supplies the torsion parameters, the atom types, the
//                      verbosity level and the stream used for reporting;
//                      must be non-null and valid
//  field             : the force field that receives the contributions; must
//                      be non-null
//  torsionBondSmarts : SMARTS describing the central bond of a torsion; each
//                      match is required to contain exactly two atoms
//
// For every matched central bond j-k whose atoms are both SP2 or SP3, every
// i-j-k-l combination (i a neighbor of j, l a neighbor of k, i != l) for which
// getMMFFTorsionParams() succeeds gets a TorsionAngleContrib.  When the
// verbosity level is non-zero the torsion energies are also evaluated at the
// field's current positions and reported on the MMFF output stream.
void addTorsions(const ROMol &mol, MMFFMolProperties *mmffMolProperties,
                 ForceFields::ForceField *field,
                 const std::string &torsionBondSmarts) {
  PRECONDITION(field, "bad ForceField");
  PRECONDITION(mmffMolProperties, "bad MMFFMolProperties");
  PRECONDITION(mmffMolProperties->isValid(),
               "missing atom types - invalid force-field");

  std::ostream &oStream = mmffMolProperties->getMMFFOStream();
  // NOTE(review): the four ADJ_ITER variables below are not used anywhere in
  // this function.
  ROMol::ADJ_ITER nbr1Idx;
  ROMol::ADJ_ITER end1Nbrs;
  ROMol::ADJ_ITER nbr2Idx;
  ROMol::ADJ_ITER end2Nbrs;
  double totalTorsionEnergy = 0.0;
  RDGeom::PointPtrVect points;
  if (mmffMolProperties->getMMFFVerbosity()) {
    // the table header is only written at the highest verbosity level
    if (mmffMolProperties->getMMFFVerbosity() == MMFF_VERBOSITY_HIGH) {
      oStream << "\n"
                 "T O R S I O N A L\n\n"
                 "--------------ATOMS---------------      ---ATOM TYPES---     "
                 "FF     TORSION              -----FORCE Params-----\n"
                 "  I        J        K        L          I    J    K    L   "
                 "CLASS     ANGLE    ENERGY       V1        V2        V3\n"
                 "-------------------------------------------------------------"
                 "-----------------------------------------------------"
              << std::endl;
    }
    // positions are only needed to evaluate the energies that get reported
    points = field->positions();
  }
  std::vector<MatchVectType> matchVect;
  // Use the shared default query when the caller passed the default SMARTS;
  // otherwise parse the caller's SMARTS into a query that is deleted below.
  const ROMol *defaultQuery = DefaultTorsionBondSmarts::query();
  const ROMol *query = (torsionBondSmarts == DefaultTorsionBondSmarts::string())
                       ? defaultQuery : SmartsToMol(torsionBondSmarts);
  TEST_ASSERT(query);
  unsigned int nHits = SubstructMatch(mol, *query, matchVect);
  // only a query parsed here is deleted; the default query is left alone
  if (query != defaultQuery) delete query;

  for (unsigned int i = 0; i < nHits; ++i) {
    MatchVectType match = matchVect[i];
    TEST_ASSERT(match.size() == 2);
    // idx2 and idx3 are the two atoms (j and k) of the central bond
    int idx2 = match[0].second;
    int idx3 = match[1].second;
    const Bond *bond = mol.getBondBetweenAtoms(idx2, idx3);
    TEST_ASSERT(bond);
    const Atom *jAtom = mol.getAtomWithIdx(idx2);
    const Atom *kAtom = mol.getAtomWithIdx(idx3);
    // torsions are only set up when both central atoms are SP2 or SP3
    if (((jAtom->getHybridization() == Atom::SP2) ||
         (jAtom->getHybridization() == Atom::SP3)) &&
        ((kAtom->getHybridization() == Atom::SP2) ||
         (kAtom->getHybridization() == Atom::SP3))) {
      // outer loop: bonds on atom j other than the central bond
      ROMol::OEDGE_ITER beg1, end1;
      boost::tie(beg1, end1) = mol.getAtomBonds(jAtom);
      while (beg1 != end1) {
        const Bond *tBond1 = mol[*beg1].get();
        if (tBond1 != bond) {
          int idx1 = tBond1->getOtherAtomIdx(idx2);
          // inner loop: bonds on atom k other than the central bond
          ROMol::OEDGE_ITER beg2, end2;
          boost::tie(beg2, end2) = mol.getAtomBonds(kAtom);
          while (beg2 != end2) {
            const Bond *tBond2 = mol[*beg2].get();
            if ((tBond2 != bond) && (tBond2 != tBond1)) {
              int idx4 = tBond2->getOtherAtomIdx(idx3);
              // make sure this isn't a three-membered ring (both end atoms
              // would be the same atom):
              if (idx4 != idx1) {
                // we now have a torsion involving atoms (bonds):
                //  idx1 - (tBond1) - idx2 - (bond) - idx3 - (tBond2) - idx4
                unsigned int torType;
                MMFFTor mmffTorParams;
                // a contribution is only added when parameters are available
                // for this torsion
                if (mmffMolProperties->getMMFFTorsionParams(
                        mol, idx1, idx2, idx3, idx4, torType, mmffTorParams)) {
                  TorsionAngleContrib *contrib = new TorsionAngleContrib(
                      field, idx1, idx2, idx3, idx4, &mmffTorParams);
                  field->contribs().push_back(ForceFields::ContribPtr(contrib));
                  if (mmffMolProperties->getMMFFVerbosity()) {
                    // reporting only: evaluate the torsion at the positions
                    // fetched from the field above
                    const Atom *iAtom = mol.getAtomWithIdx(idx1);
                    const Atom *lAtom = mol.getAtomWithIdx(idx4);
                    unsigned int iAtomType =
                        mmffMolProperties->getMMFFAtomType(idx1);
                    unsigned int jAtomType =
                        mmffMolProperties->getMMFFAtomType(idx2);
                    unsigned int kAtomType =
                        mmffMolProperties->getMMFFAtomType(idx3);
                    unsigned int lAtomType =
                        mmffMolProperties->getMMFFAtomType(idx4);
                    const RDGeom::Point3D p1((*(points[idx1]))[0],
                                             (*(points[idx1]))[1],
                                             (*(points[idx1]))[2]);
                    const RDGeom::Point3D p2((*(points[idx2]))[0],
                                             (*(points[idx2]))[1],
                                             (*(points[idx2]))[2]);
                    const RDGeom::Point3D p3((*(points[idx3]))[0],
                                             (*(points[idx3]))[1],
                                             (*(points[idx3]))[2]);
                    const RDGeom::Point3D p4((*(points[idx4]))[0],
                                             (*(points[idx4]))[1],
                                             (*(points[idx4]))[2]);
                    const double cosPhi =
                        MMFF::Utils::calcTorsionCosPhi(p1, p2, p3, p4);
                    const double torsionEnergy = MMFF::Utils::calcTorsionEnergy(
                        mmffTorParams.V1, mmffTorParams.V2, mmffTorParams.V3,
                        cosPhi);
                    if (mmffMolProperties->getMMFFVerbosity() ==
                        MMFF_VERBOSITY_HIGH) {
                      // one table row per torsion; atom indices are printed
                      // 1-based and the angle is converted to degrees
                      oStream
                          << std::left << std::setw(2) << iAtom->getSymbol()
                          << " #" << std::setw(5) << idx1 + 1 << std::setw(2)
                          << jAtom->getSymbol() << " #" << std::setw(5)
                          << idx2 + 1 << std::setw(2) << kAtom->getSymbol()
                          << " #" << std::setw(5) << idx3 + 1 << std::setw(2)
                          << lAtom->getSymbol() << " #" << std::setw(5)
                          << idx4 + 1 << std::right << std::setw(5) << iAtomType
                          << std::setw(5) << jAtomType << std::setw(5)
                          << kAtomType << std::setw(5) << lAtomType
                          << std::setw(6) << torType << "  " << std::fixed
                          << std::setprecision(3) << std::setw(10)
                          << RAD2DEG * acos(cosPhi) << std::setw(10)
                          << torsionEnergy << std::setw(10) << mmffTorParams.V1
                          << std::setw(10) << mmffTorParams.V2 << std::setw(10)
                          << mmffTorParams.V3 << std::endl;
                    }
                    totalTorsionEnergy += torsionEnergy;
                  }
                }
              }
            }
            beg2++;
          }
        }
        beg1++;
      }
    }
  }
  // the summary line is written at any non-zero verbosity level
  if (mmffMolProperties->getMMFFVerbosity()) {
    if (mmffMolProperties->getMMFFVerbosity() == MMFF_VERBOSITY_HIGH) {
      oStream << std::endl;
    }
    oStream << "TOTAL TORSIONAL ENERGY         =" << std::right << std::setw(16)
            << std::fixed << std::setprecision(4) << totalTorsionEnergy
            << std::endl;
  }
}
Example #7
0
      // ------------------------------------------------------------------------
      //
      //
      //
      // ------------------------------------------------------------------------
      void addTorsions(const ROMol &mol,const AtomicParamVect &params,
                       ForceFields::ForceField *field,
                       std::string torsionBondSmarts){
        PRECONDITION(mol.getNumAtoms()==params.size(),"bad parameters");
        PRECONDITION(field,"bad forcefield");

        // find all of the torsion bonds:
        std::vector<MatchVectType> matchVect;
        ROMol *query=SmartsToMol(torsionBondSmarts);
        TEST_ASSERT(query);
        unsigned int nHits=SubstructMatch(mol,*query,matchVect);
        delete query;

        for(unsigned int i=0; i<nHits; i++){
          MatchVectType match=matchVect[i];
          TEST_ASSERT(match.size()==2);
          int idx1=match[0].second;
          int idx2=match[1].second;
          if(!params[idx1]||!params[idx2]) continue;
          const Bond *bond=mol.getBondBetweenAtoms(idx1,idx2);
          std::vector<TorsionAngleContrib *> contribsHere;
          TEST_ASSERT(bond);
          const Atom *atom1=mol.getAtomWithIdx(idx1);
          const Atom *atom2=mol.getAtomWithIdx(idx2);

          if( (atom1->getHybridization()==Atom::SP2||atom1->getHybridization()==Atom::SP3) &&
              (atom2->getHybridization()==Atom::SP2||atom2->getHybridization()==Atom::SP3) ){
            ROMol::OEDGE_ITER beg1,end1;
            boost::tie(beg1,end1) = mol.getAtomBonds(atom1);
            while(beg1!=end1){
              const Bond *tBond1=mol[*beg1].get();
              if(tBond1!=bond){
                int bIdx = tBond1->getOtherAtomIdx(idx1);
                ROMol::OEDGE_ITER beg2,end2;
                boost::tie(beg2,end2) = mol.getAtomBonds(atom2);
                while(beg2 != end2){
                  const Bond *tBond2=mol[*beg2].get();
                  if(tBond2!=bond && tBond2!=tBond1){
                    int eIdx=tBond2->getOtherAtomIdx(idx2);
                    // make sure this isn't a three-membered ring:
                    if(eIdx != bIdx){
                      // we now have a torsion involving atoms (bonds):
                      //  bIdx - (tBond1) - idx1 - (bond) - idx2 - (tBond2) - eIdx
                      TorsionAngleContrib *contrib;

                      // if either of the end atoms is SP2 hybridized, set a flag
                      // here.  
                      bool hasSP2=false;
                      if(mol.getAtomWithIdx(bIdx)->getHybridization()==Atom::SP2 ||
                         mol.getAtomWithIdx(bIdx)->getHybridization()==Atom::SP2) {
                        hasSP2 = true;
                      }
                      //std::cout << "Torsion: " << bIdx << "-" << idx1 << "-" << idx2 << "-" << eIdx << std::endl;
                      //if(okToIncludeTorsion(mol,bond,bIdx,idx1,idx2,eIdx)){
                        //std::cout << "  INCLUDED" << std::endl;
                        contrib = new TorsionAngleContrib(field,bIdx,idx1,idx2,eIdx,
                                                          bond->getBondTypeAsDouble(),
                                                          atom1->getAtomicNum(),
                                                          atom2->getAtomicNum(),
                                                          atom1->getHybridization(),
                                                          atom2->getHybridization(),
                                                          params[idx1],params[idx2],
                                                          hasSP2);
                        field->contribs().push_back(ForceFields::ContribPtr(contrib));
                        contribsHere.push_back(contrib);
                      //}
                    }
                  }
                  beg2++;
                }
              }
              beg1++;
            }
          }
          // now divide the force constant for each contribution to the torsion energy
          // about this bond by the number of contribs about this bond:
          for(std::vector<TorsionAngleContrib *>::iterator chI=contribsHere.begin();
              chI!=contribsHere.end();++chI){
            (*chI)->scaleForceConstant(contribsHere.size());
          }
        }

      }
  // Locates the functional groups from `params` on `mol`.
  //
  //  mol     : the molecule to search
  //  params  : supplies the functional-group queries; must be non-null
  //  fgBonds : cleared on entry; on exit holds the IDs of every bond of mol
  //            that lies inside at least one functional-group match
  //
  // Returns a vector of (atom ID, functional-group index) pairs, where the
  // atom ID is the atom of mol matched by the first atom of the group and the
  // index is the group's position in params->getFuncGroups().  A match is only
  // recorded when it contributes at least one bond not already in fgBonds.
  MatchVectType findFuncGroupsOnMol(const ROMol &mol, 
				    const FragCatParams *params,
				    INT_VECT &fgBonds) {
    PRECONDITION(params,"bad params");

    fgBonds.clear();

    MatchVectType recorded;
    std::vector<MatchVectType> groupMatches;
    const MOL_SPTR_VECT &funcGroups = params->getFuncGroups();

    int groupId = 0;
    for (MOL_SPTR_VECT::const_iterator groupIt = funcGroups.begin();
         groupIt != funcGroups.end(); ++groupIt, ++groupId) {
      const ROMol *funcGroup = groupIt->get();
      // the name itself is not used below, but the property is still read
      std::string groupName;
      (*groupIt)->getProp(common_properties::_Name, groupName);

      // match this functional group onto the molecule
      SubstructMatch(mol, *funcGroup, groupMatches);

      // look at each match of this group in turn
      for (std::vector<MatchVectType>::const_iterator matchIt = groupMatches.begin();
           matchIt != groupMatches.end(); ++matchIt) {
        // FIX: we will assume that the first atom in the group is always the
        // connection atom; the second entry of the pair is its atom ID in mol
        const int attachAtomId = matchIt->front().second;

        // the atoms of mol covered by this match ...
        INT_VECT matchedAtomIds;
        for (MatchVectType::const_iterator pairIt = matchIt->begin();
             pairIt != matchIt->end(); ++pairIt) {
          matchedAtomIds.push_back(pairIt->second);
        }
        // ... and the bonds of mol among those atoms
        const INT_VECT matchedBondIds =
            Subgraphs::bondListFromAtomList(mol, matchedAtomIds);

        // Check whether all of these bonds were already covered by a group
        // handled earlier, registering the new ones as we go.
        // FIX: this assumes that the functional groups in params come in
        // decreasing order of size.
        bool addsNewBond = false;
        for (INT_VECT_CI bondIt = matchedBondIds.begin();
             bondIt != matchedBondIds.end(); ++bondIt) {
          const bool alreadySeen =
              std::find(fgBonds.begin(), fgBonds.end(), *bondIt) != fgBonds.end();
          if (!alreadySeen) {
            addsNewBond = true;
            fgBonds.push_back(*bondIt);
          }
        }

        // only record matches that are not entirely inside an earlier one
        if (addsNewBond) {
          recorded.push_back(std::pair<int, int>(attachAtomId, groupId));
        }
      }
    }

    return recorded;
  }
Example #9
0
bool fragmentMol(const ROMol& mol,
                 std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res,
                 unsigned int minCuts,
                 unsigned int maxCuts,
                 unsigned int maxCutBonds,
                 const std::string& pattern) {
#ifdef _DEBUG
  for (size_t i = 0; i < mol.getNumAtoms(); i++) {
    std::string symbol = mol.getAtomWithIdx(i)->getSymbol();
    int label = 0;
    mol.getAtomWithIdx(i)->getPropIfPresent(common_properties::molAtomMapNumber,
                                            label);
    char a1[32];
    if (0 == label)
      sprintf(a1, "\'%s\'", symbol.c_str(), label);
    else
      sprintf(a1, "\'%s:%u\'", symbol.c_str(), label);
    std::cout << "Atom " << i << ": " << a1;  //<<" Bonds:";
    std::cout << "\n";
  }
#endif

  res.clear();
  std::auto_ptr<const ROMol> smarts((const ROMol*)SmartsToMol(pattern));
  std::vector<MatchVectType>
      matching_atoms;  // one bond per match ! with default pattern
  unsigned int total = SubstructMatch(mol, *smarts, matching_atoms);
#ifdef _DEBUG
  std::cout << "total substructs =" << total
            << "\nmatching bonds (atom1, atom2):\n";
#endif
  if (0 == total)  // Not found.  Return empty set of molecules
    return false;
#ifdef _DEBUG
  for (size_t i = 0; i < matching_atoms.size(); i++) {
    std::string symbol =
        mol.getAtomWithIdx(matching_atoms[i][0].second)->getSymbol();
    int label = 0;
    mol.getAtomWithIdx(matching_atoms[i][0].second)
        ->getPropIfPresent(common_properties::molAtomMapNumber, label);
    char a1[32];
    if (0 == label)
      sprintf(a1, "\'%s\'", symbol.c_str(), label);
    else
      sprintf(a1, "\'%s:%u\'", symbol.c_str(), label);
    symbol = mol.getAtomWithIdx(matching_atoms[i][1].second)->getSymbol();
    label = 0;
    mol.getAtomWithIdx(matching_atoms[i][1].second)
        ->getPropIfPresent(common_properties::molAtomMapNumber, label);
    char a2[32];
    if (0 == label)
      sprintf(a2, "\'%s\'", symbol.c_str(), label);
    else
      sprintf(a2, "\'%s:%u\'", symbol.c_str(), label);

    std::cout << i << ": (" << matching_atoms[i][0].second << a1 << ","
              << matching_atoms[i][1].second << a2 << ") \n";
  }
#endif

  std::vector<BondVector_t> matching_bonds;  // List of matched query's bonds
  convertMatchingToBondVect(matching_bonds, matching_atoms, mol);
  if (matching_bonds.size() > maxCutBonds) return false;
#ifdef _DEBUG
  std::cout << "total matching_bonds = " << matching_bonds.size() << "\n";
#endif

  // loop to generate every cut in the molecule
  BondVector_t bonds_selected;
  processCuts(0, minCuts, maxCuts, bonds_selected, matching_bonds, mol, res);
  return true;
}
  // Builds the chemical features of `mol` from this factory's feature
  // definitions.
  //
  //  mol         : the molecule to perceive features on
  //  includeOnly : must be non-null; when it is not the empty string, only
  //                definitions whose family equals it are used
  //
  // Within one family, a match is dropped when its set of atom indices is
  // contained in the atom set of a match that was accepted earlier.  Accepted
  // features get consecutive IDs starting at 1.
  FeatSPtrList MolChemicalFeatureFactory::getFeaturesForMol(const ROMol &mol,
                                                            const char* includeOnly) const {
    PRECONDITION(includeOnly,"bad limits");
    const std::string familyFilter(includeOnly);

#ifdef USE_VFLIB
    AR_MOLGRAPH *molG=getMolGraph(mol);
#endif
    typedef std::vector< std::pair< std::string,std::set<int> > > AcceptedMatches;
    AcceptedMatches accepted;
    FeatSPtrList features;
    int nextFeatId = 1;

    for(MolChemicalFeatureDef::CollectionType::const_iterator defIt=beginFeatureDefs();
        defIt!=endFeatureDefs();++defIt){
      const MolChemicalFeatureDef::CollectionType::value_type &featDef=*defIt;
      // skip definitions that are filtered out by family
      if(!familyFilter.empty() && !(familyFilter==featDef->getFamily())){
        continue;
      }

      std::vector< MatchVectType > hits;
#ifdef USE_VFLIB
      unsigned int numHits=SubstructMatch(molG,*featDef->getPattern(),hits);
#else
      unsigned int numHits=SubstructMatch(mol,*featDef->getPattern(),hits);
#endif
      for(unsigned int hitIdx=0;hitIdx<numHits;++hitIdx){
        const MatchVectType &hit=hits[hitIdx];

        // the molecule atoms covered by this hit
        std::set<int> hitAtoms;
        for(MatchVectType::const_iterator pairIt=hit.begin();
            pairIt!=hit.end();++pairIt){
          hitAtoms.insert(pairIt->second);
        }

        // is this hit contained in one we already accepted for the family?
        bool alreadyCovered=false;
        for(AcceptedMatches::const_iterator accIt=accepted.begin();
            accIt!=accepted.end() && !alreadyCovered;
            ++accIt){
          alreadyCovered = accIt->first==featDef->getFamily() &&
                           std::includes(accIt->second.begin(),accIt->second.end(),
                                         hitAtoms.begin(),hitAtoms.end());
        }
        if(alreadyCovered){
          continue;
        }
        accepted.push_back(std::make_pair(featDef->getFamily(),hitAtoms));

        // Set up the feature:
        MolChemicalFeature *newFeat=new MolChemicalFeature(&mol,this,featDef.get(),nextFeatId);
        ++nextFeatId;
        MolChemicalFeature::AtomPtrContainer &featAtoms=newFeat->d_atoms;
        featAtoms.resize(hit.size());

        // store each matched atom at the position of its query atom:
        for(MatchVectType::const_iterator pairIt=hit.begin();
            pairIt!=hit.end();++pairIt){
          featAtoms[pairIt->first]=mol.getAtomWithIdx(pairIt->second);
        }

        // finally, add this to our result:
        features.push_back(FeatSPtrList::value_type(newFeat));
      }
    }
#ifdef USE_VFLIB
#ifndef CACHE_ARMOLGRAPHS
    delete molG;
#endif
#endif
    return features;
  }
Example #11
0
// caller owns the result, it must be deleted
ExplicitBitVect *PatternFingerprintMol(const ROMol &mol, unsigned int fpSize,
                                       std::vector<unsigned int> *atomCounts,
                                       ExplicitBitVect *setOnlyBits) {
  PRECONDITION(fpSize != 0, "fpSize==0");
  PRECONDITION(!atomCounts || atomCounts->size() >= mol.getNumAtoms(),
               "bad atomCounts size");
  PRECONDITION(!setOnlyBits || setOnlyBits->getNumBits() == fpSize,
               "bad setOnlyBits size");

  std::vector<const ROMol *> patts;
  patts.reserve(10);
  unsigned int idx = 0;
  while (1) {
    std::string pq = pqs[idx];
    if (pq == "") break;
    ++idx;
    const ROMol *matcher = pattern_flyweight(pq).get().getMatcher();
    CHECK_INVARIANT(matcher, "bad smarts");
    patts.push_back(matcher);
  }

  if (!mol.getRingInfo()->isInitialized()) {
    MolOps::fastFindRings(mol);
  }

  boost::dynamic_bitset<> isQueryAtom(mol.getNumAtoms()),
      isQueryBond(mol.getNumBonds());
  ROMol::VERTEX_ITER firstA, lastA;
  boost::tie(firstA, lastA) = mol.getVertices();
  while (firstA != lastA) {
    const Atom *at = mol[*firstA].get();
    if (isComplexQuery(at)) {
      isQueryAtom.set(at->getIdx());
      // std::cerr<<"   complex atom: "<<at->getIdx()<<std::endl;
    }
    ++firstA;
  }
  ROMol::EDGE_ITER firstB, lastB;
  boost::tie(firstB, lastB) = mol.getEdges();
  while (firstB != lastB) {
    const Bond *bond = mol[*firstB].get();
    // if( isComplexQuery(bond) ){
    if (isPatternComplexQuery(bond)) {
      isQueryBond.set(bond->getIdx());
      // std::cerr<<"   complex bond: "<<bond->getIdx()<<std::endl;
    }
    ++firstB;
  }

  ExplicitBitVect *res = new ExplicitBitVect(fpSize);
  unsigned int pIdx = 0;
  BOOST_FOREACH (const ROMol *patt, patts) {
    ++pIdx;
    std::vector<MatchVectType> matches;
    // uniquify matches?
    //   time for 10K molecules w/ uniquify: 5.24s
    //   time for 10K molecules w/o uniquify: 4.87s
    SubstructMatch(mol, *patt, matches, false);
    boost::uint32_t mIdx = pIdx + patt->getNumAtoms() + patt->getNumBonds();
    BOOST_FOREACH (MatchVectType &mv, matches) {
#ifdef VERBOSE_FINGERPRINTING
      std::cerr << "\nPatt: " << pIdx << " | ";
#endif
      // collect bits counting the number of occurances of the pattern:
      gboost::hash_combine(mIdx, 0xBEEF);
      res->setBit(mIdx % fpSize);
#ifdef VERBOSE_FINGERPRINTING
      std::cerr << "count: " << mIdx % fpSize << " | ";
#endif

      bool isQuery = false;
      boost::uint32_t bitId = pIdx;
      std::vector<unsigned int> amap(mv.size(), 0);
      BOOST_FOREACH (MatchVectType::value_type &p, mv) {
#ifdef VERBOSE_FINGERPRINTING
        std::cerr << p.second << " ";
#endif
        if (isQueryAtom[p.second]) {
          isQuery = true;
#ifdef VERBOSE_FINGERPRINTING
          std::cerr << "atom query.";
#endif
          break;
        }
        gboost::hash_combine(bitId,
                             mol.getAtomWithIdx(p.second)->getAtomicNum());
        amap[p.first] = p.second;
      }