// Returns one value per atom: the entry of the table obtained from
// data3D.getRCOV() for that atom, divided by the table entry at index 5.
// The table is indexed by (atomic number - 1), so index 5 is the entry for
// atomic number 6.
// NOTE(review): an atom with atomic number 0 would index the table at -1 --
// confirm callers never pass such atoms.
std::vector<double> MolData3Ddescriptors::GetRelativeRcov(const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  double* radiusTable = data3D.getRCOV();

  std::vector<double> scaled(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int slot = mol.getAtomWithIdx(idx)->getAtomicNum() - 1;
    scaled[idx] = radiusTable[slot] / radiusTable[5];
  }
  return scaled;
}
// Returns one value per atom, looked up in the table obtained from
// data3D.getNEG() at index (atomic number - 1).
// Presumably these are relative electronegativities, judging by the function
// name -- verify against the table definition.
std::vector<double> MolData3Ddescriptors::GetRelativeENeg(const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  double* negTable = data3D.getNEG();

  std::vector<double> perAtom(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int slot = mol.getAtomWithIdx(idx)->getAtomicNum() - 1;
    perAtom[idx] = negTable[slot];
  }
  return perAtom;
}
// Returns one value per atom, looked up in the table obtained from
// data3D.getMW() at index (atomic number - 1).
// Presumably these are relative atomic weights, judging by the function name
// -- verify against the table definition.
std::vector<double> MolData3Ddescriptors::GetRelativeMW(const RDKit::ROMol& mol) {
  double* weightTable = data3D.getMW();
  const int atomCount = mol.getNumAtoms();

  std::vector<double> perAtom(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int slot = mol.getAtomWithIdx(idx)->getAtomicNum() - 1;
    perAtom[idx] = weightTable[slot];
  }
  return perAtom;
}
// Returns one value per atom, looked up in the table obtained from
// data3D.getIonPOL() at index (atomic number - 1).
// NOTE(review): the function name says "relative" while the table getter and
// the original local name suggest absolute values -- confirm which is meant.
std::vector<double> MolData3Ddescriptors::GetRelativeIonPol(const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  double* ionPolTable = data3D.getIonPOL();

  std::vector<double> perAtom(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int slot = mol.getAtomWithIdx(idx)->getAtomicNum() - 1;
    perAtom[idx] = ionPolTable[slot];
  }
  return perAtom;
}
// Returns one value per atom, looked up in the table obtained from
// data3D.getVDW() at index (atomic number - 1).
// Presumably these are relative van der Waals values, judging by the function
// name -- verify against the table definition.
std::vector<double> MolData3Ddescriptors::GetRelativeVdW(const RDKit::ROMol& mol) {
  const int atomCount = mol.getNumAtoms();
  double* vdwTable = data3D.getVDW();

  std::vector<double> perAtom(atomCount, 0.0);
  for (int idx = 0; idx < atomCount; ++idx) {
    const int slot = mol.getAtomWithIdx(idx)->getAtomicNum() - 1;
    perAtom[idx] = vdwTable[slot];
  }
  return perAtom;
}
// Computes the SASA of `mol` through internalCalcSASA().
//
// With a null `query` the value returned by internalCalcSASA() is passed
// through unchanged. With a non-null `query` the result is instead the sum of
// the double-valued "SASA" atom property over every atom matching the query.
// NOTE(review): presumably internalCalcSASA() stores the per-atom "SASA"
// property as a side effect -- confirm; it is always called first either way.
double calcSASA(const RDKit::ROMol &mol, const std::vector<double> &radii,
                int confIdx, const RDKit::QueryAtom *query,
                const SASAOpts &opts) {
  const double wholeMolecule = internalCalcSASA(mol, radii, confIdx, opts);
  if (!query) {
    return wholeMolecule;
  }

  double matchedTotal = 0.0;
  for (ROMol::ConstQueryAtomIterator it = mol.beginQueryAtoms(query);
       it != mol.endQueryAtoms(); ++it) {
    const Atom *matched = *it;
    matchedTotal += matched->getProp<double>("SASA");
  }
  return matchedTotal;
}
unsigned int Compute2DCoords(RDKit::ROMol &mol, bool canonOrient, bool clearConfs, python::dict &coordMap, unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100, int sampleSeed = 100, bool permuteDeg4Nodes = false, double bondLength = -1.0) { RDGeom::INT_POINT2D_MAP cMap; cMap.clear(); python::list ks = coordMap.keys(); for (unsigned int i = 0; i < python::extract<unsigned int>(ks.attr("__len__")()); i++) { unsigned int id = python::extract<unsigned int>(ks[i]); if (id >= mol.getNumAtoms()) { throw_value_error("atom index out of range"); } cMap[id] = python::extract<RDGeom::Point2D>(coordMap[id]); } double oBondLen = RDDepict::BOND_LEN; if (bondLength > 0) { RDDepict::BOND_LEN = bondLength; } unsigned int res; res = RDDepict::compute2DCoords(mol, &cMap, canonOrient, clearConfs, nFlipsPerSample, nSamples, sampleSeed, permuteDeg4Nodes); if (bondLength > 0) { RDDepict::BOND_LEN = oBondLen; } return res; }
// adaptation from EState.py // we need the Is value only there std::vector<double> MolData3Ddescriptors::GetEState(const RDKit::ROMol &mol){ int numAtoms = mol.getNumAtoms(); std::vector<double> Is =GetIState(mol); double tmp,p; double *dist = RDKit::MolOps::getDistanceMat(mol,false,false); double accum[numAtoms]; for (int i=0;i<numAtoms;i++) { accum[i]=0.0; } for (int i=0;i<numAtoms;i++) { for (int j=i+1;j<numAtoms;j++) { p = dist[i * numAtoms + j]+1; if (p < 1e6) { tmp = (Is[i] - Is[j]) / (p * p); accum[i] += tmp; accum[j] -= tmp; } } } for (int i=0;i<numAtoms;i++) { Is[i]+=accum[i]; } return Is; }
std::string MolpherMol::MolpherMolImpl::asMolBlock(bool include_locks) const { RDKit::ROMol* temp = asRDMol(include_locks); std::ostringstream os; os << RDKit::MolToMolBlock(*temp) << std::endl; if (include_locks) { RDKit::STR_VECT prop_names = temp->getPropList(); for (MolpherAtom::LockingMask lock : MolpherAtom::atom_locks) { std::string lock_prop_name("MOLPHER_" + MolpherAtom::lockToString(lock)); if (std::find(prop_names.begin(), prop_names.end(), lock_prop_name) != prop_names.end()) { os << "> <" << lock_prop_name << ">" << std::endl; os << temp->getProp<std::string>(lock_prop_name) << std::endl; os << std::endl; } } os << "$$$$" << std::endl; } delete temp; return os.str(); }
// Computes one intrinsic-state value per atom.
//
// Atoms with no neighbours or with atomic number <= 1 get 0. For every other
// atom, with Zv = outer-shell electron count, h = total hydrogen count,
// N = GetPrincipalQuantumNumber(atomic number) and d = degree:
//   dv = (Zv - h) / (atomicNum - Zv - 1)
//   I  = round(1000 * (4 / N^2 * dv + 1) / d) / 1000
// i.e. the value is rounded to three decimals.
std::vector<double> MolData3Ddescriptors::GetIState(const RDKit::ROMol &mol) {
  const int atomCount = mol.getNumAtoms();
  std::vector<double> intrinsic;

  for (int idx = 0; idx < atomCount; ++idx) {
    const RDKit::Atom *current = mol.getAtomWithIdx(idx);
    const int z = current->getAtomicNum();
    const int nNeighbors = current->getDegree();

    // Isolated atoms and hydrogens contribute nothing.
    if (nNeighbors <= 0 || z <= 1) {
      intrinsic.push_back(0);
      continue;
    }

    const int hCount = current->getTotalNumHs();
    const int outerElecs =
        RDKit::PeriodicTable::getTable()->getNouterElecs(z);
    double delta = static_cast<double>(outerElecs) - hCount;
    delta = delta / static_cast<double>(z - outerElecs - 1);
    const int shell = GetPrincipalQuantumNumber(z);

    // WHIM-P5.pdf paper 1997  => +7 & NoHydrogens is used!
    const double scaled =
        1000 * (4.0 / (shell * shell) * delta + 1.0) / nNeighbors;
    intrinsic.push_back(round(scaled) / 1000);
  }
  return intrinsic;
}
void testautocorrelation() { std::cout << "=>start test rdf\n"; std::string pathName = getenv("RDBASE"); std::string sdfName = pathName + "/Code/GraphMol/Descriptors/test_data/chlorobenzene.sdf"; RDKit::SDMolSupplier reader(sdfName, true, false); int nDone = 0; while (!reader.atEnd()) { ++nDone; RDKit::ROMol *m = reader.next(); TEST_ASSERT(m); std::string nm; m->getProp("_Name",nm); std::vector<double> dwhim; //for (int i=1;i<11;i++) { // std::cout << "i:" << 0.005*i << "\n"; dwhim = RDKit::Descriptors::AUTOCORR3D(*m, -1); for (int j=0;j<80;j++) { std::cout << dwhim[j] << ","; } std::cout << "\n"; //} std::cout << "=>read molecule: " << nDone << std::endl; delete m; } BOOST_LOG(rdErrorLog) << " done" << std::endl; }
// modification of previous code to follow documentation from Padel code std::vector<double> MolData3Ddescriptors::GetEState2(const RDKit::ROMol &mol){ int numAtoms = mol.getNumAtoms(); std::vector<double> Si =GetIState(mol); // in WHIM definition it's write: double tmp,p,d; double *dist = RDKit::MolOps::getDistanceMat(mol,false,false); double accum[numAtoms]; for (int i=0;i<numAtoms;i++) { accum[i]=0.0; } for (int i=0;i<numAtoms;i++) { for (int j=i+1;j<numAtoms;j++) { d = dist[i * numAtoms + j]; p = dist[i * numAtoms + j]+1; if (d == 1) { tmp = (Si[i] - Si[j]) / (p * p); accum[i] += tmp; accum[j] -= tmp; } } } // add the Accum to the Si // WHIM Si values // electrotopological indices are scaled thus: Si'=Si + 7 => Si' > 0 // In this case, only the nonhydrogen atoms are considered, // and the atomic electrotopological charge of each atom depends on its atom neighbor. // So we should not use all the terms in the sum but only Adj matrix cases! // Correct the Si adding the rescaling parameter for WHIM only for (int i=0;i<numAtoms;i++) { Si[i]+=accum[i]+7.0; } return Si; }
unsigned int Compute2DCoordsMimicDistmat( RDKit::ROMol &mol, python::object distMat, bool canonOrient, bool clearConfs, double weightDistMat, unsigned int nFlipsPerSample, unsigned int nSamples, int sampleSeed, bool permuteDeg4Nodes, double bondLength = -1.0) { PyObject *distMatPtr = distMat.ptr(); if (!PyArray_Check(distMatPtr)) { throw_value_error("Argument isn't an array"); } PyArrayObject *dmatrix = reinterpret_cast<PyArrayObject *>(distMatPtr); unsigned int nitems = PyArray_DIM(dmatrix, 0); unsigned int na = mol.getNumAtoms(); if (nitems != na * (na - 1) / 2) { throw_value_error( "The array size does not match the number of atoms in the molecule"); } double *inData = reinterpret_cast<double *>(PyArray_DATA(dmatrix)); double *cData = new double[nitems]; memcpy(static_cast<void *>(cData), static_cast<const void *>(inData), nitems * sizeof(double)); DOUBLE_SMART_PTR dmat(cData); double oBondLen = RDDepict::BOND_LEN; if (bondLength > 0) { RDDepict::BOND_LEN = bondLength; } unsigned int res; res = RDDepict::compute2DCoordsMimicDistMat( mol, &dmat, canonOrient, clearConfs, weightDistMat, nFlipsPerSample, nSamples, sampleSeed, permuteDeg4Nodes); if (bondLength > 0) { RDDepict::BOND_LEN = oBondLen; } return res; }
// Returns one charge per atom as filled in by
// RDKit::computeGasteigerCharges(); the vector is pre-sized to the atom count
// and zero-initialised before the call.
std::vector<double> MolData3Ddescriptors::GetCharges(const RDKit::ROMol &mol) {
  // use 12 iterations... can be more
  const int iterations = 12;

  std::vector<double> perAtomCharge(mol.getNumAtoms(), 0.0);
  RDKit::computeGasteigerCharges(mol, perAtomCharge, iterations, true);
  return perAtomCharge;
}
// Collects torsion and improper-torsion terms for `mol` into `details`.
//
// The three output containers of `details` (expTorsionAtoms, expTorsionAngles,
// improperAtoms) are cleared first. Then:
//  * useExpTorsions: every pattern of the ExpTorsionAngleCollection for
//    `version` is matched against the molecule; for each match whose central
//    bond (atoms 2-3) has not been handled yet, the four atom indices and the
//    pattern's (signs, V) pair are recorded. With `verbose`, each recorded
//    torsion is printed to std::cout.
//  * useBasicKnowledge: (a) for every SP2 C, N or O atom with exactly three
//    neighbours an improper entry is recorded as
//    [nbr0, centre, nbr1, nbr2, centre atomic number, isBoundToSP2O];
//    (b) for rings of size 4 to 6, every run of four consecutive SP2 ring
//    atoms whose central bond is not yet handled gets a torsion with
//    signs[1] = -1 and force constant fconsts[1] = 100.0.
//
// Throws ValueErrorException when the molecule has no atoms, and an invariant
// violation when ring info is missing or when the two central atoms of a
// torsion are not bonded.
void getExperimentalTorsions(const RDKit::ROMol &mol, CrystalFFDetails &details,
                             bool useExpTorsions, bool useBasicKnowledge,
                             unsigned int version, bool verbose) {
  unsigned int nb = mol.getNumBonds();
  unsigned int na = mol.getNumAtoms();
  if (!na) {
    throw ValueErrorException("molecule has no atoms");
  }

  // check that vectors are empty
  details.expTorsionAtoms.clear();
  details.expTorsionAngles.clear();
  details.improperAtoms.clear();

  unsigned int aid1, aid2, aid3, aid4;
  unsigned int bid2;

  // Marks central bonds that already carry a torsion term.
  boost::dynamic_bitset<> doneBonds(nb);

  if (useExpTorsions) {
    // we set the torsion angles with experimental data
    const ExpTorsionAngleCollection *params =
        ExpTorsionAngleCollection::getParams(version);

    // loop over patterns
    for (const auto &param : *params) {
      std::vector<MatchVectType> matches;
      SubstructMatch(mol, *(param.dp_pattern.get()), matches, false, true);

      // loop over matches
      for (const auto &match : matches) {
        // get bond indices
        aid1 = match[param.idx[0]].second;
        aid2 = match[param.idx[1]].second;
        aid3 = match[param.idx[2]].second;
        aid4 = match[param.idx[3]].second;

        // A missing bond would previously have been dereferenced as null.
        const auto *centralBond = mol.getBondBetweenAtoms(aid2, aid3);
        CHECK_INVARIANT(centralBond, "no bond between central torsion atoms");
        bid2 = centralBond->getIdx();

        if (!doneBonds[bid2]) {
          doneBonds[bid2] = 1;
          std::vector<int> atoms(4);
          atoms[0] = aid1;
          atoms[1] = aid2;
          atoms[2] = aid3;
          atoms[3] = aid4;
          details.expTorsionAtoms.push_back(atoms);
          details.expTorsionAngles.push_back(
              std::make_pair(param.signs, param.V));
          if (verbose) {
            std::cout << param.smarts << ": " << aid1 << " " << aid2 << " "
                      << aid3 << " " << aid4 << ", (";
            // Separator-first printing gives the same output as before for a
            // non-empty V and avoids the unsigned size() - 1 underflow.
            for (unsigned int i = 0; i < param.V.size(); ++i) {
              if (i) {
                std::cout << ", ";
              }
              std::cout << param.V[i];
            }
            std::cout << ") " << std::endl;
          }
        }  // if not donePaths
      }    // end loop over matches
    }      // end loop over patterns
  }

  // apply basic knowledge such as flat aromatic rings, other sp2-centers,
  // straight triple bonds, etc.
  if (useBasicKnowledge) {
    boost::dynamic_bitset<> doneAtoms(na);
    ROMol::ADJ_ITER nbrIdx;
    ROMol::ADJ_ITER endNbrs;

    // inversion terms (improper torsions / out-of-plane bends / inversion)
    // loop over atoms
    for (aid2 = 0; aid2 < na; ++aid2) {
      if (!(doneAtoms[aid2])) {
        std::vector<int> atoms(4, -1);
        atoms[1] = aid2;
        const Atom *atom2 = mol.getAtomWithIdx(atoms[1]);
        int at2AtomicNum = atom2->getAtomicNum();

        // if atom is a N,O or C and SP2-hybridized
        if (((at2AtomicNum == 6) || (at2AtomicNum == 7) ||
             (at2AtomicNum == 8)) &&
            (atom2->getHybridization() == Atom::SP2)) {
          // get neighbors
          boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom2);
          // check if enough neighbours
          if (mol.getAtomDegree(atom2) != 3) {
            continue;
          }

          unsigned int i = 0;
          unsigned int isBoundToSP2O = 0;  // false
          for (; nbrIdx != endNbrs; ++nbrIdx) {
            const Atom *atomX = mol[*nbrIdx];
            atoms[i] = atomX->getIdx();
            // if the central atom is sp2 carbon and is bound to sp2 oxygen,
            // set a flag
            if (!isBoundToSP2O) {
              isBoundToSP2O = ((at2AtomicNum == 6) &&
                               (atomX->getAtomicNum() == 8) &&
                               (atomX->getHybridization() == Atom::SP2));
            }
            // Slot 1 holds the central atom, so neighbours go to 0, 2, 3.
            if (!i) {
              ++i;
            }
            ++i;
          }
          atoms.push_back(at2AtomicNum);
          atoms.push_back(isBoundToSP2O);
          details.improperAtoms.push_back(atoms);
          /*if (verbose) {
            std::cout << "out-of-plane bend: " << atoms[0] << " " << atoms[1]
                      << " " << atoms[2] << " " << atoms[3] << std::endl;
          }*/
        }
      }  // if atom is a N,O or C and SP2-hybridized
    }

    // torsions for flat rings
    const RingInfo *rinfo = mol.getRingInfo();
    // FIX: make sure we have ring info
    CHECK_INVARIANT(rinfo, "");
    const VECT_INT_VECT &atomRings = rinfo->atomRings();
    for (const auto &atomRing : atomRings) {
      unsigned int rSize = atomRing.size();
      // we don't need to deal with 3 membered rings
      // and we do not treat rings greater than 6
      if (rSize < 4 || rSize > 6) {
        continue;
      }

      // loop over ring atoms
      for (unsigned int i = 0; i < rSize; ++i) {
        // proper torsions
        aid1 = atomRing[i];
        aid2 = atomRing[(i + 1) % rSize];
        aid3 = atomRing[(i + 2) % rSize];
        aid4 = atomRing[(i + 3) % rSize];

        // A missing bond would previously have been dereferenced as null.
        const auto *ringBond = mol.getBondBetweenAtoms(aid2, aid3);
        CHECK_INVARIANT(ringBond, "no bond between consecutive ring atoms");
        bid2 = ringBond->getIdx();

        // if all 4 atoms are SP2, add torsion
        if (!(doneBonds[bid2]) &&
            (mol.getAtomWithIdx(aid1)->getHybridization() == Atom::SP2) &&
            (mol.getAtomWithIdx(aid2)->getHybridization() == Atom::SP2) &&
            (mol.getAtomWithIdx(aid3)->getHybridization() == Atom::SP2) &&
            (mol.getAtomWithIdx(aid4)->getHybridization() == Atom::SP2)) {
          doneBonds[bid2] = 1;
          std::vector<int> atoms(4);
          atoms[0] = aid1;
          atoms[1] = aid2;
          atoms[2] = aid3;
          atoms[3] = aid4;
          details.expTorsionAtoms.push_back(atoms);
          std::vector<int> signs(6, 1);
          signs[1] = -1;  // MMFF sign for m = 2
          std::vector<double> fconsts(6, 0.0);
          fconsts[1] = 100.0;  // 7.0 is MMFF force constants for aromatic rings
          details.expTorsionAngles.push_back(std::make_pair(signs, fconsts));
          /*if (verbose) {
            std::cout << "SP2 ring: " << aid1 << " " << aid2 << " " << aid3
                      << " " << aid4 << std::endl;
          }*/
        }
      }  // loop over atoms in ring
    }    // loop over rings
  }      // if useBasicKnowledge
}  // end function
void testMORSE() { std::cout << "=>start test MORSE\n"; std::string pathName = getenv("RDBASE"); std::string sdfName = pathName + "/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf"; RDKit::SDMolSupplier reader(sdfName, true, false); std::string fName = pathName + "/Code/GraphMol/Descriptors/test_data/MORSE.out"; std::ifstream instrm(fName.c_str()); std::string line; std::vector<std::vector<std::string> > data; while (std::getline(instrm, line)) { std::string phrase; std::vector<std::string> row; std::stringstream ss(line); while (std::getline(ss, phrase, '\t')) { row.push_back(phrase); } data.push_back(row); } std::cout << "=>read file\n"; int nDone = 0; while (!reader.atEnd()) { RDKit::ROMol *m = reader.next(); TEST_ASSERT(m); std::string nm; m->getProp("_Name", nm); std::vector<double> dmorse; RDKit::Descriptors::MORSE(*m, dmorse, -1); std::vector<std::string> myrow = data[nDone]; std::string inm = myrow[0]; TEST_ASSERT(inm == nm); for (int i = 0; i < dmorse.size(); i++) { double ref = atof(myrow[i + 1].c_str()); if (fabs(ref) > 0.01) { if (fabs((ref - dmorse[i]) / ref) > 1) { std::cout << "value mismatch: pos" << i << " " << inm << " " << ref << " " << dmorse[i] << std::endl; } } if (fabs(ref) < 0.01) { if (fabs(ref - dmorse[i]) > 0.02) { std::cout << "value mismatch: pos" << i << " " << inm << " " << ref << " " << dmorse[i] << std::endl; } } if (ref > 1 && fabs(ref - dmorse[i]) / ref > 0.02) { std::cout << "value mismatch: pos" << i << " " << inm << " " << ref << " " << dmorse[i] << std::endl; } // we're testing reasonably sized values and want to be sure that we're // within 2% of the reference. TEST_ASSERT(ref < 1 || fabs(ref - dmorse[i]) / ref < 0.02); } delete m; ++nDone; } BOOST_LOG(rdErrorLog) << "test on : " << nDone << " molecules done" << std::endl; }
// Applies canonicalizeConformer() to every conformer of `mol`, passing 0 as
// the second argument and forwarding `normalizeCovar` and `ignoreHs`.
void canonicalizeMol(RDKit::ROMol &mol, bool normalizeCovar, bool ignoreHs) {
  for (ROMol::ConformerIterator confIt = mol.beginConformers();
       confIt != mol.endConformers(); ++confIt) {
    canonicalizeConformer(**confIt, 0, normalizeCovar, ignoreHs);
  }
}
void testGETAWAY() { std::cout << "=>start test GETAWAY\n"; std::string pathName = getenv("RDBASE"); std::string sdfName = pathName + "/Code/GraphMol/Descriptors/test_data/PBF_egfr.sdf"; RDKit::SDMolSupplier reader(sdfName, true, false); std::string fName = pathName + "/Code/GraphMol/Descriptors/test_data/GETAWAY.new.out"; std::ifstream instrm(fName.c_str()); // std::string ofName = // pathName + "/Code/GraphMol/Descriptors/test_data/GETAWAY.new.out"; // std::ofstream outstrm(ofName.c_str()); std::string line; std::vector<std::vector<std::string> > data; while (std::getline(instrm, line)) { std::string phrase; std::vector<std::string> row; std::stringstream ss(line); while (std::getline(ss, phrase, '\t')) { row.push_back(phrase); } data.push_back(row); } int nDone = 0; while (!reader.atEnd()) { // if (nDone > 10) { // break; // } RDKit::ROMol *m = reader.next(); TEST_ASSERT(m); std::string nm; m->getProp("_Name", nm); std::vector<double> dgetaway; RDKit::Descriptors::GETAWAY(*m, dgetaway); std::vector<std::string> myrow = data[nDone]; std::string inm = myrow[0]; TEST_ASSERT(inm == nm); // std::cout << "\n"; // int numAtoms = m->getNumAtoms(); // std::cout << "number of Atoms : " << numAtoms << "\n"; // outstrm << nm << "\t"; for (int i = 0; i < 273; i++) { double ref = atof(myrow[i + 1].c_str()); if (fabs(ref) > 1) { if (fabs((ref - dgetaway[i]) / ref) > 0.01) { std::cerr << "value mismatch: pos" << i << " " << inm << " dragon: " << ref << " rdkit: " << dgetaway[i] << std::endl; } } if (fabs(ref) <= 1) { if (fabs(ref - dgetaway[i]) > 0.02) { std::cerr << "value mismatch: pos" << i << " " << inm << " dragon: " << ref << " rdkit: " << dgetaway[i] << std::endl; } } // if (i != 0) outstrm << "\t"; // outstrm << dgetaway[i]; TEST_ASSERT(fabs(ref - dgetaway[i]) < 0.05); } // outstrm << "\n"; delete m; ++nDone; // if (nDone > 50) break; } BOOST_LOG(rdErrorLog) << "test on : " << nDone << " molecules done" << std::endl; }