//! Builds the weighted 3x3 cross-covariance matrix between two point sets.
/*!
  covMat[a][b] is set to the sum over all points of
  weight * probe[a] * ref[b], where a and b run over the x, y, z components.

  \param refPoints    reference points
  \param probePoints  probe points; must contain as many points as refPoints
  \param weights      one weight per point
  \param covMat       output matrix; zeroed before the sizes are checked
*/
void _computeCovarianceMat(const RDGeom::Point3DConstPtrVect &refPoints,
                           const RDGeom::Point3DConstPtrVect &probePoints,
                           const DoubleVector &weights, double covMat[3][3]) {
  // start from an all-zero matrix
  for (unsigned int row = 0; row < 3; ++row) {
    for (unsigned int col = 0; col < 3; ++col) {
      covMat[row][col] = 0.0;
    }
  }

  unsigned int nPoints = refPoints.size();
  CHECK_INVARIANT(nPoints == probePoints.size(), "Number of points mismatch");
  CHECK_INVARIANT(nPoints == weights.size(),
                  "Number of points and number of weights do not match");

  const double *weightData = weights.getData();
  for (unsigned int idx = 0; idx < nPoints; ++idx) {
    const RDGeom::Point3D *ref = refPoints[idx];
    const RDGeom::Point3D *probe = probePoints[idx];
    const double wt = weightData[idx];
    const double probeXYZ[3] = {probe->x, probe->y, probe->z};
    const double refXYZ[3] = {ref->x, ref->y, ref->z};
    // rows follow the probe components, columns the reference components
    for (unsigned int row = 0; row < 3; ++row) {
      for (unsigned int col = 0; col < 3; ++col) {
        covMat[row][col] += wt * probeXYZ[row] * refXYZ[col];
      }
    }
  }
}
double pickRandomDistMat(const BoundsMatrix &mmat, RDNumeric::SymmMatrix<double> &distMat, int seed) { // make sure the sizes match up unsigned int npt = mmat.numRows(); CHECK_INVARIANT(npt == distMat.numRows(), "Size mismatch"); RDKit::rng_type &generator = RDKit::getRandomGenerator(); if (seed > 0) { generator.seed(seed); } double largestVal=-1.0; double *ddata = distMat.getData(); for (unsigned int i = 1; i < npt; i++) { unsigned int id = i*(i+1)/2; for (unsigned int j = 0; j < i; j++) { double ub = mmat.getUpperBound(i,j); double lb = mmat.getLowerBound(i,j); CHECK_INVARIANT(ub >= lb, ""); double rval = RDKit::getRandomVal(); double d = lb + (rval)*(ub - lb); ddata[id+j] = d; if(d>largestVal){ largestVal=d; } } } return largestVal; }
double pickRandomDistMat(const BoundsMatrix &mmat, RDNumeric::SymmMatrix<double> &distMat, RDKit::double_source_type &rng) { // make sure the sizes match up unsigned int npt = mmat.numRows(); CHECK_INVARIANT(npt == distMat.numRows(), "Size mismatch"); double largestVal = -1.0; double *ddata = distMat.getData(); for (unsigned int i = 1; i < npt; i++) { unsigned int id = i * (i + 1) / 2; for (unsigned int j = 0; j < i; j++) { double ub = mmat.getUpperBound(i, j); double lb = mmat.getLowerBound(i, j); CHECK_INVARIANT(ub >= lb, ""); double rval = rng(); // std::cerr<<i<<"-"<<j<<": "<<rval<<std::endl; double d = lb + (rval) * (ub - lb); ddata[id + j] = d; if (d > largestVal) { largestVal = d; } } } return largestVal; }
//! Constructs a supplier that reads SMILES records from a file.
/*!
  \param fileName      path of the file to read
  \param delimiter     column delimiter(s) used in the file
  \param smilesColumn  index of the column holding the SMILES
  \param nameColumn    index of the column holding the molecule name
  \param titleLine     whether the first line is a title line
  \param sanitize      whether molecules should be sanitized when parsed

  Throws BadFileException if the file cannot be opened.
*/
SmilesMolSupplier::SmilesMolSupplier(const std::string &fileName,
                                     const std::string &delimiter,
                                     int smilesColumn, int nameColumn,
                                     bool titleLine, bool sanitize) {
  init();

  // FIX: this binary mode of opening file is here because of a bug in VC++ 6.0
  // the function "tellg" does not work correctly if we do not open it this way
  // Need to check if this has been fixed in VC++ 7.0
  std::ifstream *tmpStream =
      new std::ifstream(fileName.c_str(), std::ios_base::binary);
  if ((!(*tmpStream)) || (tmpStream->bad())) {
    // the stream has not been handed over to the supplier yet, so it has to
    // be released here; otherwise it leaks when we throw
    delete tmpStream;
    std::ostringstream errout;
    errout << "Bad input file " << fileName;
    throw BadFileException(errout.str());
  }

  dp_inStream = static_cast<std::istream *>(tmpStream);
  CHECK_INVARIANT(dp_inStream, "bad instream");
  CHECK_INVARIANT(!(dp_inStream->eof()), "early EOF");

  d_delim = delimiter;
  df_sanitize = sanitize;
  df_title = titleLine;
  d_smi = smilesColumn;
  d_name = nameColumn;
  df_end = false;

  // if(d_title) processTitleLine();
  this->checkForEnd();
  POSTCONDITION(dp_inStream, "bad instream");
}
// Check the chirality of atoms not directly involved in the reaction void checkAndCorrectChiralityOfProduct( const std::vector<const Atom *> &chiralAtomsToCheck, RWMOL_SPTR product, ReactantProductAtomMapping *mapping) { for (auto reactantAtom : chiralAtomsToCheck) { CHECK_INVARIANT(reactantAtom->getChiralTag() != Atom::CHI_UNSPECIFIED, "missing atom chirality."); const auto reactAtomDegree = reactantAtom->getOwningMol().getAtomDegree(reactantAtom); for (unsigned i = 0; i < mapping->reactProdAtomMap[reactantAtom->getIdx()].size(); i++) { unsigned productAtomIdx = mapping->reactProdAtomMap[reactantAtom->getIdx()][i]; Atom *productAtom = product->getAtomWithIdx(productAtomIdx); CHECK_INVARIANT( reactantAtom->getChiralTag() == productAtom->getChiralTag(), "invalid product chirality."); if (reactAtomDegree != product->getAtomDegree(productAtom)) { // If the number of bonds to the atom has changed in the course of the // reaction we're lost, so remove chirality. // A word of explanation here: the atoms in the chiralAtomsToCheck set // are not explicitly mapped atoms of the reaction, so we really have // no idea what to do with this case. At the moment I'm not even really // sure how this could happen, but better safe than sorry. 
productAtom->setChiralTag(Atom::CHI_UNSPECIFIED); } else if (reactantAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || reactantAtom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) { // this will contain the indices of product bonds in the // reactant order: INT_LIST newOrder; ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = reactantAtom->getOwningMol().getAtomBonds(reactantAtom); while (beg != end) { const Bond *reactantBond = reactantAtom->getOwningMol()[*beg]; unsigned int oAtomIdx = reactantBond->getOtherAtomIdx(reactantAtom->getIdx()); CHECK_INVARIANT(mapping->reactProdAtomMap.find(oAtomIdx) != mapping->reactProdAtomMap.end(), "other atom from bond not mapped."); const Bond *productBond; unsigned neighborBondIdx = mapping->reactProdAtomMap[oAtomIdx][i]; productBond = product->getBondBetweenAtoms(productAtom->getIdx(), neighborBondIdx); CHECK_INVARIANT(productBond, "no matching bond found in product"); newOrder.push_back(productBond->getIdx()); ++beg; } int nSwaps = productAtom->getPerturbationOrder(newOrder); if (nSwaps % 2) { productAtom->invertChirality(); } } else { // not tetrahedral chirality, don't do anything. } } } // end of loop over chiralAtomsToCheck }
void TDTMolSupplier::moveTo(unsigned int idx) { PRECONDITION(dp_inStream,"no stream"); CHECK_INVARIANT(idx >= 0, ""); // dp_inStream->seekg() is called for all idx values // and earlier calls to next() may have put the stream into a bad state dp_inStream->clear(); // move until we hit the desired idx if (idx < d_molpos.size() ) { dp_inStream->seekg(d_molpos[idx]); d_last = idx; } else { std::string tempStr; d_last = d_molpos.size() - 1; dp_inStream->seekg(d_molpos.back()); while ((d_last < static_cast<int>(idx)) && (!dp_inStream->eof()) ) { d_line++; std::getline(*dp_inStream,tempStr); if (tempStr.find("|") == 0) { d_molpos.push_back(dp_inStream->tellg()); d_last++; } } // if we reached end of file without reaching "idx" we have an index error if (dp_inStream->eof()) { d_len = d_molpos.size(); std::ostringstream errout; errout << "ERROR: Index error (idx = " << idx << ") : " << " we do no have enough molecule blocks"; throw FileParseException(errout.str()); } } }
//! Python wrapper: computes the information entropy of a 1-D numeric array.
/*!
  \param resArr  a 1-dimensional array object; element types double, float,
                 int and long are dispatched to InfoEntropy()

  \return the value returned by InfoEntropy(), or 0.0 if the element type is
          none of the recognized ones

  A value error is raised if the argument is not an array or cannot be viewed
  as a contiguous 1-dimensional array.
*/
double infoEntropy(python::object resArr) {
  PyObject *matObj = resArr.ptr();
  if (!PyArray_Check(matObj)) {
    throw_value_error("Expecting a Numeric array object");
  }
  PyArrayObject *matArr = reinterpret_cast<PyArrayObject *>(matObj);
  const int typeNum = matArr->descr->type_num;

  // new reference to a contiguous 1-D version of the input; this comes back
  // NULL if the conversion is not possible
  PyArrayObject *copy = reinterpret_cast<PyArrayObject *>(
      PyArray_ContiguousFromObject(matObj, typeNum, 1, 1));
  if (!copy) {
    throw_value_error("Expecting a 1-dimensional Numeric array object");
  }

  // we are expecting a 1 dimensional array
  long int ncols = static_cast<long int>(copy->dimensions[0]);
  if (ncols <= 0) {
    // release our reference before the invariant below throws
    Py_DECREF(copy);
  }
  CHECK_INVARIANT(ncols > 0, "");

  double res = 0.0;
  if (typeNum == PyArray_DOUBLE) {
    double *data = reinterpret_cast<double *>(copy->data);
    res = InfoEntropy(data, ncols);
  } else if (typeNum == PyArray_FLOAT) {
    float *data = reinterpret_cast<float *>(copy->data);
    res = InfoEntropy(data, ncols);
  } else if (typeNum == PyArray_INT) {
    int *data = reinterpret_cast<int *>(copy->data);
    res = InfoEntropy(data, ncols);
  } else if (typeNum == PyArray_LONG) {
    long int *data = reinterpret_cast<long int *>(copy->data);
    res = InfoEntropy(data, ncols);
  }
  Py_DECREF(copy);
  return res;
}
//! Builds the reactant <-> product atom index mapping for one match.
/*!
  For every (template atom, reactant atom) pair in \c match:
    - if the template atom carries a molAtomMapNumber that is bookmarked in
      the product, every bookmarked product atom is linked to the reactant
      atom in both directions and the reactant atom is flagged as mapped;
    - otherwise the reactant atom is flagged as skipped.

  \return a newly allocated mapping; the caller is responsible for deleting it
*/
ReactantProductAtomMapping *getAtomMappingsReactantProduct(
    const MatchVectType &match, const ROMol &reactantTemplate,
    RWMOL_SPTR product, unsigned numReactAtoms) {
  ReactantProductAtomMapping *mapping =
      new ReactantProductAtomMapping(numReactAtoms);

  for (unsigned int matchIdx = 0; matchIdx < match.size(); ++matchIdx) {
    const auto reactantIdx = match[matchIdx].second;
    const Atom *templateAtom =
        reactantTemplate.getAtomWithIdx(match[matchIdx].first);

    int mapNumber;
    if (!templateAtom->getPropIfPresent(common_properties::molAtomMapNumber,
                                        mapNumber) ||
        !product->hasAtomBookmark(mapNumber)) {
      // Either this atom appears in the match but has no atom map number, or
      // it has one that is not in this product (it's either in another
      // product or it's not mapped at all).
      mapping->skippedAtoms[reactantIdx] = 1;
      continue;
    }

    RWMol::ATOM_PTR_LIST productAtoms =
        product->getAllAtomsWithBookmark(mapNumber);
    for (RWMol::ATOM_PTR_LIST::iterator atomIt = productAtoms.begin();
         atomIt != productAtoms.end(); ++atomIt) {
      unsigned int productIdx = (*atomIt)->getIdx();
      mapping->reactProdAtomMap[reactantIdx].push_back(productIdx);
      mapping->mappedAtoms[reactantIdx] = 1;
      CHECK_INVARIANT(productIdx < product->getNumAtoms(), "yikes!");
      mapping->prodReactAtomMap[productIdx] = reactantIdx;
    }
  }
  return mapping;
}
//! Python wrapper: computes the Tanimoto similarity matrix for a sequence of
//! bit vectors.
/*!
  \param bitVectList  a sequence of ExplicitBitVects or SparseBitVects; the
                      type of the first element decides how the whole
                      sequence is interpreted. Must hold at least 2 elements.

  \return a new 1-D double array with nrows*(nrows-1)/2 entries, filled by
          MetricMatrixCalc::calcMetricMatrix()
*/
PyObject *getTanimotoSimMat(python::object bitVectList) {
  // we will assume here that we have a either a list of ExplicitBitVectors or
  // SparseBitVects
  int nrows = python::extract<int>(bitVectList.attr("__len__")());
  CHECK_INVARIANT(nrows > 1, "");

  // First check what type of vector we have
  python::object v1 = bitVectList[0];
  python::extract<ExplicitBitVect> ebvWorks(v1);
  python::extract<SparseBitVect> sbvWorks(v1);
  if (!ebvWorks.check() && !sbvWorks.check()) {
    throw_value_error(
        "GetTanimotoSimMat can only take a sequence of ExplicitBitVects or "
        "SparseBitvects");
  }

  // do the multiplication in npy_intp so that it does not overflow int for
  // large inputs
  npy_intp dMatLen = static_cast<npy_intp>(nrows) * (nrows - 1) / 2;
  PyArrayObject *simRes = reinterpret_cast<PyArrayObject *>(
      PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE));
  double *sMat = reinterpret_cast<double *>(simRes->data);

  try {
    if (ebvWorks.check()) {
      PySequenceHolder<ExplicitBitVect> dData(bitVectList);
      MetricMatrixCalc<PySequenceHolder<ExplicitBitVect>, ExplicitBitVect>
          mmCalc;
      mmCalc.setMetricFunc(
          &TanimotoSimilarityMetric<ExplicitBitVect, ExplicitBitVect>);
      mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
    } else if (sbvWorks.check()) {
      PySequenceHolder<SparseBitVect> dData(bitVectList);
      MetricMatrixCalc<PySequenceHolder<SparseBitVect>, SparseBitVect> mmCalc;
      mmCalc.setMetricFunc(
          &TanimotoSimilarityMetric<SparseBitVect, SparseBitVect>);
      mmCalc.calcMetricMatrix(dData, nrows, 0, sMat);
    }
  } catch (...) {
    // do not leak the result array if the calculation fails
    Py_DECREF(simRes);
    throw;
  }
  return PyArray_Return(simRes);
}
//! Constructs a supplier reading TDT records from an existing stream.
/*!
  \param inStream       stream to read from; must be non-null and not at EOF
  \param takeOwnership  stored in df_owner
  \param nameRecord     stored as the name property (d_nameProp)
  \param confId2D       stored in d_confId2D
  \param confId3D       stored in d_confId3D
  \param sanitize       stored in df_sanitize

  After the members are set, the stream is advanced to the next record and
  that stream position is stored as the first entry of d_molpos.
*/
TDTMolSupplier::TDTMolSupplier(std::istream *inStream, bool takeOwnership,
                               const std::string &nameRecord, int confId2D,
                               int confId3D, bool sanitize) {
  CHECK_INVARIANT(inStream, "bad instream");
  CHECK_INVARIANT(!(inStream->eof()), "early EOF");

  init();

  // stream and ownership
  dp_inStream = inStream;
  df_owner = takeOwnership;

  // conformer ids and name record
  d_confId2D = confId2D;
  d_confId3D = confId3D;
  d_nameProp = nameRecord;

  // remember where the first record starts
  this->advanceToNextRecord();
  d_molpos.push_back(dp_inStream->tellg());

  df_sanitize = sanitize;
  this->checkForEnd();
}
//! Picks one representative item from each of \c pickSize clusters.
/*!
  The pool is clustered with cluster(); from each cluster the member with the
  smallest sum of squared distances to the other members of the same cluster
  is selected.

  \param distMat   lower-triangle distance matrix, read via getDistFromLTM()
  \param poolSize  number of items in the pool
  \param pickSize  number of items to pick (= number of clusters)

  \return the picked indices, one per cluster, in cluster order
*/
RDKit::INT_VECT HierarchicalClusterPicker::pick(const double *distMat,
                                                unsigned int poolSize,
                                                unsigned int pickSize) const {
  PRECONDITION(distMat, "bad distance matrix");
  RDKit::VECT_INT_VECT clusters = this->cluster(distMat, poolSize, pickSize);
  CHECK_INVARIANT(clusters.size() == pickSize, "");

  // the last step: find a representative element from each of the
  // remaining clusters
  RDKit::INT_VECT picks;
  picks.reserve(pickSize);
  for (unsigned int i = 0; i < pickSize; i++) {
    const RDKit::INT_VECT &members = clusters[i];
    CHECK_INVARIANT(!members.empty(), "empty cluster");

    // start from the first member so that the pick is always well defined,
    // even if no sum of squared distances compares below MAX_DOUBLE
    int pick = members.front();
    double minSumD2 = RDKit::MAX_DOUBLE;
    for (RDKit::INT_VECT_CI cxi1 = members.begin(); cxi1 != members.end();
         ++cxi1) {
      int curPick = (*cxi1);
      double d2sum = 0.0;
      for (RDKit::INT_VECT_CI cxi2 = members.begin(); cxi2 != members.end();
           ++cxi2) {
        if (cxi1 == cxi2) {
          continue;
        }
        double d = getDistFromLTM(distMat, curPick, (*cxi2));
        d2sum += (d * d);
      }
      if (d2sum < minSumD2) {
        pick = curPick;
        minSumD2 = d2sum;
      }
    }
    picks.push_back(pick);
  }
  return picks;
}
//! Finds a shortest path (in number of bonds) between two atoms using a
//! breadth-first search.
/*!
  \param mol   the molecule to search
  \param aid1  index of the start atom
  \param aid2  index of the end atom; must differ from aid1

  \return the atom indices along the path, starting with aid1 and ending with
          aid2; empty if aid2 cannot be reached from aid1
*/
INT_LIST getShortestPath(const ROMol &mol, int aid1, int aid2) {
  int nats = mol.getNumAtoms();
  RANGE_CHECK(0, aid1, nats - 1);
  RANGE_CHECK(0, aid2, nats - 1);
  CHECK_INVARIANT(aid1 != aid2, "");

  // pred[i]: the atom from which atom i was first discovered
  // seen[i]: non-zero once atom i has been discovered
  INT_VECT pred, seen;
  pred.resize(nats);
  seen.resize(nats);
  for (int ai = 0; ai < nats; ++ai) {
    seen[ai] = 0;
  }

  std::deque<int> bfsQ;
  bfsQ.push_back(aid1);
  seen[aid1] = 1;

  bool found = false;
  ROMol::ADJ_ITER nbrIdx, endNbrs;
  while ((!found) && (!bfsQ.empty())) {
    int curAid = bfsQ.front();
    bfsQ.pop_front();
    boost::tie(nbrIdx, endNbrs) =
        mol.getAtomNeighbors(mol.getAtomWithIdx(curAid));
    for (; nbrIdx != endNbrs; ++nbrIdx) {
      if (seen[*nbrIdx]) {
        continue;
      }
      // Mark the atom as soon as it is discovered, not when it is dequeued:
      // otherwise an atom that is still waiting in the queue can have its
      // predecessor overwritten by a deeper atom, which yields a path that is
      // longer than the shortest one.
      seen[*nbrIdx] = 1;
      pred[*nbrIdx] = curAid;
      if (static_cast<int>(*nbrIdx) == aid2) {
        found = true;
        break;
      }
      bfsQ.push_back(*nbrIdx);
    }
  }

  INT_LIST res;
  if (found) {
    // walk the predecessor chain back from aid2 to aid1
    res.push_back(aid2);
    int prev = pred[aid2];
    while (prev != aid1) {
      res.push_front(prev);
      prev = pred[prev];
    }
    res.push_front(aid1);
  }
  return res;
}
//! Returns the sum of all entries of \c weights.
/*!
  Every weight is required to be strictly positive (note that a weight of
  exactly zero is rejected as well); the invariant is checked for each entry
  before it is added to the total.
*/
double _sumOfWeights(const DoubleVector &weights) {
  const double *weightData = weights.getData();
  double total = 0.0;
  for (unsigned int idx = 0; idx < weights.size(); ++idx) {
    const double wt = weightData[idx];
    CHECK_INVARIANT(wt > 0.0, "Negative weight specified for a point");
    total += wt;
  }
  return total;
}
//! Adds a molecule to the library.
/*!
  The molecule is added to the molecule holder and, if a fingerprint holder
  is present, to that one too; both holders must report the same value.

  \return the value returned by the molecule holder's addMol()
*/
unsigned int SubstructLibrary::addMol(const ROMol &m) {
  const unsigned int molResult = mols->addMol(m);
  if (!fps) {
    return molResult;
  }
  const unsigned int fpResult = fps->addMol(m);
  CHECK_INVARIANT(molResult == fpResult,
                  "#mols different than #fingerprints in SubstructLibrary");
  return molResult;
}
//! Builds the distance-geometry force field for a set of positions.
/*!
  \param mmat             bounds matrix providing lower/upper distance bounds
  \param positions        one position per row of \c mmat; the dimension of
                          the first position is used for the force field
  \param csets            chiral sets; one chiral-violation term is added for
                          each when weightChiral > 1e-8
  \param weightChiral     weight of the chiral-violation terms
  \param weightFourthDim  weight of the fourth-dimension terms; used only when
                          the field is 4-dimensional and the weight > 1e-8
  \param extraWeights     optional per-pair weights, keyed by (i, j) with
                          i > j; a pair found here is always included
  \param basinSizeTol     pairs with (upper - lower) <= this are included

  \return a newly allocated force field; the caller owns it
*/
ForceFields::ForceField *constructForceField(
    const BoundsMatrix &mmat, RDGeom::PointPtrVect &positions,
    const VECT_CHIRALSET &csets, double weightChiral, double weightFourthDim,
    std::map<std::pair<int, int>, double> *extraWeights, double basinSizeTol) {
  const unsigned int nPoints = mmat.numRows();
  CHECK_INVARIANT(nPoints == positions.size(), "");

  ForceFields::ForceField *field =
      new ForceFields::ForceField(positions[0]->dimension());
  for (unsigned int idx = 0; idx < nPoints; ++idx) {
    field->positions().push_back(positions[idx]);
  }

  // distance-violation terms for the selected pairs
  for (unsigned int i = 1; i < nPoints; ++i) {
    for (unsigned int j = 0; j < i; ++j) {
      const double lower = mmat.getLowerBound(i, j);
      const double upper = mmat.getUpperBound(i, j);

      double weight = 1.0;
      bool hasExtraWeight = false;
      if (extraWeights) {
        std::map<std::pair<int, int>, double>::const_iterator found =
            extraWeights->find(std::make_pair(i, j));
        if (found != extraWeights->end()) {
          weight = found->second;
          hasExtraWeight = true;
        }
      }

      if (hasExtraWeight || (upper - lower <= basinSizeTol)) {
        DistViolationContrib *contrib =
            new DistViolationContrib(field, i, j, upper, lower, weight);
        field->contribs().push_back(ForceFields::ContribPtr(contrib));
      }
    }
  }

  // now add chiral constraints
  if (weightChiral > 1.e-8) {
    for (VECT_CHIRALSET::const_iterator csIt = csets.begin();
         csIt != csets.end(); ++csIt) {
      ChiralViolationContrib *contrib =
          new ChiralViolationContrib(field, csIt->get(), weightChiral);
      field->contribs().push_back(ForceFields::ContribPtr(contrib));
    }
  }

  // finally the contribution from the fourth dimension if we need to
  // NOTE(review): this loop starts at index 1, so point 0 gets no
  // fourth-dimension term - confirm that this is intended.
  if ((field->dimension() == 4) && (weightFourthDim > 1.e-8)) {
    for (unsigned int idx = 1; idx < nPoints; ++idx) {
      FourthDimContrib *contrib =
          new FourthDimContrib(field, idx, weightFourthDim);
      field->contribs().push_back(ForceFields::ContribPtr(contrib));
    }
  }
  return field;
}  // constructForceField
//! Constructs a supplier that reads SMILES records from an existing stream.
/*!
  \param inStream       stream to read from; must be non-null and not at EOF
  \param takeOwnership  stored in df_owner
  \param delimiter      column delimiter(s) used in the input
  \param smilesColumn   index of the column holding the SMILES
  \param nameColumn     index of the column holding the molecule name
  \param titleLine      whether the first line is a title line
  \param sanitize       whether molecules should be sanitized when parsed
*/
SmilesMolSupplier::SmilesMolSupplier(std::istream *inStream,
                                     bool takeOwnership,
                                     const std::string &delimiter,
                                     int smilesColumn, int nameColumn,
                                     bool titleLine, bool sanitize) {
  CHECK_INVARIANT(inStream, "bad instream");
  CHECK_INVARIANT(!(inStream->eof()), "early EOF");

  init();

  // stream and ownership
  dp_inStream = inStream;
  df_owner = takeOwnership;

  // parsing options
  d_delim = delimiter;
  df_sanitize = sanitize;
  df_title = titleLine;
  d_smi = smilesColumn;
  d_name = nameColumn;
  df_end = false;

  this->checkForEnd();
  POSTCONDITION(dp_inStream, "bad instream");
}
//! Finishes the output and releases the stream if this writer owns it.
/*!
  If any molecules were written, a closing "|" line is emitted. The
  destructor never throws: a missing stream is skipped silently instead of
  raising an invariant violation, since throwing from a destructor would
  terminate the program and skip deletion of an owned stream.
*/
TDTWriter::~TDTWriter() {
  // if we've written any mols, finish with a "|" line
  if (d_molid > 0 && dp_ostream) {
    (*dp_ostream) << "|\n";
  }

  if (df_owner) {
    delete dp_ostream;
  }
}
//! Adds one ExplicitBitVect example with class \c label to the counts.
/*!
  Increments the instance count and the count for \c label, then increments
  d_counts[label][i] for every set bit i that passes the optional bit mask.

  \param bv     bit vector; must have d_dims bits
  \param label  class label; must be in [0, d_classes-1]
*/
void InfoBitRanker::accumulateVotes(const ExplicitBitVect &bv,
                                    unsigned int label) {
  RANGE_CHECK(0, label, d_classes - 1);
  CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size");

  d_nInst += 1;
  d_clsCount[label] += 1;

  const unsigned int nBits = bv.getNumBits();
  for (unsigned int bit = 0; bit < nBits; ++bit) {
    if (!(*bv.dp_bits)[bit]) {
      continue;  // bit not set
    }
    if (dp_maskBits && !dp_maskBits->getBit(bit)) {
      continue;  // bit excluded by the mask
    }
    d_counts[label][bit] += 1;
  }
}
//! Assigns random coordinates to every point.
/*!
  Each coordinate of each point is set to boxSize * (r - 0.5), where r is
  obtained from RDKit::getRandomVal().

  \param positions  points to overwrite
  \param boxSize    scale of the box; must be > 0
  \return always true
*/
bool computeRandomCoords(RDGeom::PointPtrVect &positions, double boxSize) {
  CHECK_INVARIANT(boxSize > 0.0, "bad boxSize");
  for (unsigned int ptIdx = 0; ptIdx < positions.size(); ++ptIdx) {
    RDGeom::Point *point = positions[ptIdx];
    const unsigned int nDims = point->dimension();
    for (unsigned int dim = 0; dim < nDims; ++dim) {
      (*point)[dim] = boxSize * (RDKit::getRandomVal() - 0.5);
    }
  }
  return true;
}
//! Recreates a reactant bond between the corresponding product atoms.
/*!
  The begin and end atoms of \c origB are looked up in the reactant->product
  atom map; for each position k, setNewProductBond() is called with the k-th
  product atom of the begin atom and the k-th product atom of the end atom.

  \param origB    the bond in the reactant
  \param product  the product molecule being built
  \param mapping  reactant/product atom mapping
*/
void addMissingProductBonds(const Bond &origB, RWMOL_SPTR product,
                            ReactantProductAtomMapping *mapping) {
  const unsigned int reactBegin = origB.getBeginAtomIdx();
  const unsigned int reactEnd = origB.getEndAtomIdx();

  const std::vector<unsigned> &beginCopies =
      mapping->reactProdAtomMap[reactBegin];
  const std::vector<unsigned> &endCopies = mapping->reactProdAtomMap[reactEnd];
  CHECK_INVARIANT(beginCopies.size() == endCopies.size(),
                  "Different number of start-end points for product bonds.");

  const unsigned nCopies = beginCopies.size();
  for (unsigned k = 0; k < nCopies; ++k) {
    setNewProductBond(origB, product, beginCopies.at(k), endCopies.at(k));
  }
}
//! Sets the list of classes the ranking should be biased towards.
/*!
  \param classList  class ids; must contain at most d_classes entries, no
                    duplicates, and only ids in [0, d_classes-1]

  The list is validated on a local sorted copy and only stored in d_biasList
  once every check has passed, so a rejected list never replaces the current
  one. On success d_biasList holds the ids in sorted order.
*/
void InfoBitRanker::setBiasList(RDKit::INT_VECT &classList) {
  RANGE_CHECK(0, classList.size(), d_classes);

  RDKit::INT_VECT candidate(classList);

  // make sure we don't have any duplicates
  std::sort(candidate.begin(), candidate.end());
  RDKit::INT_VECT_CI bi = std::unique(candidate.begin(), candidate.end());
  CHECK_INVARIANT(bi == candidate.end(),
                  "There are duplicates in the class bias list");

  // finally make sure all the class ID in the list are within range
  for (bi = candidate.begin(); bi != candidate.end(); ++bi) {
    RANGE_CHECK(0, static_cast<unsigned int>(*bi), d_classes - 1);
  }

  // everything checked out: commit
  d_biasList = candidate;
}
//! Builds the reactant <-> product atom index mapping for one match.
/*!
  First records, for every bond of the reactant template, the pair of atom
  map numbers of its two atoms (smaller number first). Then, for every
  (template atom, reactant atom) pair in \c match:
    - if the template atom carries a molAtomMapNumber that is bookmarked in
      the product, every bookmarked product atom is linked to the reactant
      atom in both directions and the reactant atom is flagged as mapped;
    - otherwise the reactant atom is flagged as skipped.

  \return a newly allocated mapping; the caller is responsible for deleting it
*/
ReactantProductAtomMapping *getAtomMappingsReactantProduct(
    const MatchVectType &match, const ROMol &reactantTemplate,
    RWMOL_SPTR product, unsigned numReactAtoms) {
  auto *mapping = new ReactantProductAtomMapping(numReactAtoms);

  // keep track of which mapped atoms in the reactant template are bonded to
  // each other.
  // This is part of the fix for #1387
  {
    ROMol::EDGE_ITER bondIt, bondEnd;
    boost::tie(bondIt, bondEnd) = reactantTemplate.getEdges();
    for (; bondIt != bondEnd; ++bondIt) {
      const Bond *bond = reactantTemplate[*bondIt];
      // this will put in pairs with 0s for things that aren't mapped, but we
      // don't care about that
      int lowMap = bond->getBeginAtom()->getAtomMapNum();
      int highMap = bond->getEndAtom()->getAtomMapNum();
      if (lowMap > highMap) {
        std::swap(lowMap, highMap);
      }
      mapping->reactantTemplateAtomBonds[std::make_pair(lowMap, highMap)] = 1;
    }
  }

  for (const auto &matchPair : match) {
    const auto reactantIdx = matchPair.second;
    const Atom *templateAtom =
        reactantTemplate.getAtomWithIdx(matchPair.first);

    int mapNumber;
    if (!templateAtom->getPropIfPresent(common_properties::molAtomMapNumber,
                                        mapNumber) ||
        !product->hasAtomBookmark(mapNumber)) {
      // Either this atom appears in the match but has no atom map number, or
      // it has one that is not in this product (it's either in another
      // product or it's not mapped at all).
      mapping->skippedAtoms[reactantIdx] = 1;
      continue;
    }

    RWMol::ATOM_PTR_LIST productAtoms =
        product->getAllAtomsWithBookmark(mapNumber);
    for (auto productAtom : productAtoms) {
      unsigned int productIdx = productAtom->getIdx();
      mapping->reactProdAtomMap[reactantIdx].push_back(productIdx);
      mapping->mappedAtoms[reactantIdx] = 1;
      CHECK_INVARIANT(productIdx < product->getNumAtoms(), "yikes!");
      mapping->prodReactAtomMap[productIdx] = reactantIdx;
    }
  }
  return mapping;
}
void InfoBitRanker::accumulateVotes(const SparseBitVect &bv, unsigned int label) { RANGE_CHECK(0, label, d_classes-1); CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size"); d_nInst += 1; d_clsCount[label] += 1; for (IntSet::const_iterator obi = bv.dp_bits->begin(); obi != bv.dp_bits->end(); ++obi) { if(!dp_maskBits || dp_maskBits->getBit(*obi)){ d_counts[label][(*obi)] += 1; } } }
//! Builds a force field containing only UFF inversion (improper) terms.
/*!
  \param mmat           bounds matrix; only its row count is used
  \param positions      one 3D position per row of \c mmat
  \param improperAtoms  one entry per improper; entries 0-3 are atom indices,
                        entries 4 and 5 are passed through to the
                        InversionContrib constructor
  \param atomNums       unused

  For every improper, three inversion terms are added that differ in the
  order in which atoms 0, 2 and 3 are arranged around atom 1.

  \return a newly allocated force field; the caller owns it
*/
ForceFields::ForceField *construct3DImproperForceField(
    const BoundsMatrix &mmat, RDGeom::Point3DPtrVect &positions,
    const std::vector<std::vector<int> > &improperAtoms,
    const std::vector<int> &atomNums) {
  (void)atomNums;
  const unsigned int nPoints = mmat.numRows();
  CHECK_INVARIANT(nPoints == positions.size(), "");

  ForceFields::ForceField *field =
      new ForceFields::ForceField(positions[0]->dimension());
  for (unsigned int idx = 0; idx < nPoints; ++idx) {
    field->positions().push_back(positions[idx]);
  }

  // improper torsions / out-of-plane bend / inversion
  const double oobForceScalingFactor = 10.0;
  // the three arrangements of the improper's atoms; position 1 is the same
  // atom in all of them
  static const int arrangements[3][4] = {
      {0, 1, 2, 3}, {0, 1, 3, 2}, {2, 1, 3, 0}};
  for (unsigned int t = 0; t < improperAtoms.size(); ++t) {
    const std::vector<int> &atoms = improperAtoms[t];
    for (unsigned int a = 0; a < 3; ++a) {
      const int *order = arrangements[a];
      ForceFields::UFF::InversionContrib *contrib =
          new ForceFields::UFF::InversionContrib(
              field, atoms[order[0]], atoms[order[1]], atoms[order[2]],
              atoms[order[3]], atoms[4], atoms[5], oobForceScalingFactor);
      field->contribs().push_back(ForceFields::ContribPtr(contrib));
    }
  }
  return field;
}  // construct3DImproperForceField
unsigned int SubstanceGroup::getIndexInMol() const { PRECONDITION(dp_mol, "SubstanceGroup is not owned by any molecule"); const auto &sgroups = getSubstanceGroups(*dp_mol); CHECK_INVARIANT(!sgroups.empty(), "No SubstanceGroups found on owning molecule"); auto match_sgroup = [&](const SubstanceGroup &sg) { return this == &sg; }; auto sgroupItr = std::find_if(sgroups.begin(), sgroups.end(), match_sgroup); if (sgroupItr == sgroups.end()) { std::ostringstream errout; errout << "Unable to find own index in owning mol SubstanceGroup collection" << std::endl; throw SubstanceGroupException(errout.str()); } return sgroupItr - sgroups.begin(); }
// ---------------------------------------------------------------------- // // Grabs and returns the next molecule from the input stream. // After processing the line, the file is advanced to the next // position in the file (skipping blank and comment lines). // // Throws a FileParseException if EOF has already been hit. // ROMol *SmilesMolSupplier::next() { PRECONDITION(dp_inStream, "no stream"); ROMol *res = NULL; if (d_next < 0) { d_next = 0; } // This throws an exception if it fails: moveTo(d_next); CHECK_INVARIANT(static_cast<int>(d_molpos.size()) > d_next, "bad index length"); // --------- // if we get here we can just build the molecule: // --------- // set the stream to the relevant position: dp_inStream->clear(); // clear the EOF tag if it has been set dp_inStream->seekg(d_molpos[d_next]); d_line = d_lineNums[d_next]; // grab the line: std::string inLine = getLine(dp_inStream); // and process it: res = this->processLine(inLine); // if we don't already know the length of the supplier, // check if we can read another line: if (d_len < 0 && this->skipComments() < 0) { d_len = d_molpos.size(); } // make sure the line number is correct: if (d_next < static_cast<int>(d_lineNums.size())) { d_line = d_lineNums[d_next]; } ++d_next; // if we just hit the last one, simulate EOF: if (d_len > 0 && d_next == d_len) { df_end = true; } return res; }
//! Collects the values of a list query into \c vals.
/*!
  List queries are series of nested ORs of AtomAtomicNum queries. The query
  tree is walked recursively: "AtomOr" children are descended into, and the
  value of every "AtomAtomicNum" child is appended to \c vals.

  \param q     the query; its description must be "AtomOr"
  \param vals  output; values are appended in traversal order
*/
void getListQueryVals(const Atom::QUERYATOM_QUERY *q, INT_VECT &vals) {
  PRECONDITION(q, "bad query");
  const std::string topDescr = q->getDescription();
  PRECONDITION(topDescr == "AtomOr", "bad query");
  if (topDescr != "AtomOr") {
    return;
  }

  for (Atom::QUERYATOM_QUERY::CHILD_VECT_CI childIt = q->beginChildren();
       childIt != q->endChildren(); ++childIt) {
    const std::string childDescr = (*childIt)->getDescription();
    // we don't allow negation of any children of the query:
    CHECK_INVARIANT((childDescr == "AtomOr" || childDescr == "AtomAtomicNum"),
                    "bad query");
    if (childDescr == "AtomOr") {
      getListQueryVals((*childIt).get(), vals);
    } else if (childDescr == "AtomAtomicNum") {
      vals.push_back(
          static_cast<ATOM_EQUALS_QUERY *>((*childIt).get())->getVal());
    }
  }
}
PyObject *getEuclideanDistMat(python::object descripMat) { // Bit of a pain involved here, we accept three types of PyObjects here // 1. A Numeric Array // - first find what 'type' of entry we have (float, double and int is all we recognize for now) // - then point to contiguous piece of memory from the array that contains the data with a type* // - then make a new type** pointer so that double index into this contiguous memory will work // and then pass it along to the distance calculator // 2. A list of Numeric Vector (or 1D arrays) // - in this case wrap descripMat with a PySequenceHolder<type*> where type is the // type of entry in vector (accepted types are int, double and float // - Then pass the PySequenceHolder to the metrci calculator // 3. A list (or tuple) of lists (or tuple) // - In this case other than wrapping descripMat with a PySequenceHolder // each of the indivual list in there are also wrapped by a PySequenceHolder // - so the distance calculator is passed in a "PySequenceHolder<PySequenceHolder<double>>" // - FIX: not that we always convert entry values to double here, even if we passed // in a list of list of ints (or floats). Given that lists can be heterogeneous, I do not // know how to ask a list what type of entries if contains. 
// // OK my brain is going to explode now // first deal with situation where we have an Numeric Array PyObject *descMatObj = descripMat.ptr(); PyArrayObject *distRes; if (PyArray_Check(descMatObj)) { // get the dimensions of the array int nrows = ((PyArrayObject *)descMatObj)->dimensions[0]; int ncols = ((PyArrayObject *)descMatObj)->dimensions[1]; int i; CHECK_INVARIANT((nrows > 0) && (ncols > 0), ""); npy_intp dMatLen = nrows*(nrows-1)/2; // now that we have the dimensions declare the distance matrix which is always a // 1D double array distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE); // grab a pointer to the data in the array so that we can directly put values in there // and avoid copying : double *dMat = (double *)distRes->data; PyArrayObject *copy; copy = (PyArrayObject *)PyArray_ContiguousFromObject(descMatObj, ((PyArrayObject *)descMatObj)->descr->type_num, 2,2); // if we have double array if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_DOUBLE) { double *desc = (double *)copy->data; // REVIEW: create an adaptor object to hold a double * and support // operator[]() so that we don't have to do this stuff: // here is the 2D array trick this so that when the distance calaculator // asks for desc2D[i] we basically get the ith row as double* double **desc2D = new double*[nrows]; for (i = 0; i < nrows; i++) { desc2D[i] = desc; desc += ncols; } MetricMatrixCalc<double**, double*> mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>); mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat); delete [] desc2D; // we got the distance matrix we are happy so return return PyArray_Return(distRes); } // if we have a float array else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_FLOAT) { float* desc = (float *)copy->data; float **desc2D = new float*[nrows]; for (i = 0; i < nrows; i++) { desc2D[i] = desc; desc += ncols; } MetricMatrixCalc<float**, float*> mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<float 
*, float*>); mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat); delete [] desc2D; return PyArray_Return(distRes); } // if we have an interger array else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_INT) { int *desc = (int *)copy->data; int **desc2D = new int*[nrows]; for (i = 0; i < nrows; i++) { desc2D[i] = desc; desc += ncols; } MetricMatrixCalc<int**, int*> mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<int *, int*>); mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat); delete [] desc2D; return PyArray_Return(distRes); } else { // unreconiged type for the matrix, throw up throw_value_error("The array has to be of type int, float, or double for GetEuclideanDistMat"); } } // done with an array input else { // REVIEW: removed a ton of code here // we have probably have a list or a tuple unsigned int ncols = 0; unsigned int nrows = python::extract<unsigned int>(descripMat.attr("__len__")()); CHECK_INVARIANT(nrows > 0, "Empty list passed in"); npy_intp dMatLen = nrows*(nrows-1)/2; distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE); double *dMat = (double *)distRes->data; // assume that we a have a list of list of values (that can be extracted to double) std::vector<PySequenceHolder<double> > dData; dData.reserve(nrows); for (unsigned int i = 0; i < nrows; i++) { //PySequenceHolder<double> row(seq[i]); PySequenceHolder<double> row(descripMat[i]); if(i==0) { ncols = row.size(); } else if( row.size() != ncols ) { throw_value_error("All subsequences must be the same length"); } dData.push_back(row); } MetricMatrixCalc< std::vector<PySequenceHolder<double> >, PySequenceHolder<double> > mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric< PySequenceHolder<double>, PySequenceHolder<double> >); mmCalc.calcMetricMatrix(dData, nrows, ncols, dMat); } return PyArray_Return(distRes); }
void TDTWriter::write(const ROMol &mol, int confId) { CHECK_INVARIANT(dp_ostream,"no output stream"); //start by writing a "|" line unless this is the first line if (d_molid > 0) { (*dp_ostream) << "|\n"; } // write the molecule (*dp_ostream) << "$SMI<" << MolToSmiles(mol) << ">\n"; if(df_writeNames && mol.hasProp("_Name")){ std::string name; mol.getProp("_Name",name); (*dp_ostream) << "NAME<" << name << ">\n"; } // do we need to write coordinates? if(mol.getNumConformers()){ // get the ordering of the atoms in the output SMILES: std::vector<unsigned int> atomOrdering; mol.getProp("_smilesAtomOutputOrder",atomOrdering); const Conformer &conf = mol.getConformer(confId); if(df_write2D){ (*dp_ostream) << "2D<"; } else { (*dp_ostream) << "3D<"; } const RDGeom::POINT3D_VECT &coords=conf.getPositions(); int nAts=atomOrdering.size(); for(int i=0;i<nAts;i++){ (*dp_ostream) << std::setprecision(d_numDigits) << coords[atomOrdering[i]].x << ","; (*dp_ostream) << std::setprecision(d_numDigits) << coords[atomOrdering[i]].y; if(!df_write2D){ (*dp_ostream) << "," << std::setprecision(d_numDigits) << coords[atomOrdering[i]].z; } if(i!=nAts-1) (*dp_ostream) << ","; } (*dp_ostream) << ";>\n"; } // now write the properties STR_VECT_CI pi; if (d_props.size() > 0) { // check if we have any properties the user specified to write out // in which loop over them and write them out for (pi = d_props.begin(); pi != d_props.end(); pi++) { if (mol.hasProp(*pi)) { writeProperty(mol, (*pi)); } } } else { // if use did not specify any properties, write all non computed properties // out to the file STR_VECT properties = mol.getPropList(); STR_VECT compLst; if (mol.hasProp(detail::computedPropName)) { mol.getProp(detail::computedPropName, compLst); } STR_VECT_CI pi; for (pi = properties.begin(); pi != properties.end(); pi++) { // ignore any of the following properties if ( ((*pi) == detail::computedPropName) || ((*pi) == "_Name") || ((*pi) == "_MolFileInfo") || ((*pi) == "_MolFileComments") || 
((*pi) == "_MolFileChiralFlag")) { continue; } // check if this property is not computed if (std::find(compLst.begin(), compLst.end(), (*pi)) == compLst.end()) { writeProperty(mol, (*pi)); } } } d_molid++; }
// ------------------------------------------------------------------------ // // // // ------------------------------------------------------------------------ void addTrigonalBipyramidAngles(const Atom *atom,const ROMol &mol, int confId, const AtomicParamVect ¶ms, ForceFields::ForceField *field){ PRECONDITION(atom,"bad atom"); PRECONDITION(atom->getHybridization()==Atom::SP3D,"bad hybridization"); PRECONDITION(atom->getDegree()==5,"bad degree"); PRECONDITION(mol.getNumAtoms()==params.size(),"bad parameters"); PRECONDITION(field,"bad forcefield"); const Bond *ax1=0,*ax2=0; const Bond *eq1=0,*eq2=0,*eq3=0; const Conformer &conf = mol.getConformer(confId); //------------------------------------------------------------ // identify the axial and equatorial bonds: double mostNeg=100.0; ROMol::OEDGE_ITER beg1,end1; boost::tie(beg1,end1) = mol.getAtomBonds(atom); unsigned int aid = atom->getIdx(); while(beg1!=end1){ const Bond *bond1=mol[*beg1].get(); unsigned int oaid = bond1->getOtherAtomIdx(aid); RDGeom::Point3D v1=conf.getAtomPos(aid).directionVector(conf.getAtomPos(oaid)); ROMol::OEDGE_ITER beg2,end2; boost::tie(beg2,end2) = mol.getAtomBonds(atom); while(beg2 != end2){ const Bond *bond2=mol[*beg2].get(); if(bond2->getIdx() > bond1->getIdx()){ unsigned int oaid2 = bond2->getOtherAtomIdx(aid); RDGeom::Point3D v2=conf.getAtomPos(aid).directionVector(conf.getAtomPos(oaid2)); double dot=v1.dotProduct(v2); if(dot<mostNeg){ mostNeg = dot; ax1 = bond1; ax2 = bond2; } } ++beg2; } ++beg1; } CHECK_INVARIANT(ax1,"axial bond not found"); CHECK_INVARIANT(ax2,"axial bond not found"); boost::tie(beg1,end1) = mol.getAtomBonds(atom); while(beg1!=end1){ const Bond *bond=mol[*beg1].get(); ++beg1; if(bond==ax1 || bond==ax2) continue; if(!eq1) eq1=bond; else if(!eq2) eq2=bond; else if(!eq3) eq3=bond; } CHECK_INVARIANT(eq1,"equatorial bond not found"); CHECK_INVARIANT(eq2,"equatorial bond not found"); CHECK_INVARIANT(eq3,"equatorial bond not found"); 
//------------------------------------------------------------ // alright, add the angles: AngleBendContrib *contrib; int atomIdx=atom->getIdx(); int i,j; // Axial-Axial i=ax1->getOtherAtomIdx(atomIdx); j=ax2->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax1->getBondTypeAsDouble(), ax2->getBondTypeAsDouble(), params[i],params[atomIdx],params[j],2); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } // Equatorial-Equatorial i=eq1->getOtherAtomIdx(atomIdx); j=eq2->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, eq1->getBondTypeAsDouble(), eq2->getBondTypeAsDouble(), params[i],params[atomIdx],params[j],3); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=eq1->getOtherAtomIdx(atomIdx); j=eq3->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, eq1->getBondTypeAsDouble(), eq3->getBondTypeAsDouble(), params[i],params[atomIdx],params[j],3); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=eq2->getOtherAtomIdx(atomIdx); j=eq3->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, eq2->getBondTypeAsDouble(), eq3->getBondTypeAsDouble(), params[i],params[atomIdx],params[j],3); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } // Axial-Equatorial i=ax1->getOtherAtomIdx(atomIdx); j=eq1->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax1->getBondTypeAsDouble(), eq1->getBondTypeAsDouble(), params[i],params[atomIdx],params[j]); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=ax1->getOtherAtomIdx(atomIdx); j=eq2->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax1->getBondTypeAsDouble(), eq2->getBondTypeAsDouble(), params[i],params[atomIdx],params[j]); 
field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=ax1->getOtherAtomIdx(atomIdx); j=eq3->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax1->getBondTypeAsDouble(), eq3->getBondTypeAsDouble(), params[i],params[atomIdx],params[j]); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=ax2->getOtherAtomIdx(atomIdx); j=eq1->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax2->getBondTypeAsDouble(), eq1->getBondTypeAsDouble(), params[i],params[atomIdx],params[j]); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=ax2->getOtherAtomIdx(atomIdx); j=eq2->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax2->getBondTypeAsDouble(), eq2->getBondTypeAsDouble(), params[i],params[atomIdx],params[j]); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } i=ax2->getOtherAtomIdx(atomIdx); j=eq3->getOtherAtomIdx(atomIdx); if(params[i]&¶ms[j]){ contrib = new AngleBendContrib(field,i,atomIdx,j, ax2->getBondTypeAsDouble(), eq3->getBondTypeAsDouble(), params[i],params[atomIdx],params[j]); field->contribs().push_back(ForceFields::ContribPtr(contrib)); } }