PyO3A *getMMFFO3A(ROMol &prbMol, ROMol &refMol, python::object prbProps, python::object refProps, int prbCid = -1, int refCid = -1, bool reflect = false, unsigned int maxIters = 50, unsigned int options = 0, python::list constraintMap = python::list(), python::list constraintWeights = python::list()) { MatchVectType *cMap = (python::len(constraintMap) ? _translateAtomMap(constraintMap) : NULL); RDNumeric::DoubleVector *cWts = NULL; if (cMap) { cWts = _translateWeights(constraintWeights); if (cWts) { if ((*cMap).size() != (*cWts).size()) { throw_value_error( "The number of weights should match the number of constraints"); } } for (unsigned int i = 0; i < (*cMap).size(); ++i) { if (((*cMap)[i].first < 0) || ((*cMap)[i].first >= rdcast<int>(prbMol.getNumAtoms())) || ((*cMap)[i].second < 0) || ((*cMap)[i].second >= rdcast<int>(refMol.getNumAtoms()))) { throw_value_error("Constrained atom idx out of range"); } if ((prbMol[(*cMap)[i].first]->getAtomicNum() == 1) || (refMol[(*cMap)[i].second]->getAtomicNum() == 1)) { throw_value_error("Constrained atoms must be heavy atoms"); } } } ForceFields::PyMMFFMolProperties *prbPyMMFFMolProperties = NULL; MMFF::MMFFMolProperties *prbMolProps = NULL; ForceFields::PyMMFFMolProperties *refPyMMFFMolProperties = NULL; MMFF::MMFFMolProperties *refMolProps = NULL; if (prbProps != python::object()) { prbPyMMFFMolProperties = python::extract<ForceFields::PyMMFFMolProperties *>(prbProps); prbMolProps = prbPyMMFFMolProperties->mmffMolProperties.get(); } else { prbMolProps = new MMFF::MMFFMolProperties(prbMol); if (!prbMolProps->isValid()) { throw_value_error("missing MMFF94 parameters for probe molecule"); } } if (refProps != python::object()) { refPyMMFFMolProperties = python::extract<ForceFields::PyMMFFMolProperties *>(refProps); refMolProps = refPyMMFFMolProperties->mmffMolProperties.get(); } else { refMolProps = new MMFF::MMFFMolProperties(refMol); if (!refMolProps->isValid()) { throw_value_error("missing MMFF94 parameters for reference molecule"); } } O3A *o3a; { NOGIL gil; o3a = new MolAlign::O3A(prbMol, refMol, prbMolProps, refMolProps, MolAlign::O3A::MMFF94, prbCid, refCid, reflect, maxIters, options, cMap, cWts); } PyO3A *pyO3A = new PyO3A(o3a); if (!prbPyMMFFMolProperties) delete prbMolProps; if (!refPyMMFFMolProperties) delete refMolProps; if (cMap) { delete cMap; } if (cWts) { delete cWts; } return pyO3A; }
/*********************************************** Recursively finds the best quantization boundaries **Arguments** - vals: a 1D Numeric array with the values of the variables, this should be sorted - cuts: a list with the indices of the quantization bounds (indices are into _starts_ ) - which: an integer indicating which bound is being adjusted here (and index into _cuts_ ) - starts: a list of potential starting points for quantization bounds - results: a 1D Numeric array of integer result codes - nPossibleRes: an integer with the number of possible result codes **Returns** - a 2-tuple containing: 1) the best information gain found so far 2) a list of the quantization bound indices ( _cuts_ for the best case) **Notes** - this is not even remotely efficient, which is why a C replacement was written - this is a drop-in replacement for *ML.Data.Quantize._PyRecurseBounds* ***********************************************/ static python::tuple cQuantize_RecurseOnBounds(python::object vals, python::list pyCuts, int which, python::list pyStarts, python::object results, int nPossibleRes) { PyArrayObject *contigVals,*contigResults; long int *cuts,*starts; /* ------- Setup code ------- */ contigVals = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(vals.ptr(),PyArray_DOUBLE,1,1)); if(!contigVals){ throw_value_error("could not convert value argument"); } contigResults = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(results.ptr(),PyArray_LONG,1,1)); if(!contigResults){ throw_value_error("could not convert results argument"); } python::ssize_t nCuts = python::len(pyCuts); cuts = (long int *)calloc(nCuts,sizeof(long int)); for (python::ssize_t i=0; i<nCuts; i++) { python::object elem = pyCuts[i]; cuts[i] = python::extract<long int>(elem); } python::ssize_t nStarts = python::len(pyStarts); starts = (long int *)calloc(nStarts,sizeof(long int)); for (python::ssize_t i=0; i<nStarts; i++){ python::object elem = pyStarts[i]; starts[i] = python::extract<long int>(elem); } // do the real work double gain = RecurseHelper((double *)contigVals->data,contigVals->dimensions[0], cuts,nCuts,which,starts,nStarts, (long int *)contigResults->data,nPossibleRes); /* ------- Construct the return value ------- */ python::list cutObj; for (python::ssize_t i=0; i<nCuts; i++) { cutObj.append(cuts[i]); } free(cuts); free(starts); return python::make_tuple(gain, cutObj); }
static python::list cQuantize_FindStartPoints(python::object values, python::object results, int nData) { python::list startPts; if(nData<2){ return startPts; } PyArrayObject *contigVals = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(values.ptr(),PyArray_DOUBLE,1,1)); if(!contigVals){ throw_value_error("could not convert value argument"); } double *vals=(double *)contigVals->data; PyArrayObject *contigResults = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(results.ptr(),PyArray_LONG,1,1)); if(!contigResults){ throw_value_error("could not convert results argument"); } long *res=(long *)contigResults->data; bool firstBlock=true; long lastBlockAct=-2,blockAct=res[0]; int lastDiv=-1; double tol=1e-8; int i=1; while(i<nData){ while(i<nData && vals[i]-vals[i-1]<=tol){ if(res[i]!=blockAct){ blockAct=-1; } ++i; } if(firstBlock){ firstBlock=false; lastBlockAct=blockAct; lastDiv=i; } else { if(blockAct==-1 || lastBlockAct==-1 || blockAct!=lastBlockAct){ startPts.append(lastDiv); lastDiv=i; lastBlockAct=blockAct; } else { lastDiv=i; } } if(i<nData) blockAct=res[i]; ++i; } // catch the case that the last point also sets a bin: if( blockAct != lastBlockAct ){ startPts.append(lastDiv); } return startPts; }
PyObject *getEuclideanDistMat(python::object descripMat) { // Bit of a pain involved here, we accept three types of PyObjects here // 1. A Numeric Array // - first find what 'type' of entry we have (float, double and int is all // we recognize for now) // - then point to contiguous piece of memory from the array that contains // the data with a type* // - then make a new type** pointer so that double index into this // contiguous memory will work // and then pass it along to the distance calculator // 2. A list of Numeric Vector (or 1D arrays) // - in this case wrap descripMat with a PySequenceHolder<type*> where // type is the // type of entry in vector (accepted types are int, double and float // - Then pass the PySequenceHolder to the metrci calculator // 3. A list (or tuple) of lists (or tuple) // - In this case other than wrapping descripMat with a PySequenceHolder // each of the indivual list in there are also wrapped by a // PySequenceHolder // - so the distance calculator is passed in a // "PySequenceHolder<PySequenceHolder<double>>" // - FIX: not that we always convert entry values to double here, even if // we passed // in a list of list of ints (or floats). Given that lists can be // heterogeneous, I do not // know how to ask a list what type of entries if contains. // // OK my brain is going to explode now // first deal with situation where we have an Numeric Array PyObject *descMatObj = descripMat.ptr(); PyArrayObject *distRes; if (PyArray_Check(descMatObj)) { // get the dimensions of the array int nrows = ((PyArrayObject *)descMatObj)->dimensions[0]; int ncols = ((PyArrayObject *)descMatObj)->dimensions[1]; int i; CHECK_INVARIANT((nrows > 0) && (ncols > 0), ""); npy_intp dMatLen = nrows * (nrows - 1) / 2; // now that we have the dimensions declare the distance matrix which is // always a // 1D double array distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE); // grab a pointer to the data in the array so that we can directly put // values in there // and avoid copying : double *dMat = (double *)distRes->data; PyArrayObject *copy; copy = (PyArrayObject *)PyArray_ContiguousFromObject( descMatObj, ((PyArrayObject *)descMatObj)->descr->type_num, 2, 2); // if we have double array if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_DOUBLE) { double *desc = (double *)copy->data; // REVIEW: create an adaptor object to hold a double * and support // operator[]() so that we don't have to do this stuff: // here is the 2D array trick this so that when the distance calaculator // asks for desc2D[i] we basically get the ith row as double* double **desc2D = new double *[nrows]; for (i = 0; i < nrows; i++) { desc2D[i] = desc; desc += ncols; } MetricMatrixCalc<double **, double *> mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>); mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat); delete[] desc2D; // we got the distance matrix we are happy so return return PyArray_Return(distRes); } // if we have a float array else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_FLOAT) { float *desc = (float *)copy->data; float **desc2D = new float *[nrows]; for (i = 0; i < nrows; i++) { desc2D[i] = desc; desc += ncols; } MetricMatrixCalc<float **, float *> mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<float *, float *>); mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat); delete[] desc2D; return PyArray_Return(distRes); } // if we have an interger array else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_INT) { int *desc = (int *)copy->data; int **desc2D = new int *[nrows]; for (i = 0; i < nrows; i++) { desc2D[i] = desc; desc += ncols; } MetricMatrixCalc<int **, int *> mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<int *, int *>); mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat); delete[] desc2D; return PyArray_Return(distRes); } else { // unreconiged type for the matrix, throw up throw_value_error( "The array has to be of type int, float, or double for " "GetEuclideanDistMat"); } } // done with an array input else { // REVIEW: removed a ton of code here // we have probably have a list or a tuple unsigned int ncols = 0; unsigned int nrows = python::extract<unsigned int>(descripMat.attr("__len__")()); CHECK_INVARIANT(nrows > 0, "Empty list passed in"); npy_intp dMatLen = nrows * (nrows - 1) / 2; distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE); double *dMat = (double *)distRes->data; // assume that we a have a list of list of values (that can be extracted to // double) std::vector<PySequenceHolder<double> > dData; dData.reserve(nrows); for (unsigned int i = 0; i < nrows; i++) { // PySequenceHolder<double> row(seq[i]); PySequenceHolder<double> row(descripMat[i]); if (i == 0) { ncols = row.size(); } else if (row.size() != ncols) { throw_value_error("All subsequences must be the same length"); } dData.push_back(row); } MetricMatrixCalc<std::vector<PySequenceHolder<double> >, PySequenceHolder<double> > mmCalc; mmCalc.setMetricFunc(&EuclideanDistanceMetric<PySequenceHolder<double>, PySequenceHolder<double> >); mmCalc.calcMetricMatrix(dData, nrows, ncols, dMat); } return PyArray_Return(distRes); }
PyObject *embedBoundsMatrix(python::object boundsMatArg,int maxIters=10, bool randomizeOnFailure=false,int numZeroFail=2, python::list weights=python::list(), int randomSeed=-1){ PyObject *boundsMatObj = boundsMatArg.ptr(); if(!PyArray_Check(boundsMatObj)) throw_value_error("Argument isn't an array"); PyArrayObject *boundsMat=reinterpret_cast<PyArrayObject *>(boundsMatObj); // get the dimensions of the array unsigned int nrows = boundsMat->dimensions[0]; unsigned int ncols = boundsMat->dimensions[1]; if(nrows!=ncols) throw_value_error("The array has to be square"); if(nrows<=0) throw_value_error("The array has to have a nonzero size"); if (boundsMat->descr->type_num != PyArray_DOUBLE) throw_value_error("Only double arrays are currently supported"); unsigned int dSize = nrows*nrows; double *cData = new double[dSize]; double *inData = reinterpret_cast<double *>(boundsMat->data); memcpy(static_cast<void *>(cData), static_cast<const void *>(inData), dSize*sizeof(double)); DistGeom::BoundsMatrix::DATA_SPTR sdata(cData); DistGeom::BoundsMatrix bm(nrows,sdata); RDGeom::Point3D *positions=new RDGeom::Point3D[nrows]; std::vector<RDGeom::Point *> posPtrs; for (unsigned int i = 0; i < nrows; i++) { posPtrs.push_back(&positions[i]); } RDNumeric::DoubleSymmMatrix distMat(nrows, 0.0); // ---- ---- ---- ---- ---- ---- ---- ---- ---- // start the embedding: bool gotCoords=false; for(int iter=0;iter<maxIters && !gotCoords;iter++){ // pick a random distance matrix DistGeom::pickRandomDistMat(bm,distMat,randomSeed); // and embed it: gotCoords=DistGeom::computeInitialCoords(distMat,posPtrs,randomizeOnFailure, numZeroFail,randomSeed); // update the seed: if(randomSeed>=0) randomSeed+=iter*999; } if(gotCoords){ std::map<std::pair<int,int>,double> weightMap; unsigned int nElems=PySequence_Size(weights.ptr()); for(unsigned int entryIdx=0;entryIdx<nElems;entryIdx++){ PyObject *entry=PySequence_GetItem(weights.ptr(),entryIdx); if(!PySequence_Check(entry) || PySequence_Size(entry)!=3){ throw_value_error("weights argument must be a sequence of 3-sequences"); } int idx1=PyInt_AsLong(PySequence_GetItem(entry,0)); int idx2=PyInt_AsLong(PySequence_GetItem(entry,1)); double w=PyFloat_AsDouble(PySequence_GetItem(entry,2)); weightMap[std::make_pair(idx1,idx2)]=w; } DistGeom::VECT_CHIRALSET csets; ForceFields::ForceField *field = DistGeom::constructForceField(bm,posPtrs,csets,0.0, 0.0, &weightMap); CHECK_INVARIANT(field,"could not build dgeom force field"); field->initialize(); if(field->calcEnergy()>1e-5){ int needMore=1; while(needMore){ needMore=field->minimize(); } } delete field; } else { throw_value_error("could not embed matrix"); } // ---- ---- ---- ---- ---- ---- ---- ---- ---- // construct the results matrix: npy_intp dims[2]; dims[0] = nrows; dims[1] = 3; PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(2,dims,NPY_DOUBLE); double *resData=reinterpret_cast<double *>(res->data); for(unsigned int i=0;i<nrows;i++){ unsigned int iTab=i*3; for (unsigned int j = 0; j < 3; ++j) { resData[iTab + j]=positions[i][j]; //.x; } } delete [] positions; return PyArray_Return(res); }
python::tuple getCrippenO3AForConfs( ROMol &prbMol, ROMol &refMol, int numThreads, python::list prbCrippenContribs, python::list refCrippenContribs, int refCid = -1, bool reflect = false, unsigned int maxIters = 50, unsigned int options = 0, python::list constraintMap = python::list(), python::list constraintWeights = python::list()) { MatchVectType *cMap = (python::len(constraintMap) ? _translateAtomMap(constraintMap) : nullptr); RDNumeric::DoubleVector *cWts = nullptr; if (cMap) { cWts = _translateWeights(constraintWeights); if (cWts) { if ((*cMap).size() != (*cWts).size()) { throw_value_error( "The number of weights should match the number of constraints"); } } for (auto &i : (*cMap)) { if ((i.first < 0) || (i.first >= rdcast<int>(prbMol.getNumAtoms())) || (i.second < 0) || (i.second >= rdcast<int>(refMol.getNumAtoms()))) { throw_value_error("Constrained atom idx out of range"); } if ((prbMol[i.first]->getAtomicNum() == 1) || (refMol[i.second]->getAtomicNum() == 1)) { throw_value_error("Constrained atoms must be heavy atoms"); } } } unsigned int prbNAtoms = prbMol.getNumAtoms(); std::vector<double> prbLogpContribs(prbNAtoms); unsigned int refNAtoms = refMol.getNumAtoms(); std::vector<double> refLogpContribs(refNAtoms); if ((prbCrippenContribs != python::list()) && (python::len(prbCrippenContribs) == prbNAtoms)) { for (unsigned int i = 0; i < prbNAtoms; ++i) { python::tuple logpMRTuple = python::extract<python::tuple>(prbCrippenContribs[i]); prbLogpContribs[i] = python::extract<double>(logpMRTuple[0]); } } else { std::vector<double> prbMRContribs(prbNAtoms); std::vector<unsigned int> prbAtomTypes(prbNAtoms); std::vector<std::string> prbAtomTypeLabels(prbNAtoms); Descriptors::getCrippenAtomContribs(prbMol, prbLogpContribs, prbMRContribs, true, &prbAtomTypes, &prbAtomTypeLabels); } if ((refCrippenContribs != python::list()) && (python::len(refCrippenContribs) == refNAtoms)) { for (unsigned int i = 0; i < refNAtoms; ++i) { python::tuple logpMRTuple = python::extract<python::tuple>(refCrippenContribs[i]); refLogpContribs[i] = python::extract<double>(logpMRTuple[0]); } } else { std::vector<double> refMRContribs(refNAtoms); std::vector<unsigned int> refAtomTypes(refNAtoms); std::vector<std::string> refAtomTypeLabels(refNAtoms); Descriptors::getCrippenAtomContribs(refMol, refLogpContribs, refMRContribs, true, &refAtomTypes, &refAtomTypeLabels); } std::vector<boost::shared_ptr<O3A>> res; { NOGIL gil; getO3AForProbeConfs(prbMol, refMol, &prbLogpContribs, &refLogpContribs, res, numThreads, MolAlign::O3A::CRIPPEN, refCid, reflect, maxIters, options, cMap, cWts); } python::list pyres; for (auto &re : res) { pyres.append(new PyO3A(re)); } if (cMap) { delete cMap; } if (cWts) { delete cWts; } return python::tuple(pyres); }
python::tuple getMMFFO3AForConfs( ROMol &prbMol, ROMol &refMol, int numThreads, python::object prbProps, python::object refProps, int refCid = -1, bool reflect = false, unsigned int maxIters = 50, unsigned int options = 0, python::list constraintMap = python::list(), python::list constraintWeights = python::list()) { MatchVectType *cMap = (python::len(constraintMap) ? _translateAtomMap(constraintMap) : nullptr); RDNumeric::DoubleVector *cWts = nullptr; if (cMap) { cWts = _translateWeights(constraintWeights); if (cWts) { if ((*cMap).size() != (*cWts).size()) { throw_value_error( "The number of weights should match the number of constraints"); } } for (auto &i : (*cMap)) { if ((i.first < 0) || (i.first >= rdcast<int>(prbMol.getNumAtoms())) || (i.second < 0) || (i.second >= rdcast<int>(refMol.getNumAtoms()))) { throw_value_error("Constrained atom idx out of range"); } if ((prbMol[i.first]->getAtomicNum() == 1) || (refMol[i.second]->getAtomicNum() == 1)) { throw_value_error("Constrained atoms must be heavy atoms"); } } } ForceFields::PyMMFFMolProperties *prbPyMMFFMolProperties = nullptr; MMFF::MMFFMolProperties *prbMolProps = nullptr; ForceFields::PyMMFFMolProperties *refPyMMFFMolProperties = nullptr; MMFF::MMFFMolProperties *refMolProps = nullptr; if (prbProps != python::object()) { prbPyMMFFMolProperties = python::extract<ForceFields::PyMMFFMolProperties *>(prbProps); prbMolProps = prbPyMMFFMolProperties->mmffMolProperties.get(); } else { prbMolProps = new MMFF::MMFFMolProperties(prbMol); if (!prbMolProps->isValid()) { throw_value_error("missing MMFF94 parameters for probe molecule"); } } if (refProps != python::object()) { refPyMMFFMolProperties = python::extract<ForceFields::PyMMFFMolProperties *>(refProps); refMolProps = refPyMMFFMolProperties->mmffMolProperties.get(); } else { refMolProps = new MMFF::MMFFMolProperties(refMol); if (!refMolProps->isValid()) { throw_value_error("missing MMFF94 parameters for reference molecule"); } } std::vector<boost::shared_ptr<O3A>> res; { NOGIL gil; getO3AForProbeConfs(prbMol, refMol, prbMolProps, refMolProps, res, numThreads, MolAlign::O3A::MMFF94, refCid, reflect, maxIters, options, cMap, cWts); } python::list pyres; for (auto &i : res) { pyres.append(new PyO3A(i)); } if (!prbPyMMFFMolProperties) delete prbMolProps; if (!refPyMMFFMolProperties) delete refMolProps; if (cMap) { delete cMap; } if (cWts) { delete cWts; } return python::tuple(pyres); }
RDKIT_WRAP_DECL void translate_value_error(ValueErrorException const&e){ throw_value_error(e.message()); }