Пример #1
0
PyO3A *getMMFFO3A(ROMol &prbMol, ROMol &refMol, python::object prbProps,
                  python::object refProps, int prbCid = -1, int refCid = -1,
                  bool reflect = false, unsigned int maxIters = 50,
                  unsigned int options = 0,
                  python::list constraintMap = python::list(),
                  python::list constraintWeights = python::list()) {
    MatchVectType *cMap =
        (python::len(constraintMap) ? _translateAtomMap(constraintMap) : NULL);
    RDNumeric::DoubleVector *cWts = NULL;
    if (cMap) {
        cWts = _translateWeights(constraintWeights);
        if (cWts) {
            if ((*cMap).size() != (*cWts).size()) {
                throw_value_error(
                    "The number of weights should match the number of constraints");
            }
        }
        for (unsigned int i = 0; i < (*cMap).size(); ++i) {
            if (((*cMap)[i].first < 0) ||
                    ((*cMap)[i].first >= rdcast<int>(prbMol.getNumAtoms())) ||
                    ((*cMap)[i].second < 0) ||
                    ((*cMap)[i].second >= rdcast<int>(refMol.getNumAtoms()))) {
                throw_value_error("Constrained atom idx out of range");
            }
            if ((prbMol[(*cMap)[i].first]->getAtomicNum() == 1) ||
                    (refMol[(*cMap)[i].second]->getAtomicNum() == 1)) {
                throw_value_error("Constrained atoms must be heavy atoms");
            }
        }
    }
    ForceFields::PyMMFFMolProperties *prbPyMMFFMolProperties = NULL;
    MMFF::MMFFMolProperties *prbMolProps = NULL;
    ForceFields::PyMMFFMolProperties *refPyMMFFMolProperties = NULL;
    MMFF::MMFFMolProperties *refMolProps = NULL;

    if (prbProps != python::object()) {
        prbPyMMFFMolProperties =
            python::extract<ForceFields::PyMMFFMolProperties *>(prbProps);
        prbMolProps = prbPyMMFFMolProperties->mmffMolProperties.get();
    } else {
        prbMolProps = new MMFF::MMFFMolProperties(prbMol);
        if (!prbMolProps->isValid()) {
            throw_value_error("missing MMFF94 parameters for probe molecule");
        }
    }
    if (refProps != python::object()) {
        refPyMMFFMolProperties =
            python::extract<ForceFields::PyMMFFMolProperties *>(refProps);
        refMolProps = refPyMMFFMolProperties->mmffMolProperties.get();
    } else {
        refMolProps = new MMFF::MMFFMolProperties(refMol);
        if (!refMolProps->isValid()) {
            throw_value_error("missing MMFF94 parameters for reference molecule");
        }
    }
    O3A *o3a;
    {
        NOGIL gil;
        o3a = new MolAlign::O3A(prbMol, refMol, prbMolProps, refMolProps,
                                MolAlign::O3A::MMFF94, prbCid, refCid, reflect,
                                maxIters, options, cMap, cWts);
    }
    PyO3A *pyO3A = new PyO3A(o3a);

    if (!prbPyMMFFMolProperties) delete prbMolProps;
    if (!refPyMMFFMolProperties) delete refMolProps;
    if (cMap) {
        delete cMap;
    }
    if (cWts) {
        delete cWts;
    }

    return pyO3A;
}
Пример #2
0
/***********************************************
 
   Recursively finds the best quantization boundaries

   **Arguments**

     - vals: a 1D Numeric array with the values of the variables,
       this should be sorted

     - cuts: a list with the indices of the quantization bounds
       (indices are into _starts_ )

     - which: an integer indicating which bound is being adjusted here
       (and index into _cuts_ )

     - starts: a list of potential starting points for quantization bounds

     - results: a 1D Numeric array of integer result codes

     - nPossibleRes: an integer with the number of possible result codes

   **Returns**

     - a 2-tuple containing:

       1) the best information gain found so far

       2) a list of the quantization bound indices ( _cuts_ for the best case)
   
   **Notes**

    - this is not even remotely efficient, which is why a C replacement
      was written

    - this is a drop-in replacement for *ML.Data.Quantize._PyRecurseBounds*
						
 ***********************************************/
static python::tuple
cQuantize_RecurseOnBounds(python::object vals, python::list pyCuts, int which, 
			  python::list pyStarts, python::object results, int nPossibleRes)
{
  PyArrayObject *contigVals,*contigResults;
  long int *cuts,*starts;

  /*
    -------

    Setup code

    -------
  */
  contigVals 
    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(vals.ptr(),PyArray_DOUBLE,1,1));
  if(!contigVals){
    throw_value_error("could not convert value argument");
  }

  contigResults 
    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(results.ptr(),PyArray_LONG,1,1));
  if(!contigResults){
    throw_value_error("could not convert results argument");
  }

  python::ssize_t nCuts = python::len(pyCuts);
  cuts = (long int *)calloc(nCuts,sizeof(long int));
  for (python::ssize_t i=0; i<nCuts; i++) {
    python::object elem = pyCuts[i];
    cuts[i] = python::extract<long int>(elem);
  }

  python::ssize_t nStarts = python::len(pyStarts);
  starts = (long int *)calloc(nStarts,sizeof(long int));
  for (python::ssize_t i=0; i<nStarts; i++){
    python::object elem = pyStarts[i];
    starts[i] = python::extract<long int>(elem);
  }

  // do the real work
  double gain 
    = RecurseHelper((double *)contigVals->data,contigVals->dimensions[0],
		    cuts,nCuts,which,starts,nStarts,
		    (long int *)contigResults->data,nPossibleRes);
		       
  /*
    -------

    Construct the return value

    -------
  */
  python::list cutObj;
  for (python::ssize_t i=0; i<nCuts; i++) {
    cutObj.append(cuts[i]);
  }
  free(cuts);
  free(starts);
  return python::make_tuple(gain, cutObj); 
}
Пример #3
0
static python::list
cQuantize_FindStartPoints(python::object values, python::object results, 
			  int nData)
{
  python::list startPts;

  if(nData<2){
    return startPts;
  }

  PyArrayObject *contigVals 
    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(values.ptr(),PyArray_DOUBLE,1,1));
  if(!contigVals){
    throw_value_error("could not convert value argument");
  }

  double *vals=(double *)contigVals->data;

  PyArrayObject *contigResults 
    = reinterpret_cast<PyArrayObject *>(PyArray_ContiguousFromObject(results.ptr(),PyArray_LONG,1,1));
  if(!contigResults){
    throw_value_error("could not convert results argument");
  }

  long *res=(long *)contigResults->data;

  bool firstBlock=true;
  long lastBlockAct=-2,blockAct=res[0];
  int lastDiv=-1;
  double tol=1e-8;

  int i=1;
  while(i<nData){
    while(i<nData && vals[i]-vals[i-1]<=tol){
      if(res[i]!=blockAct){
        blockAct=-1;
      }
      ++i;
    }
    if(firstBlock){
      firstBlock=false;
      lastBlockAct=blockAct;
      lastDiv=i;
    } else {
      if(blockAct==-1 || lastBlockAct==-1 || blockAct!=lastBlockAct){
	startPts.append(lastDiv);
        lastDiv=i;
        lastBlockAct=blockAct;
      } else {
        lastDiv=i;
      }
    }
    if(i<nData) blockAct=res[i];
    ++i; 
  }

  // catch the case that the last point also sets a bin:
  if( blockAct != lastBlockAct ){
    startPts.append(lastDiv);
  }

  return startPts;
}
Пример #4
0
PyObject *getEuclideanDistMat(python::object descripMat) {
  // Bit of a pain involved here, we accept three types of PyObjects here
  // 1. A Numeric Array
  //     - first find what 'type' of entry we have (float, double and int is all
  //     we recognize for now)
  //     - then point to contiguous piece of memory from the array that contains
  //     the data with a type*
  //     - then make a new type** pointer so that double index into this
  //     contiguous memory will work
  //       and then pass it along to the distance calculator
  // 2. A list of Numeric Vector (or 1D arrays)
  //     - in this case wrap descripMat with a PySequenceHolder<type*> where
  //     type is the
  //       type of entry in vector (accepted types are int, double and float
  //     - Then pass the PySequenceHolder to the metrci calculator
  // 3. A list (or tuple) of lists (or tuple)
  //     - In this case other than wrapping descripMat with a PySequenceHolder
  //       each of the indivual list in there are also wrapped by a
  //       PySequenceHolder
  //     - so the distance calculator is passed in a
  //     "PySequenceHolder<PySequenceHolder<double>>"
  //     - FIX: not that we always convert entry values to double here, even if
  //     we passed
  //       in a list of list of ints (or floats). Given that lists can be
  //       heterogeneous, I do not
  //       know how to ask a list what type of entries if contains.
  //
  //  OK my brain is going to explode now

  // first deal with situation where we have an Numeric Array
  PyObject *descMatObj = descripMat.ptr();
  PyArrayObject *distRes;
  if (PyArray_Check(descMatObj)) {
    // get the dimensions of the array
    int nrows = ((PyArrayObject *)descMatObj)->dimensions[0];
    int ncols = ((PyArrayObject *)descMatObj)->dimensions[1];
    int i;
    CHECK_INVARIANT((nrows > 0) && (ncols > 0), "");

    npy_intp dMatLen = nrows * (nrows - 1) / 2;

    // now that we have the dimensions declare the distance matrix which is
    // always a
    // 1D double array
    distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);

    // grab a pointer to the data in the array so that we can directly put
    // values in there
    // and avoid copying :
    double *dMat = (double *)distRes->data;

    PyArrayObject *copy;
    copy = (PyArrayObject *)PyArray_ContiguousFromObject(
        descMatObj, ((PyArrayObject *)descMatObj)->descr->type_num, 2, 2);
    // if we have double array
    if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_DOUBLE) {
      double *desc = (double *)copy->data;

      // REVIEW: create an adaptor object to hold a double * and support
      //  operator[]() so that we don't have to do this stuff:

      // here is the 2D array trick this so that when the distance calaculator
      // asks for desc2D[i] we basically get the ith row as double*
      double **desc2D = new double *[nrows];
      for (i = 0; i < nrows; i++) {
        desc2D[i] = desc;
        desc += ncols;
      }
      MetricMatrixCalc<double **, double *> mmCalc;
      mmCalc.setMetricFunc(&EuclideanDistanceMetric<double *, double *>);
      mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);

      delete[] desc2D;
      // we got the distance matrix we are happy so return
      return PyArray_Return(distRes);
    }

    // if we have a float array
    else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_FLOAT) {
      float *desc = (float *)copy->data;
      float **desc2D = new float *[nrows];
      for (i = 0; i < nrows; i++) {
        desc2D[i] = desc;
        desc += ncols;
      }
      MetricMatrixCalc<float **, float *> mmCalc;
      mmCalc.setMetricFunc(&EuclideanDistanceMetric<float *, float *>);
      mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
      delete[] desc2D;
      return PyArray_Return(distRes);
    }

    // if we have an interger array
    else if (((PyArrayObject *)descMatObj)->descr->type_num == NPY_INT) {
      int *desc = (int *)copy->data;
      int **desc2D = new int *[nrows];
      for (i = 0; i < nrows; i++) {
        desc2D[i] = desc;
        desc += ncols;
      }
      MetricMatrixCalc<int **, int *> mmCalc;
      mmCalc.setMetricFunc(&EuclideanDistanceMetric<int *, int *>);
      mmCalc.calcMetricMatrix(desc2D, nrows, ncols, dMat);
      delete[] desc2D;
      return PyArray_Return(distRes);
    } else {
      // unreconiged type for the matrix, throw up
      throw_value_error(
          "The array has to be of type int, float, or double for "
          "GetEuclideanDistMat");
    }
  }  // done with an array input
  else {
    // REVIEW: removed a ton of code here

    // we have probably have a list or a tuple

    unsigned int ncols = 0;
    unsigned int nrows =
        python::extract<unsigned int>(descripMat.attr("__len__")());
    CHECK_INVARIANT(nrows > 0, "Empty list passed in");

    npy_intp dMatLen = nrows * (nrows - 1) / 2;
    distRes = (PyArrayObject *)PyArray_SimpleNew(1, &dMatLen, NPY_DOUBLE);
    double *dMat = (double *)distRes->data;

    // assume that we a have a list of list of values (that can be extracted to
    // double)
    std::vector<PySequenceHolder<double> > dData;
    dData.reserve(nrows);
    for (unsigned int i = 0; i < nrows; i++) {
      // PySequenceHolder<double> row(seq[i]);
      PySequenceHolder<double> row(descripMat[i]);
      if (i == 0) {
        ncols = row.size();
      } else if (row.size() != ncols) {
        throw_value_error("All subsequences must be the same length");
      }
      dData.push_back(row);
    }

    MetricMatrixCalc<std::vector<PySequenceHolder<double> >,
                     PySequenceHolder<double> > mmCalc;
    mmCalc.setMetricFunc(&EuclideanDistanceMetric<PySequenceHolder<double>,
                                                  PySequenceHolder<double> >);
    mmCalc.calcMetricMatrix(dData, nrows, ncols, dMat);
  }
  return PyArray_Return(distRes);
}
Пример #5
0
  PyObject *embedBoundsMatrix(python::object boundsMatArg,int maxIters=10,
                              bool randomizeOnFailure=false,int numZeroFail=2,
                              python::list weights=python::list(),
                              int randomSeed=-1){
    PyObject *boundsMatObj = boundsMatArg.ptr();
    if(!PyArray_Check(boundsMatObj))
      throw_value_error("Argument isn't an array");
    
    PyArrayObject *boundsMat=reinterpret_cast<PyArrayObject *>(boundsMatObj);
    // get the dimensions of the array
    unsigned int nrows = boundsMat->dimensions[0];
    unsigned int ncols = boundsMat->dimensions[1];
    if(nrows!=ncols)
      throw_value_error("The array has to be square");
    if(nrows<=0)
      throw_value_error("The array has to have a nonzero size");
    if (boundsMat->descr->type_num != PyArray_DOUBLE)
      throw_value_error("Only double arrays are currently supported");

    unsigned int dSize = nrows*nrows;
    double *cData = new double[dSize];
    double *inData = reinterpret_cast<double *>(boundsMat->data);
    memcpy(static_cast<void *>(cData), 
           static_cast<const void *>(inData),
           dSize*sizeof(double));

    DistGeom::BoundsMatrix::DATA_SPTR sdata(cData);
    DistGeom::BoundsMatrix bm(nrows,sdata);

    RDGeom::Point3D *positions=new RDGeom::Point3D[nrows];
    std::vector<RDGeom::Point *> posPtrs;
    for (unsigned int i = 0; i < nrows; i++) {
      posPtrs.push_back(&positions[i]);
    }

    RDNumeric::DoubleSymmMatrix distMat(nrows, 0.0);

    // ---- ---- ---- ---- ---- ---- ---- ---- ---- 
    // start the embedding:
    bool gotCoords=false;
    for(int iter=0;iter<maxIters && !gotCoords;iter++){
      // pick a random distance matrix
      DistGeom::pickRandomDistMat(bm,distMat,randomSeed);

      // and embed it:
      gotCoords=DistGeom::computeInitialCoords(distMat,posPtrs,randomizeOnFailure,
                                               numZeroFail,randomSeed);

      // update the seed:
      if(randomSeed>=0) randomSeed+=iter*999;
    }

    if(gotCoords){
      std::map<std::pair<int,int>,double> weightMap;
      unsigned int nElems=PySequence_Size(weights.ptr());
      for(unsigned int entryIdx=0;entryIdx<nElems;entryIdx++){
        PyObject *entry=PySequence_GetItem(weights.ptr(),entryIdx);
        if(!PySequence_Check(entry) || PySequence_Size(entry)!=3){
          throw_value_error("weights argument must be a sequence of 3-sequences");
        }
        int idx1=PyInt_AsLong(PySequence_GetItem(entry,0));
        int idx2=PyInt_AsLong(PySequence_GetItem(entry,1));
        double w=PyFloat_AsDouble(PySequence_GetItem(entry,2));
        weightMap[std::make_pair(idx1,idx2)]=w;
      }
      DistGeom::VECT_CHIRALSET csets;
      ForceFields::ForceField *field = DistGeom::constructForceField(bm,posPtrs,csets,0.0, 0.0,
                                                                     &weightMap);
      CHECK_INVARIANT(field,"could not build dgeom force field");
      field->initialize();
      if(field->calcEnergy()>1e-5){
        int needMore=1;
        while(needMore){
          needMore=field->minimize();
        }
      }
      delete field;
    } else {
      throw_value_error("could not embed matrix");
    }

    // ---- ---- ---- ---- ---- ---- ---- ---- ---- 
    // construct the results matrix:
    npy_intp dims[2];
    dims[0] = nrows;
    dims[1] = 3;
    PyArrayObject *res = (PyArrayObject *)PyArray_SimpleNew(2,dims,NPY_DOUBLE);
    double *resData=reinterpret_cast<double *>(res->data);
    for(unsigned int i=0;i<nrows;i++){
      unsigned int iTab=i*3;
      for (unsigned int j = 0; j < 3; ++j) {
        resData[iTab + j]=positions[i][j]; //.x;
      }
    }
    delete [] positions;

    return PyArray_Return(res);
  }
Пример #6
0
python::tuple getCrippenO3AForConfs(
    ROMol &prbMol, ROMol &refMol, int numThreads,
    python::list prbCrippenContribs, python::list refCrippenContribs,
    int refCid = -1, bool reflect = false, unsigned int maxIters = 50,
    unsigned int options = 0, python::list constraintMap = python::list(),
    python::list constraintWeights = python::list()) {
  MatchVectType *cMap =
      (python::len(constraintMap) ? _translateAtomMap(constraintMap) : nullptr);
  RDNumeric::DoubleVector *cWts = nullptr;
  if (cMap) {
    cWts = _translateWeights(constraintWeights);
    if (cWts) {
      if ((*cMap).size() != (*cWts).size()) {
        throw_value_error(
            "The number of weights should match the number of constraints");
      }
    }
    for (auto &i : (*cMap)) {
      if ((i.first < 0) || (i.first >= rdcast<int>(prbMol.getNumAtoms())) ||
          (i.second < 0) || (i.second >= rdcast<int>(refMol.getNumAtoms()))) {
        throw_value_error("Constrained atom idx out of range");
      }
      if ((prbMol[i.first]->getAtomicNum() == 1) ||
          (refMol[i.second]->getAtomicNum() == 1)) {
        throw_value_error("Constrained atoms must be heavy atoms");
      }
    }
  }
  unsigned int prbNAtoms = prbMol.getNumAtoms();
  std::vector<double> prbLogpContribs(prbNAtoms);
  unsigned int refNAtoms = refMol.getNumAtoms();
  std::vector<double> refLogpContribs(refNAtoms);

  if ((prbCrippenContribs != python::list()) &&
      (python::len(prbCrippenContribs) == prbNAtoms)) {
    for (unsigned int i = 0; i < prbNAtoms; ++i) {
      python::tuple logpMRTuple =
          python::extract<python::tuple>(prbCrippenContribs[i]);
      prbLogpContribs[i] = python::extract<double>(logpMRTuple[0]);
    }
  } else {
    std::vector<double> prbMRContribs(prbNAtoms);
    std::vector<unsigned int> prbAtomTypes(prbNAtoms);
    std::vector<std::string> prbAtomTypeLabels(prbNAtoms);
    Descriptors::getCrippenAtomContribs(prbMol, prbLogpContribs, prbMRContribs,
                                        true, &prbAtomTypes,
                                        &prbAtomTypeLabels);
  }
  if ((refCrippenContribs != python::list()) &&
      (python::len(refCrippenContribs) == refNAtoms)) {
    for (unsigned int i = 0; i < refNAtoms; ++i) {
      python::tuple logpMRTuple =
          python::extract<python::tuple>(refCrippenContribs[i]);
      refLogpContribs[i] = python::extract<double>(logpMRTuple[0]);
    }
  } else {
    std::vector<double> refMRContribs(refNAtoms);
    std::vector<unsigned int> refAtomTypes(refNAtoms);
    std::vector<std::string> refAtomTypeLabels(refNAtoms);
    Descriptors::getCrippenAtomContribs(refMol, refLogpContribs, refMRContribs,
                                        true, &refAtomTypes,
                                        &refAtomTypeLabels);
  }
  std::vector<boost::shared_ptr<O3A>> res;
  {
    NOGIL gil;
    getO3AForProbeConfs(prbMol, refMol, &prbLogpContribs, &refLogpContribs, res,
                        numThreads, MolAlign::O3A::CRIPPEN, refCid, reflect,
                        maxIters, options, cMap, cWts);
  }
  python::list pyres;
  for (auto &re : res) {
    pyres.append(new PyO3A(re));
  }

  if (cMap) {
    delete cMap;
  }
  if (cWts) {
    delete cWts;
  }

  return python::tuple(pyres);
}
Пример #7
0
python::tuple getMMFFO3AForConfs(
    ROMol &prbMol, ROMol &refMol, int numThreads, python::object prbProps,
    python::object refProps, int refCid = -1, bool reflect = false,
    unsigned int maxIters = 50, unsigned int options = 0,
    python::list constraintMap = python::list(),
    python::list constraintWeights = python::list()) {
  MatchVectType *cMap =
      (python::len(constraintMap) ? _translateAtomMap(constraintMap) : nullptr);
  RDNumeric::DoubleVector *cWts = nullptr;
  if (cMap) {
    cWts = _translateWeights(constraintWeights);
    if (cWts) {
      if ((*cMap).size() != (*cWts).size()) {
        throw_value_error(
            "The number of weights should match the number of constraints");
      }
    }
    for (auto &i : (*cMap)) {
      if ((i.first < 0) || (i.first >= rdcast<int>(prbMol.getNumAtoms())) ||
          (i.second < 0) || (i.second >= rdcast<int>(refMol.getNumAtoms()))) {
        throw_value_error("Constrained atom idx out of range");
      }
      if ((prbMol[i.first]->getAtomicNum() == 1) ||
          (refMol[i.second]->getAtomicNum() == 1)) {
        throw_value_error("Constrained atoms must be heavy atoms");
      }
    }
  }
  ForceFields::PyMMFFMolProperties *prbPyMMFFMolProperties = nullptr;
  MMFF::MMFFMolProperties *prbMolProps = nullptr;
  ForceFields::PyMMFFMolProperties *refPyMMFFMolProperties = nullptr;
  MMFF::MMFFMolProperties *refMolProps = nullptr;

  if (prbProps != python::object()) {
    prbPyMMFFMolProperties =
        python::extract<ForceFields::PyMMFFMolProperties *>(prbProps);
    prbMolProps = prbPyMMFFMolProperties->mmffMolProperties.get();
  } else {
    prbMolProps = new MMFF::MMFFMolProperties(prbMol);
    if (!prbMolProps->isValid()) {
      throw_value_error("missing MMFF94 parameters for probe molecule");
    }
  }
  if (refProps != python::object()) {
    refPyMMFFMolProperties =
        python::extract<ForceFields::PyMMFFMolProperties *>(refProps);
    refMolProps = refPyMMFFMolProperties->mmffMolProperties.get();
  } else {
    refMolProps = new MMFF::MMFFMolProperties(refMol);
    if (!refMolProps->isValid()) {
      throw_value_error("missing MMFF94 parameters for reference molecule");
    }
  }
  std::vector<boost::shared_ptr<O3A>> res;
  {
    NOGIL gil;
    getO3AForProbeConfs(prbMol, refMol, prbMolProps, refMolProps, res,
                        numThreads, MolAlign::O3A::MMFF94, refCid, reflect,
                        maxIters, options, cMap, cWts);
  }

  python::list pyres;
  for (auto &i : res) {
    pyres.append(new PyO3A(i));
  }

  if (!prbPyMMFFMolProperties) delete prbMolProps;
  if (!refPyMMFFMolProperties) delete refMolProps;
  if (cMap) {
    delete cMap;
  }
  if (cWts) {
    delete cWts;
  }

  return python::tuple(pyres);
}
Пример #8
0
RDKIT_WRAP_DECL void translate_value_error(ValueErrorException const&e){
  throw_value_error(e.message());
}