예제 #1
0
// REVIEW: the poolSize can be pulled from the numeric array
//
// Python-facing wrapper around MaxMinPicker::pick() for an explicit distance
// matrix.
//
//   picker     - picker instance that performs the selection
//   distMat    - numpy array with the distances; it is converted to a
//                contiguous 1D array of doubles before being handed over
//   poolSize   - number of items in the pool
//   pickSize   - number of items to pick; must be less than poolSize
//   firstPicks - Python sequence of ints forwarded as the initial picks
//   seed       - seed value forwarded to the picker
//
// Returns whatever MaxMinPicker::pick() returns.
// Throws ValueErrorException if pickSize >= poolSize or if distMat is not a
// numpy array; propagates the pending Python error if the array conversion
// fails.
//
// NOTE(review): the length of the converted array is not checked against
// poolSize before the raw pointer is passed on - confirm pick() validates it.
RDKit::INT_VECT MaxMinPicks(MaxMinPicker *picker, python::object distMat,
                            int poolSize, int pickSize,
                            python::object firstPicks, int seed) {
  if (pickSize >= poolSize) {
    throw ValueErrorException("pickSize must be less than poolSize");
  }

  if (!PyArray_Check(distMat.ptr())) {
    throw ValueErrorException("distance mat argument must be a numpy matrix");
  }

  PyArrayObject *copy = reinterpret_cast<PyArrayObject *>(
      PyArray_ContiguousFromObject(distMat.ptr(), PyArray_DOUBLE, 1, 1));
  if (!copy) {
    // the conversion failed and left a Python exception pending; surface it
    // instead of dereferencing a null pointer below
    python::throw_error_already_set();
  }

  RDKit::INT_VECT res;
  try {
    double *dMat = reinterpret_cast<double *>(copy->data);

    // query the sequence length once rather than on every loop iteration
    const unsigned int nFirst =
        python::extract<unsigned int>(firstPicks.attr("__len__")());
    RDKit::INT_VECT firstPickVect;
    firstPickVect.reserve(nFirst);
    for (unsigned int i = 0; i < nFirst; ++i) {
      firstPickVect.push_back(python::extract<int>(firstPicks[i]));
    }
    res = picker->pick(dMat, poolSize, pickSize, firstPickVect, seed);
  } catch (...) {
    // release our reference to the converted array on every failure path
    Py_DECREF(copy);
    throw;
  }
  Py_DECREF(copy);
  return res;
}
예제 #2
0
  // Clusters the pool into pickSize clusters (via cluster()) and returns one
  // representative item per cluster.
  //
  //   distMat  - distance matrix, read through getDistFromLTM(); must be
  //              non-null
  //   poolSize - number of items in the pool
  //   pickSize - number of clusters / representatives requested
  //
  // The representative of a cluster is the member with the smallest sum of
  // squared distances to the other members of the same cluster. If no member
  // compares strictly below the starting bound (e.g. the sums are NaN), the
  // first member of the cluster is returned.
  RDKit::INT_VECT HierarchicalClusterPicker::pick(const double *distMat, 
                                                  unsigned int poolSize,
                                                  unsigned int pickSize) const {
    PRECONDITION(distMat,"bad distance matrix");
    RDKit::VECT_INT_VECT clusters = this->cluster(distMat, poolSize, pickSize);
    CHECK_INVARIANT(clusters.size() == pickSize, "");

    // the last step: find a representative element from each of the
    // remaining clusters
    RDKit::INT_VECT picks;
    picks.reserve(pickSize);
    for (unsigned int i = 0; i < pickSize; i++) {
      const RDKit::INT_VECT &members = clusters[i];
      CHECK_INVARIANT(!members.empty(), "empty cluster");

      // start from a real member so that `pick` is never read uninitialized
      int pick = members.front();
      double minSumD2 = RDKit::MAX_DOUBLE;
      for (RDKit::INT_VECT_CI cxi1 = members.begin();
           cxi1 != members.end(); ++cxi1) {
        int curPick = (*cxi1);
        double d2sum = 0.0;
        for (RDKit::INT_VECT_CI cxi2 = members.begin();
             cxi2 != members.end(); ++cxi2) {
          if (cxi1 == cxi2) {
            // an item contributes no distance to itself
            continue;
          }
          double d = getDistFromLTM(distMat, curPick, (*cxi2));
          d2sum += (d*d);
        }
        if (d2sum < minSumD2) {
          pick = curPick;
          minSumD2 = d2sum;
        }
      }
      picks.push_back(pick);
    }
    return picks;
  }
예제 #3
0
 // Replaces the current mask with a fresh ExplicitBitVect of d_dims bits in
 // which every index listed in maskBits is set.
 //
 // The new mask is fully built before the old one is released, so if the
 // allocation or a setBit() call throws, dp_maskBits still points at the
 // previous (valid) mask instead of a deleted or half-filled one.
 void InfoBitRanker::setMaskBits(RDKit::INT_VECT &maskBits) {
   ExplicitBitVect *newMask = new ExplicitBitVect(d_dims);
   try {
     for (RDKit::INT_VECT_CI bi = maskBits.begin();
          bi != maskBits.end(); ++bi) {
       newMask->setBit(*bi);
     }
   } catch (...) {
     delete newMask;
     throw;
   }
   // commit: nothing below can throw
   delete dp_maskBits;
   dp_maskBits = newMask;
 }
예제 #4
0
// Python-facing wrapper: copies the ints held by the Python sequence
// `maskBits` into an RDKit::INT_VECT and passes that vector on to
// InfoBitRanker::setMaskBits().
void SetMaskBits(InfoBitRanker *ranker, python::object maskBits) {
  PySequenceHolder<int> pyBits(maskBits);
  const unsigned int nBits = pyBits.size();

  RDKit::INT_VECT bitIds;
  bitIds.reserve(nBits);
  for (unsigned int idx = 0; idx != nBits; ++idx) {
    bitIds.push_back(pyBits[idx]);
  }

  ranker->setMaskBits(bitIds);
}
예제 #5
0
// Python-facing wrapper: copies the ints held by the Python sequence
// `bitList` into an RDKit::INT_VECT and hands that vector to
// BitCorrMatGenerator::setBitIdList().
void setBitList(BitCorrMatGenerator *cmGen, python::object bitList) {
  PySequenceHolder<int> pyBits(bitList);
  const unsigned int count = pyBits.size();

  RDKit::INT_VECT bitIds;
  bitIds.reserve(count);

  unsigned int pos = 0;
  while (pos < count) {
    bitIds.push_back(pyBits[pos]);
    ++pos;
  }

  cmGen->setBitIdList(bitIds);
}
예제 #6
0
// Python-facing wrapper around MaxMinPicker::lazyPick() where the distances
// come from a Python object instead of a precomputed matrix.
//
//   picker     - picker instance that performs the selection
//   distFunc   - Python object wrapped in a pyobjFunctor (presumably a
//                callable giving the distance between two indices - confirm
//                against pyobjFunctor)
//   poolSize   - number of items in the pool
//   pickSize   - number of items to pick
//   firstPicks - Python sequence of ints forwarded as the initial picks
//   seed       - seed value forwarded to the picker
//   useCache   - forwarded to the pyobjFunctor constructor
//
// Returns whatever MaxMinPicker::lazyPick() returns.
RDKit::INT_VECT LazyMaxMinPicks(MaxMinPicker *picker, python::object distFunc,
                                int poolSize, int pickSize,
                                python::object firstPicks, int seed,
                                bool useCache) {
  // query the sequence length once; the original re-ran the Python __len__
  // call on every loop iteration
  const unsigned int nFirst =
      python::extract<unsigned int>(firstPicks.attr("__len__")());
  RDKit::INT_VECT firstPickVect;
  firstPickVect.reserve(nFirst);
  for (unsigned int i = 0; i < nFirst; ++i) {
    firstPickVect.push_back(python::extract<int>(firstPicks[i]));
  }

  pyobjFunctor functor(distFunc, useCache);
  return picker->lazyPick(functor, poolSize, pickSize, firstPickVect, seed);
}
예제 #7
0
 // Python-facing wrapper around MaxMinPicker::lazyPick() where distances are
 // produced by a pyBVFunctor built from `objs` and `method`.
 //
 //   picker     - picker instance that performs the selection
 //   objs       - Python object handed to the pyBVFunctor constructor
 //                (presumably a sequence of bit vectors - confirm)
 //   poolSize   - number of items in the pool
 //   pickSize   - number of items to pick
 //   firstPicks - Python sequence of ints forwarded as the initial picks
 //   seed       - seed value forwarded to the picker
 //   method     - distance method handed to the pyBVFunctor constructor
 //
 // Returns whatever MaxMinPicker::lazyPick() returns.
 RDKit::INT_VECT LazyVectorMaxMinPicks(MaxMinPicker *picker, 
                                       python::object objs,
                                       int poolSize, 
                                       int pickSize,
                                       python::object firstPicks,
                                       int seed,
                                       DistanceMethod method
                                       ) {
   pyBVFunctor functor(objs, method);

   // query the sequence length once; the original re-ran the Python __len__
   // call on every loop iteration
   const unsigned int nFirst =
       python::extract<unsigned int>(firstPicks.attr("__len__")());
   RDKit::INT_VECT firstPickVect;
   firstPickVect.reserve(nFirst);
   for (unsigned int i = 0; i < nFirst; ++i) {
     firstPickVect.push_back(python::extract<int>(firstPicks[i]));
   }

   return picker->lazyPick(functor, poolSize, pickSize, firstPickVect, seed);
 }
예제 #8
0
// Python-facing wrapper around MaxMinPicker::lazyPick() for a pool of
// ExplicitBitVect objects compared with the TANIMOTO method.
//
//   picker     - picker instance that performs the selection
//   objs       - indexable Python object; items 0..poolSize-1 are extracted
//                as `const ExplicitBitVect *`
//   poolSize   - number of items in the pool (and number of items read
//                from objs)
//   pickSize   - number of items to pick
//   firstPicks - Python sequence of ints forwarded as the initial picks
//   seed       - seed value forwarded to the picker
//   useCache   - forwarded to the pyBVFunctor constructor
//
// Returns whatever MaxMinPicker::lazyPick() returns.
//
// NOTE(review): poolSize is a signed int used directly as the vector size; a
// negative value converts to a huge unsigned size. Confirm callers never pass
// a negative poolSize, or validate it here.
RDKit::INT_VECT LazyVectorMaxMinPicks(MaxMinPicker *picker, python::object objs,
                                      int poolSize, int pickSize,
                                      python::object firstPicks, int seed,
                                      bool useCache) {
  std::vector<const ExplicitBitVect *> bvs(poolSize);
  for (int i = 0; i < poolSize; ++i) {
    bvs[i] = python::extract<const ExplicitBitVect *>(objs[i]);
  }
  pyBVFunctor<ExplicitBitVect> functor(bvs, TANIMOTO, useCache);

  // query the sequence length once; the original re-ran the Python __len__
  // call on every loop iteration
  const unsigned int nFirst =
      python::extract<unsigned int>(firstPicks.attr("__len__")());
  RDKit::INT_VECT firstPickVect;
  firstPickVect.reserve(nFirst);
  for (unsigned int i = 0; i < nFirst; ++i) {
    firstPickVect.push_back(python::extract<int>(firstPicks[i]));
  }

  return picker->lazyPick(functor, poolSize, pickSize, firstPickVect, seed);
}
예제 #9
0
  // Stores the list of class ids to bias towards.
  //
  // classList must hold at most d_classes entries, contain no duplicates,
  // and every entry must lie in [0, d_classes-1]. On success d_biasList holds
  // a sorted copy of classList.
  //
  // Validation runs on a local copy, so a failed check leaves d_biasList
  // exactly as it was instead of holding a rejected list.
  void InfoBitRanker::setBiasList(RDKit::INT_VECT &classList) {
    RANGE_CHECK(0, classList.size(), d_classes);

    RDKit::INT_VECT sortedList(classList);
    std::sort(sortedList.begin(), sortedList.end());

    // make sure we don't have any duplicates: after sorting, duplicates are
    // adjacent
    RDKit::INT_VECT_CI dup =
        std::adjacent_find(sortedList.begin(), sortedList.end());
    CHECK_INVARIANT(dup == sortedList.end(), "There are duplicates in the class bias list");

    // finally make sure all the class ids are within range; a negative id
    // becomes a large unsigned value and is rejected by the same check
    for (RDKit::INT_VECT_CI bi = sortedList.begin(); bi != sortedList.end(); ++bi) {
      RANGE_CHECK(0, static_cast<unsigned int>(*bi), d_classes-1);
    }

    // commit only after every check has passed
    d_biasList.swap(sortedList);
  }
예제 #10
0
  // Runs the hierarchical clustering driver on the pool and merges clusters
  // until (poolSize - pickSize) merges have been applied.
  //
  //   distMat  - distance matrix handed to distdriver_(); must be non-null
  //   poolSize - number of items in the pool
  //   pickSize - number of clusters wanted; must not exceed poolSize
  //
  // Returns the clusters that were never merged into another one, each as a
  // vector of pool indices, in increasing order of their original cluster id.
  //
  // NOTE(review): distMat is cast from `const double *` to `real *`; this is
  // only correct if `real` is double - confirm against the clustering header.
  // NOTE(review): `len` is poolSize*(poolSize-1), kept as in the original; a
  // lower-triangle matrix would hold half as many entries - confirm what
  // distdriver_ expects.
  RDKit::VECT_INT_VECT HierarchicalClusterPicker::cluster(const double *distMat,
                                                          unsigned int poolSize,
                                                          unsigned int pickSize) const {
    PRECONDITION(distMat, "Invalid Distance Matrix");
    PRECONDITION((poolSize >= pickSize),
                 "pickSize cannot be larger than the poolSize");

    // Do the clustering
    long int method = (long int)d_method;
    long int poolSize2 = static_cast<long int>(poolSize);
    // multiply in long int: the unsigned int product wraps for large pools
    long int len = poolSize2 * (poolSize2 - 1);

    RDKit::VECT_INT_VECT clusters;
    RDKit::INT_VECT removed;

    long int *ia = (long int *)calloc(poolSize, sizeof(long int));
    long int *ib = (long int *)calloc(poolSize, sizeof(long int));
    real *crit = (real *)calloc(poolSize,sizeof(real));
    try {
      CHECK_INVARIANT(ia,"failed to allocate memory");
      CHECK_INVARIANT(ib,"failed to allocate memory");
      CHECK_INVARIANT(crit,"failed to allocate memory");

      distdriver_(&poolSize2, // number of items in the pool
                  &len, // number of entries in the distance matrix
                  (real *)distMat, // distance matrix
                  &method, // the clustering method (ward, slink etc.)
                  ia, // int vector with clustering history
                  ib, // one more clustering history matrix
                  crit // I believe this is a vector the difference in heights of two clusters
                  );

      // we have the clusters; now merge them until the number of clusters is
      // the same as the number of picks we need.
      // Before we do that, a bit of explanation on the vectors "ia" and "ib":
      //  - We start with each item in the pool as an individual cluster
      //  - then we use the vectors ia and ib to merge them.
      //     ia and ib provide the (1-based) ids of the clusters that need to
      //     be merged; it is assumed that when a cluster ia[j] is merged with
      //     ib[j], ia[j] is replaced by the new cluster in the cluster list
      clusters.reserve(poolSize);
      for (unsigned int i = 0; i < poolSize; i++) {
        RDKit::INT_VECT cls;
        cls.push_back(i);
        clusters.push_back(cls);
      }

      // do the merging, each round of this loop eliminates one cluster
      removed.reserve(poolSize - pickSize);
      for (unsigned int i = 0; i < (poolSize - pickSize); i++) {
        int cx1 = ia[i] - 1;
        int cx2 = ib[i] - 1;

        // add the items from cluster cx2 to cx1
        clusters[cx1].insert(clusters[cx1].end(),
                             clusters[cx2].begin(), clusters[cx2].end());

        // mark the second cluster as removed
        removed.push_back(cx2);
      }
    } catch (...) {
      // free(NULL) is a no-op, so this is safe even after a failed calloc
      free(ia);
      free(ib);
      free(crit);
      throw;
    }
    free(ia);
    free(ib);
    free(crit);

    // sort removed so that looping will be easier later
    std::sort(removed.begin(), removed.end());

    // some error checking here: uniquifying removed should not change it
    // REVIEW can we put this inside a #ifdef DEBUG?
    RDKit::INT_VECT_CI nEnd = std::unique(removed.begin(), removed.end());
    CHECK_INVARIANT(nEnd == removed.end(), "Somehow there are duplicates in the list of removed clusters");

    // keep every cluster whose id is not in the (sorted) removed list
    RDKit::VECT_INT_VECT res;
    res.reserve(pickSize);
    unsigned int j = 0;
    for (unsigned int i = 0; i < poolSize; i++) {
      // bounds-check j first: once all removed ids are consumed (or when
      // nothing was removed at all) removed[j] would read past the end
      if (j < removed.size() && static_cast<int>(i) == removed[j]) {
        j++;
        continue;
      }
      res.push_back(clusters[i]);
    }
    return res;
  }