示例#1
0
// Python-facing wrapper around MaxMinPicker::pick().
//
//  picker     - the picker used to do the selection
//  distMat    - must be a numpy array; it is converted to a contiguous 1D
//               array of doubles before being handed to the picker
//  poolSize   - forwarded to the picker
//  pickSize   - forwarded to the picker; must be smaller than poolSize
//  firstPicks - Python sequence of ints, forwarded to the picker
//  seed       - forwarded to the picker
//
// Returns whatever picker->pick() returns.
// Throws ValueErrorException if pickSize >= poolSize, if distMat is not a
// numpy array, or if distMat cannot be converted to a 1D double array.
//
// REVIEW: the poolSize can be pulled from the numeric array
RDKit::INT_VECT MaxMinPicks(MaxMinPicker *picker, python::object distMat,
                            int poolSize, int pickSize,
                            python::object firstPicks, int seed) {
  if (pickSize >= poolSize) {
    throw ValueErrorException("pickSize must be less than poolSize");
  }

  if (!PyArray_Check(distMat.ptr())) {
    throw ValueErrorException("distance mat argument must be a numpy matrix");
  }

  // Pull the first picks out before the array is created so that a failing
  // extraction cannot leak the array reference. The length is queried once
  // instead of on every loop iteration.
  RDKit::INT_VECT firstPickVect;
  const unsigned int nFirstPicks =
      python::extract<unsigned int>(firstPicks.attr("__len__")());
  for (unsigned int i = 0; i < nFirstPicks; ++i) {
    firstPickVect.push_back(python::extract<int>(firstPicks[i]));
  }

  PyArrayObject *copy = (PyArrayObject *)PyArray_ContiguousFromObject(
      distMat.ptr(), PyArray_DOUBLE, 1, 1);
  if (!copy) {
    // the conversion failed; dereferencing the result would crash
    throw ValueErrorException(
        "could not convert distance mat argument to a 1D array of doubles");
  }
  double *dMat = (double *)copy->data;

  RDKit::INT_VECT res;
  try {
    res = picker->pick(dMat, poolSize, pickSize, firstPickVect, seed);
  } catch (...) {
    // release our reference before letting the error propagate
    Py_DECREF(copy);
    throw;
  }
  Py_DECREF(copy);
  return res;
}
示例#2
0
  // Accumulates the element-wise distance between two DiscreteValueVects.
  //
  // Both vectors must have the same length and the same value type,
  // otherwise a ValueErrorException is thrown.
  //
  // For value types up to EIGHTBITVALUE the packed data is walked one byte
  // at a time and each differing byte pair is scored through the
  // DiscreteDistMat returned by getDiscreteDistMat(). For the remaining
  // (sixteen bit) type the data is walked as unsigned shorts and absolute
  // differences are summed directly.
  unsigned int computeL1Norm(const DiscreteValueVect &v1, const DiscreteValueVect &v2) {
    if (v1.getLength() != v2.getLength()) {
      throw ValueErrorException("Comparing vectors of different lengths");
    }

    const DiscreteValueVect::DiscreteValueType valType = v1.getValueType();
    if (v2.getValueType() != valType) {
      throw ValueErrorException("Comparing vector of different value types");
    }

    const boost::uint32_t* data1 = v1.getData();
    const boost::uint32_t* data2 = v2.getData();
    unsigned int res = 0;

    if (valType > DiscreteValueVect::EIGHTBITVALUE) {
      // we have a sixteen bits per value type
      // REVIEW: we are making an assumption here that a short
      // is 16 bit - may fail on a different compiler
      const unsigned short int *lhs = (unsigned short int *)(data1);
      const unsigned short int *rhs = (unsigned short int *)(data2);
      const unsigned short int *stop = lhs + (v1.getNumInts()*2);
      for (; lhs != stop; ++lhs, ++rhs) {
        if (*lhs != *rhs) {
          res += abs((*lhs) - (*rhs));
        }
      }
      return res;
    }

    // eight bits or fewer per value: four bytes per stored integer
    DiscreteDistMat *dmat = getDiscreteDistMat();
    unsigned char *lhs = (unsigned char *)(data1);
    unsigned char *rhs = (unsigned char *)(data2);
    const unsigned char *stop = lhs + (v1.getNumInts()*4);
    for (; lhs != stop; ++lhs, ++rhs) {
      // identical bytes contribute nothing, so skip the table lookup
      if (*lhs != *rhs) {
        res += dmat->getDist(*lhs, *rhs, valType);
      }
    }
    return res;
  }
示例#3
0
  void UniformGrid3D::initFromText(const char *pkl,const unsigned int length){
    std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out);
    ss.write(pkl,length);
    boost::int32_t tVers;
    streamRead(ss,tVers);
    tVers *= -1;
    if(tVers==0x1){
      
    } else {
      throw ValueErrorException("bad version in UniformGrid3D pickle");
    }
    boost::uint32_t tInt;
    streamRead(ss,tInt);
    d_numX=tInt;
    streamRead(ss,tInt);
    d_numY=tInt;
    streamRead(ss,tInt);
    d_numZ=tInt;
    streamRead(ss,d_spacing);
    double oX,oY,oZ;
    streamRead(ss,oX);
    streamRead(ss,oY);
    streamRead(ss,oZ);
    d_offSet = Point3D(oX,oY,oZ);

    boost::uint32_t pklSz;
    streamRead(ss,pklSz);
    char *buff = new char[pklSz];
    ss.read(buff,pklSz*sizeof(char));
    if(dp_storage) delete dp_storage;
    dp_storage = new RDKit::DiscreteValueVect(buff,pklSz);
    delete [] buff;
  }
示例#4
0
// Reads the POPC chunk and appends its entries to dp_impl->popCountOffsets.
//
//  dp_impl - reader state to fill in; must not be null
//  sz      - size of the chunk in bytes; must be a multiple of 4
//  chunk   - the chunk data, read as consecutive 32-bit unsigned values
//
// Throws ValueErrorException if sz is not a multiple of 4 or if the chunk
// holds fewer than 9 entries.
void extractPopCounts(FPBReader_impl *dp_impl, boost::uint64_t sz,
                      const boost::uint8_t *chunk) {
  PRECONDITION(dp_impl, "bad pointer");
  /* this section of the FPB format is under-documented in Andrew's code,
   * fortunately it looks pretty simple
  */
  if (sz % 4 != 0) {
    throw ValueErrorException("POPC chunk size must be a multiple of 4 bytes");
  }
  const unsigned int nEntries = sz / 4;
  if (nEntries < 9) {
    throw ValueErrorException("POPC must contain at least 9 offsets");
  }

  // view the chunk as an array of 32-bit entries and copy them over in order
  const boost::uint32_t *entries =
      reinterpret_cast<const boost::uint32_t *>(chunk);
  dp_impl->popCountOffsets.reserve(nEntries);
  for (unsigned int idx = 0; idx < nEntries; ++idx) {
    dp_impl->popCountOffsets.push_back(entries[idx]);
  }
}
示例#5
0
// """ -------------------------------------------------------
//
// getOnBits(IntVect &which)
//  C++: Passes the set of on bits out in the IntVect passed in.
//   The contents of IntVect are destroyed.
//
//  Python: Returns the tuple of on bits
//
// """ -------------------------------------------------------
void SparseBitVect::getOnBits(IntVect &v) const {
  // Replaces the contents of v with the entries stored in dp_bits, in
  // iteration order. Throws ValueErrorException if dp_bits is not set.
  if (!dp_bits) {
    throw ValueErrorException("BitVect not properly initialized.");
  }
  const unsigned int onCount = getNumOnBits();
  if (!v.empty()) {
    // swapping with an empty vector discards whatever the caller passed in
    IntVect emptied;
    emptied.swap(v);
  }
  v.reserve(onCount);
  v.resize(onCount);
  std::copy(dp_bits->begin(), dp_bits->end(), v.begin());
}
示例#6
0
// """ -------------------------------------------------------
//
// setBit(const IntSetIter which)  (C++ SPECIFIC)
//  Sets bit which to be on.
//
//  Returns the original state of the bit
//
// """ -------------------------------------------------------
bool SparseBitVect::setBit(const IntSetIter which) {
  if (!dp_bits) {
    throw ValueErrorException("BitVect not properly initialized.");
  }
  std::pair<IntSetIter, bool> res;
  if (*which < 0 || static_cast<unsigned int>(*which) >= d_size) {
    throw IndexErrorException(*which);
  }
  res = dp_bits->insert(*which);
  return !(res.second);
}
// Python-facing wrapper around HierarchicalClusterPicker::pick().
//
//  picker   - the picker used to do the selection
//  distMat  - must be a numpy array; a private 1D double copy is made
//             because the picker modifies the matrix it is given
//  poolSize - forwarded to the picker
//  pickSize - forwarded to the picker; must be smaller than poolSize
//
// Returns whatever picker->pick() returns.
// Throws ValueErrorException if pickSize >= poolSize, if distMat is not a
// numpy array, or if distMat cannot be copied into a 1D double array.
//
// REVIEW: the poolSize can be pulled from the numeric array
RDKit::INT_VECT HierarchicalPicks(HierarchicalClusterPicker *picker,
                                  python::object &distMat, int poolSize,
                                  int pickSize) {
  if (pickSize >= poolSize) {
    throw ValueErrorException("pickSize must be less than poolSize");
  }
  if (!PyArray_Check(distMat.ptr())) {
    throw ValueErrorException("distance mat argument must be a numpy matrix");
  }

  // it's painful to have to copy the input matrix, but the
  // picker itself will step on the distance matrix, so use
  // CopyFromObject here instead of ContiguousFromObject
  PyArrayObject *copy =
      (PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), NPY_DOUBLE, 1, 1);
  if (!copy) {
    // the copy failed; asking for its data pointer would crash
    throw ValueErrorException(
        "could not convert distance mat argument to a 1D array of doubles");
  }
  double *dMat = (double *)PyArray_DATA(copy);

  RDKit::INT_VECT res;
  try {
    res = picker->pick(dMat, poolSize, pickSize);
  } catch (...) {
    // release our reference before letting the error propagate
    Py_DECREF(copy);
    throw;
  }
  Py_DECREF(copy);
  return res;
}
示例#8
0
 // Stores val as element i of the packed vector.
 //
 // Elements are packed d_valsPerInt to an integer, each occupying
 // d_bitsPerVal bits; element i lives in integer i/d_valsPerInt at bit
 // offset d_bitsPerVal*(i%d_valsPerInt).
 //
 // Throws IndexErrorException if i >= d_length and ValueErrorException if
 // val has bits set outside d_mask.
 void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
   if(i >= d_length){
     throw IndexErrorException(i);
   }
   if ((val & d_mask) != val) {
     throw ValueErrorException("Value out of range");
   }
   const unsigned int shift = d_bitsPerVal*(i%d_valsPerInt);
   const unsigned int intId = i/d_valsPerInt;
   // Build the field mask in unsigned arithmetic: with a signed 1 the
   // expression overflows int for a 16 bit field in the upper half-word.
   // NOTE(review): still assumes d_bitsPerVal is smaller than the width of
   // unsigned int - confirm against the supported value types.
   const unsigned int fieldMask = ((1u<<d_bitsPerVal) - 1u) << shift;
   // clear the old contents of the field, then drop in the new value
   d_data[intId] = (d_data[intId]&~fieldMask)|(val << shift);
 }
示例#9
0
文件: MMPA.cpp 项目: gerebtzoff/rdkit
// Recursively walks the entries of matching_bonds starting at index i.
//
// Each candidate is appended to bonds_selected via appendBonds(); once at
// least minCuts entries are selected the current selection is passed to
// addResult(), and while fewer than maxCuts are selected the function
// recurses on the candidates that follow. The last selected entry is popped
// again before moving on to the next candidate.
//
// Throws ValueErrorException if maxCuts < minCuts or if minCuts is 0.
static inline void processCuts(
    size_t i, size_t minCuts, size_t maxCuts, BondVector_t& bonds_selected,
    const std::vector<BondVector_t>& matching_bonds, const ROMol& mol,
    std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res) {
  if (maxCuts < minCuts) {
    throw ValueErrorException("supplied maxCuts is less than minCuts");
  }
  if (minCuts == 0) {
    throw ValueErrorException("minCuts must be greater than 0");
  }

  const size_t nCandidates = matching_bonds.size();
  for (size_t idx = i; idx < nCandidates; ++idx) {
    appendBonds(bonds_selected, matching_bonds[idx]);

    const bool enoughCuts = !(bonds_selected.size() < minCuts);
    if (enoughCuts) {
      addResult(res, mol, bonds_selected, maxCuts);
    }

    // room for more cuts: extend the selection with later candidates
    if (maxCuts > bonds_selected.size()) {
      processCuts(idx + 1, minCuts, maxCuts, bonds_selected, matching_bonds,
                  mol, res);
    }

    bonds_selected.pop_back();
  }
}
示例#10
0
// Fragments mol on every combination of maxToCut bonds drawn from
// bondIndices and appends one result molecule per combination to resMols.
//
//  mol          - the molecule to fragment
//  bondIndices  - candidate bonds (at most 63)
//  resMols      - receives the fragmented molecules
//  maxToCut     - number of bonds cut in each combination
//  addDummies   - forwarded to fragmentOnBonds()
//  dummyLabels  - optional, one entry per candidate bond
//  bondTypes    - optional, one entry per candidate bond
//  nCutsPerAtom - optional; receives one per-atom vector per combination
//
// Combinations are enumerated as bit patterns: bit i set means
// bondIndices[i] is cut. Nothing is produced when maxToCut is 0, the
// molecule has no atoms, bondIndices is empty, or maxToCut exceeds the
// number of candidate bonds.
//
// Throws ValueErrorException if more than 63 candidate bonds are supplied.
void fragmentOnSomeBonds(
    const ROMol &mol, const std::vector<unsigned int> &bondIndices,
    std::vector<ROMOL_SPTR> &resMols, unsigned int maxToCut, bool addDummies,
    const std::vector<std::pair<unsigned int, unsigned int>> *dummyLabels,
    const std::vector<Bond::BondType> *bondTypes,
    std::vector<std::vector<unsigned int>> *nCutsPerAtom) {
  PRECONDITION((!dummyLabels || dummyLabels->size() == bondIndices.size()),
               "bad dummyLabel vector");
  PRECONDITION((!bondTypes || bondTypes->size() == bondIndices.size()),
               "bad bondType vector");
  if (bondIndices.size() > 63)
    throw ValueErrorException("currently can only fragment on up to 63 bonds");
  if (!maxToCut || !mol.getNumAtoms() || !bondIndices.size()) return;
  // There is no way to pick more bonds than are available. Bailing out here
  // also keeps every shift below strictly smaller than 64 bits.
  if (maxToCut > bondIndices.size()) return;

  // do the bit twiddling in a type that is 64 bits wide on every platform
  // (long is only 32 bits on some)
  const boost::uint64_t one = 1;
  boost::uint64_t state = (one << maxToCut) - 1;
  const boost::uint64_t stop = one << bondIndices.size();

  // Scratch space reused for every combination. The optional vectors live on
  // the stack so they are released even if fragmentOnBonds() throws.
  std::vector<unsigned int> fragmentHere(maxToCut);
  std::vector<std::pair<unsigned int, unsigned int>> dummyLabelsStore;
  std::vector<std::pair<unsigned int, unsigned int>> *dummyLabelsHere = nullptr;
  if (dummyLabels) {
    dummyLabelsStore.resize(maxToCut);
    dummyLabelsHere = &dummyLabelsStore;
  }
  std::vector<Bond::BondType> bondTypesStore;
  std::vector<Bond::BondType> *bondTypesHere = nullptr;
  if (bondTypes) {
    bondTypesStore.resize(maxToCut);
    bondTypesHere = &bondTypesStore;
  }

  while (state < stop) {
    // translate the current bit pattern into the list of bonds to cut
    unsigned int nSeen = 0;
    for (unsigned int i = 0; i < bondIndices.size() && nSeen < maxToCut; ++i) {
      if (state & (one << i)) {
        fragmentHere[nSeen] = bondIndices[i];
        if (dummyLabelsHere) (*dummyLabelsHere)[nSeen] = (*dummyLabels)[i];
        if (bondTypesHere) (*bondTypesHere)[nSeen] = (*bondTypes)[i];
        ++nSeen;
      }
    }
    std::vector<unsigned int> *lCutsPerAtom = nullptr;
    if (nCutsPerAtom) {
      nCutsPerAtom->push_back(std::vector<unsigned int>(mol.getNumAtoms()));
      lCutsPerAtom = &(nCutsPerAtom->back());
    }
    ROMol *nm = fragmentOnBonds(mol, fragmentHere, addDummies, dummyLabelsHere,
                                bondTypesHere, lCutsPerAtom);
    resMols.push_back(ROMOL_SPTR(nm));

    state = nextBitCombo(state);
  }
}
示例#11
0
// """ -------------------------------------------------------
//
//  Sets bit which to be off.
//
//  Returns the original state of the bit
//
// """ -------------------------------------------------------
bool SparseBitVect::unsetBit(const unsigned int which) {
  // Turns bit `which` off and reports whether it was on beforehand.
  // Throws ValueErrorException if dp_bits is not set and
  // IndexErrorException if which >= d_size.
  if (!dp_bits) {
    throw ValueErrorException("BitVect not properly initialized.");
  }
  if (which >= d_size) {
    throw IndexErrorException(which);
  }

  // erasing by key reports how many entries were removed: non-zero exactly
  // when the bit was present
  return dp_bits->erase(which) != 0;
}
示例#12
0
// Computes the Tversky similarity between stored fingerprint `which` and the
// bytes at bv, using weights ca and cb.
//
//  dp_impl - reader state; must not be null
//  which   - index of the stored fingerprint; must be < dp_impl->len
//  bv      - query fingerprint bytes; must not be null
//  ca, cb  - forwarded to CalcBitmapTversky()
//
// In lazy mode a temporary buffer is allocated for the stored fingerprint
// and released before returning, including when an exception is thrown.
// Otherwise extractBytes() points fpData at memory the reader owns.
//
// Throws ValueErrorException if which is out of range.
double tversky(const FPBReader_impl *dp_impl, unsigned int which,
               const ::boost::uint8_t *bv, double ca, double cb) {
  PRECONDITION(dp_impl, "bad reader pointer");
  PRECONDITION(bv, "bad bv pointer");
  if (which >= dp_impl->len) {
    throw ValueErrorException("bad index");
  }
  // only lazy mode hands us memory that we have to free ourselves
  const bool ownsBuffer = dp_impl->df_lazy;
  boost::uint8_t *fpData = 0;
  if (ownsBuffer) {
    fpData = new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint];
  }
  double res = 0.0;
  try {
    extractBytes(dp_impl, which, fpData);
    res = CalcBitmapTversky(fpData, bv,
                            dp_impl->numBytesStoredPerFingerprint, ca, cb);
  } catch (...) {
    if (ownsBuffer) delete[] fpData;
    throw;
  }
  if (ownsBuffer) delete[] fpData;
  return res;
}
示例#13
0
// Translates a string of charge symbols into integer charges, one entry per
// character: '+' -> 1, '0' -> 0, '-' -> -1.
// Throws ValueErrorException on any other character.
std::vector<int> stringToCharge(std::string charge_str) {
  std::vector<int> charges;
  for (std::string::size_type pos = 0; pos < charge_str.size(); ++pos) {
    const char symbol = charge_str[pos];
    if (symbol == '+') {
      charges.push_back(1);
    } else if (symbol == '0') {
      charges.push_back(0);
    } else if (symbol == '-') {
      charges.push_back(-1);
    } else {
      throw ValueErrorException("Charge symbol not recognised.");
    }
  }
  return charges;
}
示例#14
0
// if dp_impl->df_lazy is true, we'll use the memory in fpData (should be large
// enough to hold the result!), otherwise
// we update it to a pointer to the memory dp_impl owns.
void extractBytes(const FPBReader_impl *dp_impl, unsigned int which,
                  boost::uint8_t *&fpData, unsigned int nToRead = 1) {
  PRECONDITION(dp_impl, "bad reader pointer");
  PRECONDITION((dp_impl->df_lazy || dp_impl->dp_fpData), "bad fpdata pointer");
  PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode");
  PRECONDITION(!dp_impl->df_lazy || fpData, "no fpData in lazy mode");
  PRECONDITION(nToRead > 0, "bad nToRead");

  if (which + nToRead > dp_impl->len) {
    throw ValueErrorException("bad index");
  }
  boost::uint64_t offset = which * dp_impl->numBytesStoredPerFingerprint;
  if (!dp_impl->df_lazy) {
    fpData = const_cast<boost::uint8_t *>(dp_impl->dp_fpData) + offset;
  } else {
    dp_impl->istrm->seekg(dp_impl->fpDataOffset +
                          static_cast<std::streampos>(offset));
    dp_impl->istrm->read(reinterpret_cast<char *>(fpData),
                         nToRead * dp_impl->numBytesStoredPerFingerprint);
  }
};
  // Python-facing wrapper around HierarchicalClusterPicker::cluster().
  //
  //  picker   - the picker used to do the clustering
  //  distMat  - must be a numpy array; a private 1D double copy is made
  //             because the picker modifies the matrix it is given
  //  poolSize - forwarded to the picker
  //  pickSize - forwarded to the picker
  //
  // Returns whatever picker->cluster() returns.
  // Throws ValueErrorException if distMat is not a numpy array or cannot be
  // copied into a 1D double array.
  //
  // REVIEW: the poolSize can be pulled from the numeric array
  RDKit::VECT_INT_VECT HierarchicalClusters(HierarchicalClusterPicker *picker,
                                   python::object &distMat,
                                   int poolSize, 
                                   int pickSize) {
    if (!PyArray_Check(distMat.ptr())){
      throw ValueErrorException("distance mat argument must be a numpy matrix");
    }

    // REVIEW: check pickSize < poolSize, otherwise throw_value_error()

    // it's painful to have to copy the input matrix, but the
    // picker itself will step on the distance matrix, so use
    // CopyFromObject here instead of ContiguousFromObject
    PyArrayObject *copy = (PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(),
                                                   PyArray_DOUBLE, 1,1);
    if (!copy) {
      // the copy failed; dereferencing the result would crash
      throw ValueErrorException(
          "could not convert distance mat argument to a 1D array of doubles");
    }
    double *dMat = (double *)copy->data;

    RDKit::VECT_INT_VECT res;
    try {
      res = picker->cluster(dMat, poolSize, pickSize);
    } catch (...) {
      // release our reference before letting the error propagate
      Py_DECREF(copy);
      throw;
    }
    Py_DECREF(copy);
    return res;
  }
示例#16
0
  // Re-initializes this vector from a binary pickle.
  //
  //  pkl - pointer to the pickle bytes
  //  len - number of bytes available at pkl
  //
  // The pickle is read in this order: negated version number (must be 1),
  // value type, bits per value, mask, length, number of stored integers and
  // finally the packed integers themselves. On big-endian builds the stored
  // integers are byte-swapped after reading.
  //
  // Throws ValueErrorException on an unrecognized version or if the pickle
  // claims zero bits per value.
  void DiscreteValueVect::initFromText(const char *pkl,const unsigned int len){
    std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out);
    ss.write(pkl,len);
    boost::int32_t tVers;
    streamRead(ss,tVers);
    tVers *= -1;
    if(tVers!=0x1){
      throw ValueErrorException("bad version in DiscreteValueVect pickle");
    }
    boost::uint32_t tInt;
    streamRead(ss,tInt);
    d_type=static_cast<DiscreteValueType>(tInt);

    streamRead(ss,tInt);
    if(tInt==0){
      // a zero here would make the division below fault
      throw ValueErrorException("bad bits per value in DiscreteValueVect pickle");
    }
    d_bitsPerVal=tInt;
    d_valsPerInt = BITS_PER_INT/d_bitsPerVal;
    streamRead(ss,tInt);
    d_mask=tInt;
    streamRead(ss,tInt);
    d_length=tInt;
    streamRead(ss,tInt);
    d_numInts=tInt;
    boost::uint32_t *data = new boost::uint32_t[d_numInts];
    ss.read((char *)data,d_numInts*sizeof(boost::uint32_t));

#if defined(BOOST_BIG_ENDIAN)
    boost::uint32_t *td = new boost::uint32_t[d_numInts];
    for(unsigned int i=0;i<d_numInts;++i) td[i]=EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(data[i]);
    d_data.reset(td);
    delete [] data;
#else    
    d_data.reset(data);
#endif    

  };
示例#17
0
  void dfsBuildStack(ROMol &mol,int atomIdx,int inBondIdx,
                     std::vector<AtomColors> &colors,
                     VECT_INT_VECT &cycles,
                     const UINT_VECT &ranks,
                     INT_VECT &cyclesAvailable,
                     MolStack &molStack,
                     INT_VECT &atomOrders,
                     INT_VECT &bondVisitOrders,
                     VECT_INT_VECT &atomRingClosures,
                     std::vector<INT_LIST> &atomTraversalBondOrder,
                     const boost::dynamic_bitset<> *bondsInPlay,
                     const std::vector<std::string> *bondSymbols
                     ){
#if 0
    std::cerr<<"traverse from atom: "<<atomIdx<<" via bond "<<inBondIdx<<" num cycles available: "
             <<std::count(cyclesAvailable.begin(),cyclesAvailable.end(),1)<<std::endl;
#endif
    Atom *atom = mol.getAtomWithIdx(atomIdx);
    INT_LIST directTravList,cycleEndList;
    boost::dynamic_bitset<> seenFromHere(mol.getNumAtoms());
    
    seenFromHere.set(atomIdx);
    molStack.push_back(MolStackElem(atom));
    atomOrders[atom->getIdx()] = molStack.size();
    colors[atomIdx] = GREY_NODE;

    INT_LIST travList;
    if(inBondIdx>=0) travList.push_back(inBondIdx);

    
    // ---------------------
    //
    //  Add any ring closures
    //
    // ---------------------
    if(atomRingClosures[atomIdx].size()){
      std::vector<unsigned int> ringsClosed;
      BOOST_FOREACH(int bIdx,atomRingClosures[atomIdx]){
        travList.push_back(bIdx);
        Bond *bond = mol.getBondWithIdx(bIdx);
        seenFromHere.set(bond->getOtherAtomIdx(atomIdx));
        unsigned int ringIdx;
        if(bond->getPropIfPresent(common_properties::_TraversalRingClosureBond, ringIdx)){
          // this is end of the ring closure
          // we can just pull the ring index from the bond itself:
          molStack.push_back(MolStackElem(bond,atomIdx));
          bondVisitOrders[bIdx]=molStack.size();
          molStack.push_back(MolStackElem(ringIdx));
          // don't make the ring digit immediately available again: we don't want to have the same
          // ring digit opening and closing rings on an atom.
          ringsClosed.push_back(ringIdx-1);
        } else {
          // this is the beginning of the ring closure, we need to come up with a ring index:
          INT_VECT::const_iterator cAIt=std::find(cyclesAvailable.begin(),
                                                  cyclesAvailable.end(),1);
          if(cAIt==cyclesAvailable.end()){
            throw ValueErrorException("Too many rings open at once. SMILES cannot be generated.");
          }
          unsigned int lowestRingIdx =  cAIt-cyclesAvailable.begin();
          cyclesAvailable[lowestRingIdx] = 0;
          ++lowestRingIdx;
          bond->setProp(common_properties::_TraversalRingClosureBond,lowestRingIdx);
          molStack.push_back(MolStackElem(lowestRingIdx));
        }
      }
示例#18
0
std::string extractId(const FPBReader_impl *dp_impl, unsigned int which) {
  PRECONDITION(dp_impl, "bad reader pointer");
  PRECONDITION((dp_impl->df_lazy || dp_impl->dp_idOffsets),
               "bad idOffsets pointer");
  PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode");

  if (which >= dp_impl->num4ByteElements + dp_impl->num8ByteElements) {
    throw ValueErrorException("bad index");
  }
  std::string res;

  boost::uint64_t offset = 0, len = 0;
  if (which < dp_impl->num4ByteElements) {
    if (!dp_impl->df_lazy) {
      offset = *reinterpret_cast<const boost::uint32_t *>(
          dp_impl->dp_idOffsets + which * 4);
      len = *reinterpret_cast<const boost::uint32_t *>(dp_impl->dp_idOffsets +
                                                       (which + 1) * 4);
    } else {
      dp_impl->istrm->seekg(dp_impl->idDataOffset +
                            static_cast<std::streampos>(which * 4));
      dp_impl->istrm->read(reinterpret_cast<char *>(&offset), 4);
      dp_impl->istrm->read(reinterpret_cast<char *>(&len), 4);
    }
  } else if (which == dp_impl->num4ByteElements) {
    // FIX: this code path is not yet tested
    if (!dp_impl->df_lazy) {
      offset = *reinterpret_cast<const boost::uint32_t *>(
          dp_impl->dp_idOffsets + which * 4);
      len = *reinterpret_cast<const boost::uint64_t *>(dp_impl->dp_idOffsets +
                                                       (which + 1) * 4);
    } else {
      dp_impl->istrm->seekg(dp_impl->idDataOffset +
                            static_cast<std::streampos>(which * 4));
      dp_impl->istrm->read(reinterpret_cast<char *>(&offset), 4);
      dp_impl->istrm->read(reinterpret_cast<char *>(&len), 8);
    }
  } else {
    // FIX: this code path is not yet tested
    if (!dp_impl->df_lazy) {
      offset = *reinterpret_cast<const boost::uint64_t *>(
          dp_impl->dp_idOffsets + dp_impl->num4ByteElements * 4 + which * 8);
      len = *reinterpret_cast<const boost::uint64_t *>(
          dp_impl->dp_idOffsets + dp_impl->num4ByteElements * 4 +
          (which + 1) * 8);
    } else {
      dp_impl->istrm->seekg(dp_impl->idDataOffset +
                            static_cast<std::streampos>(
                                dp_impl->num4ByteElements * 4 + which * 8));
      dp_impl->istrm->read(reinterpret_cast<char *>(&offset), 8);
      dp_impl->istrm->read(reinterpret_cast<char *>(&len), 8);
    }
  }
  len -= offset;

  if (!dp_impl->df_lazy) {
    res = std::string(
        reinterpret_cast<const char *>(dp_impl->dp_idChunk.get() + offset),
        len);
  } else {
    boost::shared_array<char> buff(new char[len + 1]);
    buff[len] = 0;
    dp_impl->istrm->seekg(dp_impl->idChunkOffset +
                          static_cast<std::streampos>(offset));
    dp_impl->istrm->read(reinterpret_cast<char *>(buff.get()), len);
    res = std::string(reinterpret_cast<const char *>(buff.get()));
  }
  return res;
};
示例#19
0
  // Ranks the bits by the measure selected through d_type and returns the
  // top `num` of them.
  //
  // The result is stored in dp_topBits (any previous result is released) and
  // a pointer to it is returned; the ranker keeps ownership. The array is
  // laid out as num rows of (2 + d_classes) doubles:
  //   bitId, score, then one column with the count for each class.
  // If fewer than num bits were scored, the remaining rows get a score of
  // 0.0 and a bit id taken from the mask's on bits (when a mask is set) or
  // equal to the row index.
  //
  // Throws ValueErrorException if num > d_dims. When a mask is set, num must
  // not exceed the number of on bits in the mask.
  double *InfoBitRanker::getTopN(unsigned int num) {
    if(num>d_dims) throw ValueErrorException("attempt to rank more bits than present in the bit vectors");
    if(dp_maskBits){
      CHECK_INVARIANT(num <= dp_maskBits->getNumOnBits(), "Can't rank more bits than the ensemble size");
    }

    // this is a place holder to pass along to infogain function
    // the size of this container should nVals*d_classes, where nVals
    // is the number of values a variable can take.
    // since we are dealing with a binary bit vector nVals = 2
    // in addition the infogain function pretends that this is a 2D matrix
    // with the number of rows equal to nVals and num of columns equal to
    // d_classes
    RDKit::USHORT *resMat = new RDKit::USHORT[2*d_classes];

    PR_QUEUE topN;

    for (unsigned int i = 0; i < d_dims; i++) {
      // NOTE: an earlier version could also skip bits that were never set in
      // a designated class ("ignoreNoClass"); that filter is disabled.

      // bits outside the mask are not ranked
      if (dp_maskBits && !dp_maskBits->getBit(i)) {
        continue;
      }

      // fill up dmat
      for (unsigned int j = 0; j < d_classes; j++) {
        // we know that we have only two rows here
        resMat[j] = d_counts[j][i];
        resMat[d_classes + j] = (d_clsCount[j] - d_counts[j][i]);
      }
      double info = 0.0;
      switch (d_type) {
      case ENTROPY:
        info = InfoEntropyGain(resMat, 2, d_classes);
        break;
      case BIASENTROPY:
        info = this->BiasInfoEntropyGain(resMat);
        break;
      case CHISQUARE:
        info = ChiSquare(resMat, 2, d_classes);
        break;
      case BIASCHISQUARE:
        info = BiasChiSquareGain(resMat);
        break;
      default:
        break;
      }

      PAIR_D_I entry(info, i);

      if (info >= 0.0) {
        if (topN.size() < num) {
          topN.push(entry);
        }
        // the queue is empty here when num == 0; top() must not be called
        // on an empty queue
        else if (!topN.empty() && info > topN.top().first) {
          topN.pop();
          topN.push(entry);
        }
      }
    }

    delete [] resMat;

    // now fill up the result matrix for the topN bits
    // The caller of this function interprets the result as a two dimensional
    // array of size num*(2+d_classes) with each row containing the following
    // entries
    //   bitId, infogain, 1 additional column for number of hits for each class

    d_top = num;
    int ncols = 2+d_classes;

    delete [] dp_topBits;
    dp_topBits = new double[num*ncols];

    int offset, bid;

    // the mask's on bits are only needed to pad rows that could not be
    // filled from the queue
    RDKit::INT_VECT maskBits;
    if (dp_maskBits && topN.size() < num) {
      dp_maskBits->getOnBits(maskBits);
    }

    // rows are filled from the last one backwards, popping the queue as we go
    for (int i = num - 1; i >= 0; i--) {
      offset = i*ncols;
      if (topN.size() == 0 ) {
        if (dp_maskBits) {
          bid = maskBits[i];
        } else {
          bid = i;
        }
        dp_topBits[offset + 1] = 0.0;
      } else {
        bid = topN.top().second; // bit id
        dp_topBits[offset + 1] = topN.top().first; // value of the infogain
        topN.pop();
      }
      dp_topBits[offset] = (double)bid;

      for (unsigned int j = 0; j < d_classes; j++) {
        dp_topBits[offset + 2 + j] = (double)d_counts[j][bid];
      }
    }
    return dp_topBits;
  }
示例#20
0
void BitVect::initFromText(const char *data,const unsigned int dataLen,
                   bool isBase64,bool allowOldFormat){
  std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out);
  if(isBase64){
    unsigned int actualLen;
    char *decoded;
    decoded = Base64Decode((const char *)data,&actualLen);
    ss.write(decoded,actualLen);
    delete [] decoded;
  } else {
    ss.write(data,dataLen);
  }

  boost::int32_t format=0;
  boost::uint32_t nOn=0;
  boost::int32_t size;
  boost::int32_t version=0;
  
  // earlier versions of the code did not have the version number encoded, so
  //  we'll use that to distinguish version 0
  RDKit::streamRead(ss,size);
  if(size<0){
    version = -1*size;
    if (version == 16) {
      format=1;
    }
    else if (version == 32) {
      format=2;
    }
    else {
      throw ValueErrorException("bad version in BitVect pickle");
    }
    RDKit::streamRead(ss,size);
  } else if( !allowOldFormat ) {
    throw ValueErrorException("invalid BitVect pickle");
  }

  RDKit::streamRead(ss,nOn);
  _initForSize(static_cast<int>(size));

  // if the either have older version or or version 16 with ints for on bits
  if( (format==0) || 
      ( (format == 1) && (size >= std::numeric_limits<unsigned short>::max()) ) ) {
    boost::uint32_t tmp;
    for(unsigned int i=0; i<nOn; i++){
      RDKit::streamRead(ss,tmp);
      setBit(tmp);
    }
  } else if (format == 1) { // version 16 and on bits stored as short ints
    boost::uint16_t tmp;
    for(unsigned int i=0; i<nOn; i++){
      RDKit::streamRead(ss,tmp);
      setBit(tmp);
    }
  } else if (format == 2) { // run length encoded format
    boost::uint32_t curr=0;
    for (unsigned int i=0; i<nOn; i++) {
      curr += RDKit::readPackedIntFromStream(ss);
      setBit(curr);
      curr++;
    }
  }

}
示例#21
0
  void canonicalDFSTraversal(ROMol &mol,int atomIdx,int inBondIdx,
                             std::vector<AtomColors> &colors,
                             VECT_INT_VECT &cycles,
                             INT_VECT &ranks,
                             INT_VECT &cyclesAvailable,
                             MolStack &molStack,
                             INT_VECT &atomOrders,
                             INT_VECT &bondVisitOrders,
                             VECT_INT_VECT &atomRingClosures,
                             std::vector<INT_LIST> &atomTraversalBondOrder,
                             const boost::dynamic_bitset<> *bondsInPlay,
                             const std::vector<std::string> *bondSymbols
                             ){
    PRECONDITION(colors.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(ranks.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(atomOrders.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(bondVisitOrders.size()>=mol.getNumBonds(),"vector too small");
    PRECONDITION(atomRingClosures.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(atomTraversalBondOrder.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(!bondsInPlay || bondsInPlay->size()>=mol.getNumBonds(),"bondsInPlay too small");
    PRECONDITION(!bondSymbols || bondSymbols->size()>=mol.getNumBonds(),"bondSymbols too small");

    int nAttached=0;

    Atom *atom = mol.getAtomWithIdx(atomIdx);
    INT_LIST directTravList,cycleEndList;

    molStack.push_back(MolStackElem(atom));
    atomOrders[atom->getIdx()] = molStack.size();
    colors[atomIdx] = GREY_NODE;

    // ---------------------
    //
    //  Build the list of possible destinations from here
    //
    // ---------------------
    std::vector< PossibleType > possibles;
    possibles.resize(0);
    ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
    possibles.reserve(bondsPair.second-bondsPair.first);

    while(bondsPair.first != bondsPair.second){
      BOND_SPTR theBond = mol[*(bondsPair.first)];
      bondsPair.first++;
      if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue;
      if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){
        int otherIdx = theBond->getOtherAtomIdx(atomIdx);
        long rank=ranks[otherIdx];
        // ---------------------
        //
        // things are a bit more complicated if we are sitting on a
        // ring atom we would like to traverse first to the
        // ring-closure atoms, then to atoms outside the ring first,
        // then to atoms in the ring that haven't already been visited
        // (non-ring-closure atoms).
        // 
        //  Here's how the black magic works:
        //   - non-ring atom neighbors have their original ranks
        //   - ring atom neighbors have this added to their ranks:
        //       (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS
        //   - ring-closure neighbors lose a factor of:
        //       (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS
        //
        //  This tactic biases us to traverse to non-ring neighbors first,
        //  original ordering if bond orders are all equal... crafty, neh?
        //  
        // ---------------------
        if( colors[otherIdx] == GREY_NODE ) {
          rank -= static_cast<int>(Bond::OTHER+1) *
            MAX_NATOMS*MAX_NATOMS;
          if(!bondSymbols){
            rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
              MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS) *  MAX_NATOMS;
          }
        } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){
          if(!bondSymbols){
            rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
              MAX_NATOMS*MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS;
          }
        }
        possibles.push_back(PossibleType(rank,otherIdx,theBond.get()));
      }
    }

    // ---------------------
    //
    //  Sort on ranks
    //
    // ---------------------
    std::sort(possibles.begin(),possibles.end(),_possibleComp);


    // ---------------------
    //
    //  Now work the children
    //
    // ---------------------
    std::vector<MolStack> subStacks;
    for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin();
        possiblesIt!=possibles.end();
        possiblesIt++){
      MolStack subStack;
#if 0
      int possibleIdx = possiblesIt->second.first;
      Bond *bond = possiblesIt->second.second;
#endif
      int possibleIdx = possiblesIt->get<1>();
      Bond *bond = possiblesIt->get<2>();
      Atom *otherAtom=mol.getAtomWithIdx(possibleIdx);
      unsigned int lowestRingIdx;
      INT_VECT::const_iterator cAIt;
      switch(colors[possibleIdx]){
      case WHITE_NODE:
        // -----
        // we haven't seen this node at all before
        // -----

        // it might have some residual data from earlier calls, clean that up:
        if(otherAtom->hasProp("_TraversalBondIndexOrder")){
          otherAtom->clearProp("_TraversalBondIndexOrder");
        }

        directTravList.push_back(bond->getIdx());
        subStack.push_back(MolStackElem(bond,atomIdx));
        canonicalDFSTraversal(mol,possibleIdx,bond->getIdx(),colors,
                              cycles,ranks,cyclesAvailable,subStack,
                              atomOrders,bondVisitOrders,atomRingClosures,atomTraversalBondOrder,
                              bondsInPlay,bondSymbols);
        subStacks.push_back(subStack);
        nAttached += 1;
        break;
      case GREY_NODE:
        // -----
        // we've seen this, but haven't finished it (we're finishing a ring)
        // -----
        cycleEndList.push_back(bond->getIdx());
        cAIt=std::find(cyclesAvailable.begin(),
                       cyclesAvailable.end(),1);
        if(cAIt==cyclesAvailable.end()){
          throw ValueErrorException("Too many rings open at once. SMILES cannot be generated.");
        }
        lowestRingIdx =  cAIt-cyclesAvailable.begin();
        cyclesAvailable[lowestRingIdx] = 0;
        cycles[possibleIdx].push_back(lowestRingIdx);
        ++lowestRingIdx;

        bond->setProp("_TraversalRingClosureBond",lowestRingIdx);
        molStack.push_back(MolStackElem(bond,
                                        atom->getIdx()));
        molStack.push_back(MolStackElem(lowestRingIdx));

        // we need to add this bond (which closes the ring) to the traversal list for the
        // other atom as well:
        atomTraversalBondOrder[otherAtom->getIdx()].push_back(bond->getIdx());
        atomRingClosures[otherAtom->getIdx()].push_back(bond->getIdx());

        break;
      default:
        // -----
        // this node has been finished. don't do anything.
        // -----
        break;
      }
    }
    

    INT_VECT &ringClosures=atomRingClosures[atom->getIdx()];
    
    CHECK_INVARIANT(ringClosures.size()==cycles[atomIdx].size(),
                    "ring closure mismatch");
    for(unsigned int i=0;i<ringClosures.size();i++){
      int ringIdx=cycles[atomIdx][i];
      ringIdx += 1;
      molStack.push_back(MolStackElem(ringIdx));
    }
    cycles[atomIdx].resize(0);
  
    MolStack::const_iterator ciMS;
    for(int i=0;i<nAttached;i++){
      if(i<nAttached-1){
        int branchIdx=0;
        if(subStacks[i].begin()->type==MOL_STACK_ATOM){
          branchIdx=subStacks[i].begin()->obj.atom->getIdx();
        } else if(subStacks[i].begin()->type==MOL_STACK_BOND){
          branchIdx=-1*subStacks[i].begin()->obj.bond->getIdx();
        } else {
          ASSERT_INVARIANT(0,"branch started with something other than an atom or bond");
        }
        molStack.push_back(MolStackElem("(",branchIdx));
        for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){
          molStack.push_back(*ciMS);
          switch(ciMS->type){
          case MOL_STACK_ATOM:
            atomOrders[ciMS->obj.atom->getIdx()] = molStack.size();
            break;
          case MOL_STACK_BOND:
            bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size();
            break;
          default:
            break;
          }
        }
        molStack.push_back(MolStackElem(")",branchIdx));
      } else {
        for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){
          molStack.push_back(*ciMS);
          switch(ciMS->type){
          case MOL_STACK_ATOM:
            atomOrders[ciMS->obj.atom->getIdx()] = molStack.size();
            break;
          case MOL_STACK_BOND:
            bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size();
            break;
          default:
            break;
          }
        }
      }
    }

    //std::cerr<<"*****>>>>>> Traversal results for atom: "<<atom->getIdx()<<"> ";
    INT_LIST travList;
    // first push on the incoming bond:
    if(inBondIdx >= 0){
      //std::cerr<<" "<<inBondIdx;
      travList.push_back(inBondIdx);
    }

    // ... ring closures that end here:
    for(INT_LIST_CI ilci=cycleEndList.begin();ilci!=cycleEndList.end();++ilci){
      //std::cerr<<" ["<<*ilci<<"]";
      travList.push_back(*ilci);
    }


    // ... ring closures that start here:
    // if(atom->hasProp("_TraversalBondIndexOrder")){
    //   INT_LIST indirectTravList;
    //   atom->getProp("_TraversalBondIndexOrder",indirectTravList);
    //   for(INT_LIST_CI ilci=indirectTravList.begin();ilci!=indirectTravList.end();++ilci){
    //     //std::cerr<<" ("<<*ilci<<")";
    //     travList.push_back(*ilci);
    //   }
    // }
    BOOST_FOREACH(int ili,atomTraversalBondOrder[atom->getIdx()]){
      travList.push_back(ili);
    }