// REVIEW: the poolSize can be pulled from the numeric array RDKit::INT_VECT MaxMinPicks(MaxMinPicker *picker, python::object distMat, int poolSize, int pickSize, python::object firstPicks, int seed) { if (pickSize >= poolSize) { throw ValueErrorException("pickSize must be less than poolSize"); } if (!PyArray_Check(distMat.ptr())) { throw ValueErrorException("distance mat argument must be a numpy matrix"); } PyArrayObject *copy; copy = (PyArrayObject *)PyArray_ContiguousFromObject(distMat.ptr(), PyArray_DOUBLE, 1, 1); double *dMat = (double *)copy->data; RDKit::INT_VECT firstPickVect; for (unsigned int i = 0; i < python::extract<unsigned int>(firstPicks.attr("__len__")()); ++i) { firstPickVect.push_back(python::extract<int>(firstPicks[i])); } RDKit::INT_VECT res = picker->pick(dMat, poolSize, pickSize, firstPickVect, seed); Py_DECREF(copy); return res; }
unsigned int computeL1Norm(const DiscreteValueVect &v1, const DiscreteValueVect &v2) { if (v1.getLength() != v2.getLength()) { throw ValueErrorException("Comparing vectors of different lengths"); } DiscreteValueVect::DiscreteValueType valType = v1.getValueType(); if (valType != v2.getValueType()) { throw ValueErrorException("Comparing vector of different value types"); } const boost::uint32_t* data1 = v1.getData(); const boost::uint32_t* data2 = v2.getData(); unsigned int res = 0; if (valType <= DiscreteValueVect::EIGHTBITVALUE) { DiscreteDistMat *dmat = getDiscreteDistMat(); unsigned char *cd1 = (unsigned char *)(data1); unsigned char *cd2 = (unsigned char *)(data2); const unsigned char *cend = cd1 + (v1.getNumInts()*4); while (cd1 != cend) { if (*cd1 == *cd2) { cd1++; cd2++; continue; } res += dmat->getDist(*cd1, *cd2, valType); cd1++; cd2++; } } else { // we have a sixteen bits per value type // REVIEW: we are making an assumption here that a short // is 16 bit - may fail on a different compiler const unsigned short int *sd1 = (unsigned short int *)(data1); const unsigned short int *sd2 = (unsigned short int *)(data2); const unsigned short int *send = sd1 + (v1.getNumInts()*2); while (sd1 != send) { if (*sd1 == *sd2) { sd1++; sd2++; continue; } res += abs((*sd1) - (*sd2)); sd1++; sd2++; } } return res; }
void UniformGrid3D::initFromText(const char *pkl,const unsigned int length){ std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out); ss.write(pkl,length); boost::int32_t tVers; streamRead(ss,tVers); tVers *= -1; if(tVers==0x1){ } else { throw ValueErrorException("bad version in UniformGrid3D pickle"); } boost::uint32_t tInt; streamRead(ss,tInt); d_numX=tInt; streamRead(ss,tInt); d_numY=tInt; streamRead(ss,tInt); d_numZ=tInt; streamRead(ss,d_spacing); double oX,oY,oZ; streamRead(ss,oX); streamRead(ss,oY); streamRead(ss,oZ); d_offSet = Point3D(oX,oY,oZ); boost::uint32_t pklSz; streamRead(ss,pklSz); char *buff = new char[pklSz]; ss.read(buff,pklSz*sizeof(char)); if(dp_storage) delete dp_storage; dp_storage = new RDKit::DiscreteValueVect(buff,pklSz); delete [] buff; }
// Appends the contents of a POPC chunk to dp_impl->popCountOffsets.
//
//  dp_impl - reader state that receives the offsets (must not be null)
//  sz      - size of the chunk in bytes
//  chunk   - pointer to the chunk data, read as consecutive 32-bit values
//
// Throws ValueErrorException if sz is not a multiple of 4 or if the chunk
// holds fewer than 9 entries.
void extractPopCounts(FPBReader_impl *dp_impl, boost::uint64_t sz,
                      const boost::uint8_t *chunk) {
  PRECONDITION(dp_impl, "bad pointer");
  /* this section of the FPB format is under-documented in Andrew's code,
   * fortunately it looks pretty simple */
  if (sz % 4) {
    throw ValueErrorException("POPC chunk size must be a multiple of 4 bytes");
  }

  unsigned int nEntries = sz / 4;
  if (nEntries < 9) {
    throw ValueErrorException("POPC must contain at least 9 offsets");
  }

  dp_impl->popCountOffsets.reserve(nEntries);
  // view the chunk as an array of 32-bit entries and copy them one by one
  const boost::uint32_t *entries =
      reinterpret_cast<const boost::uint32_t *>(chunk);
  for (unsigned int idx = 0; idx < nEntries; ++idx) {
    dp_impl->popCountOffsets.push_back(entries[idx]);
  }
}
// """ ------------------------------------------------------- // // getOnBits(IntVect &which) // C++: Passes the set of on bits out in the IntVect passed in. // The contents of IntVect are destroyed. // // Python: Returns the tuple of on bits // // """ ------------------------------------------------------- void SparseBitVect::getOnBits(IntVect &v) const { if (!dp_bits) { throw ValueErrorException("BitVect not properly initialized."); } unsigned int nOn = getNumOnBits(); if (!v.empty()) IntVect().swap(v); v.reserve(nOn); v.resize(nOn); std::copy(dp_bits->begin(), dp_bits->end(), v.begin()); };
// """ ------------------------------------------------------- // // setBit(const IntSetIter which) (C++ SPECIFIC) // Sets bit which to be on. // // Returns the original state of the bit // // """ ------------------------------------------------------- bool SparseBitVect::setBit(const IntSetIter which) { if (!dp_bits) { throw ValueErrorException("BitVect not properly initialized."); } std::pair<IntSetIter, bool> res; if (*which < 0 || static_cast<unsigned int>(*which) >= d_size) { throw IndexErrorException(*which); } res = dp_bits->insert(*which); return !(res.second); }
// REVIEW: the poolSize can be pulled from the numeric array RDKit::INT_VECT HierarchicalPicks(HierarchicalClusterPicker *picker, python::object &distMat, int poolSize, int pickSize) { if (pickSize >= poolSize) { throw ValueErrorException("pickSize must be less than poolSize"); } if (!PyArray_Check(distMat.ptr())) { throw ValueErrorException("distance mat argument must be a numpy matrix"); } PyArrayObject *copy; // it's painful to have to copy the input matrix, but the // picker itself will step on the distance matrix, so use // CopyFromObject here instead of ContiguousFromObject copy = (PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), NPY_DOUBLE, 1, 1); double *dMat = (double *)PyArray_DATA(copy); RDKit::INT_VECT res = picker->pick(dMat, poolSize, pickSize); Py_DECREF(copy); return res; }
// Stores val as element i of the vector.
//
// Elements are packed d_valsPerInt to a storage integer, d_bitsPerVal bits
// each.
//
// Throws IndexErrorException if i >= d_length and ValueErrorException if val
// has bits set outside d_mask.
void DiscreteValueVect::setVal(unsigned int i, unsigned int val) {
  if (i >= d_length) {
    throw IndexErrorException(i);
  }
  if ((val & d_mask) != val) {
    throw ValueErrorException("Value out of range");
  }

  const unsigned int shift = d_bitsPerVal * (i % d_valsPerInt);
  const unsigned int intId = i / d_valsPerInt;

  // Build the field mask with unsigned arithmetic: with a signed literal the
  // expression ((1 << 16) - 1) << 16 overflows int, which is undefined
  // behavior.
  const unsigned int fieldMask = ((1u << d_bitsPerVal) - 1u) << shift;

  // clear the old field contents, then OR in the new value
  d_data[intId] = (d_data[intId] & ~fieldMask) | (val << shift);
}
static inline void processCuts( size_t i, size_t minCuts, size_t maxCuts, BondVector_t& bonds_selected, const std::vector<BondVector_t>& matching_bonds, const ROMol& mol, std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res) { if(maxCuts < minCuts) throw ValueErrorException("supplied maxCuts is less than minCuts"); if(minCuts==0) throw ValueErrorException("minCuts must be greater than 0"); for (size_t x = i; x < matching_bonds.size(); x++) { appendBonds(bonds_selected, matching_bonds[x]); if(bonds_selected.size() >= minCuts) { addResult(res, mol, bonds_selected, maxCuts); } if (bonds_selected.size() < maxCuts) { processCuts(x + 1, minCuts, maxCuts, bonds_selected, matching_bonds, mol, res); } bonds_selected.pop_back(); } }
void fragmentOnSomeBonds( const ROMol &mol, const std::vector<unsigned int> &bondIndices, std::vector<ROMOL_SPTR> &resMols, unsigned int maxToCut, bool addDummies, const std::vector<std::pair<unsigned int, unsigned int>> *dummyLabels, const std::vector<Bond::BondType> *bondTypes, std::vector<std::vector<unsigned int>> *nCutsPerAtom) { PRECONDITION((!dummyLabels || dummyLabels->size() == bondIndices.size()), "bad dummyLabel vector"); PRECONDITION((!bondTypes || bondTypes->size() == bondIndices.size()), "bad bondType vector"); if (bondIndices.size() > 63) throw ValueErrorException("currently can only fragment on up to 63 bonds"); if (!maxToCut || !mol.getNumAtoms() || !bondIndices.size()) return; boost::uint64_t state = (0x1L << maxToCut) - 1; boost::uint64_t stop = 0x1L << bondIndices.size(); std::vector<unsigned int> fragmentHere(maxToCut); std::vector<std::pair<unsigned int, unsigned int>> *dummyLabelsHere = nullptr; if (dummyLabels) { dummyLabelsHere = new std::vector<std::pair<unsigned int, unsigned int>>(maxToCut); } std::vector<Bond::BondType> *bondTypesHere = nullptr; if (bondTypes) { bondTypesHere = new std::vector<Bond::BondType>(maxToCut); } while (state < stop) { unsigned int nSeen = 0; for (unsigned int i = 0; i < bondIndices.size() && nSeen < maxToCut; ++i) { if (state & (0x1L << i)) { fragmentHere[nSeen] = bondIndices[i]; if (dummyLabelsHere) (*dummyLabelsHere)[nSeen] = (*dummyLabels)[i]; if (bondTypesHere) (*bondTypesHere)[nSeen] = (*bondTypes)[i]; ++nSeen; } } std::vector<unsigned int> *lCutsPerAtom = nullptr; if (nCutsPerAtom) { nCutsPerAtom->push_back(std::vector<unsigned int>(mol.getNumAtoms())); lCutsPerAtom = &(nCutsPerAtom->back()); } ROMol *nm = fragmentOnBonds(mol, fragmentHere, addDummies, dummyLabelsHere, bondTypesHere, lCutsPerAtom); resMols.push_back(ROMOL_SPTR(nm)); state = nextBitCombo(state); } delete dummyLabelsHere; delete bondTypesHere; }
// """ ------------------------------------------------------- // // Sets bit which to be off. // // Returns the original state of the bit // // """ ------------------------------------------------------- bool SparseBitVect::unsetBit(const unsigned int which) { if (!dp_bits) { throw ValueErrorException("BitVect not properly initialized."); } if (which >= d_size) { throw IndexErrorException(which); } if (dp_bits->count(which)) { dp_bits->erase(dp_bits->find(which)); return true; } else { return false; } }
// Computes the Tversky similarity between stored fingerprint `which` and the
// raw fingerprint bytes in bv.
//
//  dp_impl - reader state (must not be null)
//  which   - index of the stored fingerprint; must be < dp_impl->len
//  bv      - bytes of the query fingerprint (must not be null)
//  ca, cb  - the two Tversky weights, forwarded to CalcBitmapTversky()
//
// Throws ValueErrorException if which is out of range.
double tversky(const FPBReader_impl *dp_impl, unsigned int which,
               const ::boost::uint8_t *bv, double ca, double cb) {
  PRECONDITION(dp_impl, "bad reader pointer");
  PRECONDITION(bv, "bad bv pointer");
  if (which >= dp_impl->len) {
    throw ValueErrorException("bad index");
  }

  // In lazy mode extractBytes() fills a buffer that we provide; otherwise it
  // redirects fpData to memory owned by dp_impl. The scratch buffer is held
  // by a shared_array so it is released even if a call below throws.
  boost::shared_array<boost::uint8_t> lazyBuffer;
  boost::uint8_t *fpData = nullptr;
  if (dp_impl->df_lazy) {
    lazyBuffer.reset(
        new boost::uint8_t[dp_impl->numBytesStoredPerFingerprint]);
    fpData = lazyBuffer.get();
  }

  extractBytes(dp_impl, which, fpData);
  return CalcBitmapTversky(fpData, bv, dp_impl->numBytesStoredPerFingerprint,
                           ca, cb);
}
// Translates a string of charge symbols into a vector of integer charges,
// one entry per character: '+' -> 1, '0' -> 0, '-' -> -1.
//
// Throws ValueErrorException on any other character.
std::vector<int> stringToCharge(std::string charge_str) {
  std::vector<int> charges;
  for (const char symbol : charge_str) {
    int value;
    if (symbol == '+') {
      value = 1;
    } else if (symbol == '0') {
      value = 0;
    } else if (symbol == '-') {
      value = -1;
    } else {
      throw ValueErrorException("Charge symbol not recognised.");
    }
    charges.push_back(value);
  }
  return charges;
}
// if dp_impl->df_lazy is true, we'll use the memory in fpData (should be large // enough to hold the result!), otherwise // we update it to a pointer to the memory dp_impl owns. void extractBytes(const FPBReader_impl *dp_impl, unsigned int which, boost::uint8_t *&fpData, unsigned int nToRead = 1) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION((dp_impl->df_lazy || dp_impl->dp_fpData), "bad fpdata pointer"); PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode"); PRECONDITION(!dp_impl->df_lazy || fpData, "no fpData in lazy mode"); PRECONDITION(nToRead > 0, "bad nToRead"); if (which + nToRead > dp_impl->len) { throw ValueErrorException("bad index"); } boost::uint64_t offset = which * dp_impl->numBytesStoredPerFingerprint; if (!dp_impl->df_lazy) { fpData = const_cast<boost::uint8_t *>(dp_impl->dp_fpData) + offset; } else { dp_impl->istrm->seekg(dp_impl->fpDataOffset + static_cast<std::streampos>(offset)); dp_impl->istrm->read(reinterpret_cast<char *>(fpData), nToRead * dp_impl->numBytesStoredPerFingerprint); } };
// REVIEW: the poolSize can be pulled from the numeric array RDKit::VECT_INT_VECT HierarchicalClusters(HierarchicalClusterPicker *picker, python::object &distMat, int poolSize, int pickSize) { if (!PyArray_Check(distMat.ptr())){ throw ValueErrorException("distance mat argument must be a numpy matrix"); } // REVIEW: check pickSize < poolSize, otherwise throw_value_error() PyArrayObject *copy; // it's painful to have to copy the input matrix, but the // picker itself will step on the distance matrix, so use // CopyFromObject here instead of ContiguousFromObject copy = (PyArrayObject *)PyArray_CopyFromObject(distMat.ptr(), PyArray_DOUBLE, 1,1); double *dMat = (double *)copy->data; RDKit::VECT_INT_VECT res=picker->cluster(dMat, poolSize, pickSize); Py_DECREF(copy); return res; }
void DiscreteValueVect::initFromText(const char *pkl,const unsigned int len){ std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out); ss.write(pkl,len); boost::int32_t tVers; streamRead(ss,tVers); tVers *= -1; if(tVers==0x1){ } else { throw ValueErrorException("bad version in DiscreteValueVect pickle"); } boost::uint32_t tInt; streamRead(ss,tInt); d_type=static_cast<DiscreteValueType>(tInt); streamRead(ss,tInt); d_bitsPerVal=tInt; d_valsPerInt = BITS_PER_INT/d_bitsPerVal; streamRead(ss,tInt); d_mask=tInt; streamRead(ss,tInt); d_length=tInt; streamRead(ss,tInt); d_numInts=tInt; boost::uint32_t *data = new boost::uint32_t[d_numInts]; ss.read((char *)data,d_numInts*sizeof(boost::uint32_t)); #if defined(BOOST_BIG_ENDIAN) boost::uint32_t *td = new boost::uint32_t[d_numInts]; for(unsigned int i=0;i<d_numInts;++i) td[i]=EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(data[i]); d_data.reset(td); delete [] data; #else d_data.reset(data); #endif };
void dfsBuildStack(ROMol &mol,int atomIdx,int inBondIdx, std::vector<AtomColors> &colors, VECT_INT_VECT &cycles, const UINT_VECT &ranks, INT_VECT &cyclesAvailable, MolStack &molStack, INT_VECT &atomOrders, INT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector<INT_LIST> &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector<std::string> *bondSymbols ){ #if 0 std::cerr<<"traverse from atom: "<<atomIdx<<" via bond "<<inBondIdx<<" num cycles available: " <<std::count(cyclesAvailable.begin(),cyclesAvailable.end(),1)<<std::endl; #endif Atom *atom = mol.getAtomWithIdx(atomIdx); INT_LIST directTravList,cycleEndList; boost::dynamic_bitset<> seenFromHere(mol.getNumAtoms()); seenFromHere.set(atomIdx); molStack.push_back(MolStackElem(atom)); atomOrders[atom->getIdx()] = molStack.size(); colors[atomIdx] = GREY_NODE; INT_LIST travList; if(inBondIdx>=0) travList.push_back(inBondIdx); // --------------------- // // Add any ring closures // // --------------------- if(atomRingClosures[atomIdx].size()){ std::vector<unsigned int> ringsClosed; BOOST_FOREACH(int bIdx,atomRingClosures[atomIdx]){ travList.push_back(bIdx); Bond *bond = mol.getBondWithIdx(bIdx); seenFromHere.set(bond->getOtherAtomIdx(atomIdx)); unsigned int ringIdx; if(bond->getPropIfPresent(common_properties::_TraversalRingClosureBond, ringIdx)){ // this is end of the ring closure // we can just pull the ring index from the bond itself: molStack.push_back(MolStackElem(bond,atomIdx)); bondVisitOrders[bIdx]=molStack.size(); molStack.push_back(MolStackElem(ringIdx)); // don't make the ring digit immediately available again: we don't want to have the same // ring digit opening and closing rings on an atom. 
ringsClosed.push_back(ringIdx-1); } else { // this is the beginning of the ring closure, we need to come up with a ring index: INT_VECT::const_iterator cAIt=std::find(cyclesAvailable.begin(), cyclesAvailable.end(),1); if(cAIt==cyclesAvailable.end()){ throw ValueErrorException("Too many rings open at once. SMILES cannot be generated."); } unsigned int lowestRingIdx = cAIt-cyclesAvailable.begin(); cyclesAvailable[lowestRingIdx] = 0; ++lowestRingIdx; bond->setProp(common_properties::_TraversalRingClosureBond,lowestRingIdx); molStack.push_back(MolStackElem(lowestRingIdx)); } }
std::string extractId(const FPBReader_impl *dp_impl, unsigned int which) { PRECONDITION(dp_impl, "bad reader pointer"); PRECONDITION((dp_impl->df_lazy || dp_impl->dp_idOffsets), "bad idOffsets pointer"); PRECONDITION(!dp_impl->df_lazy || dp_impl->istrm, "no stream in lazy mode"); if (which >= dp_impl->num4ByteElements + dp_impl->num8ByteElements) { throw ValueErrorException("bad index"); } std::string res; boost::uint64_t offset = 0, len = 0; if (which < dp_impl->num4ByteElements) { if (!dp_impl->df_lazy) { offset = *reinterpret_cast<const boost::uint32_t *>( dp_impl->dp_idOffsets + which * 4); len = *reinterpret_cast<const boost::uint32_t *>(dp_impl->dp_idOffsets + (which + 1) * 4); } else { dp_impl->istrm->seekg(dp_impl->idDataOffset + static_cast<std::streampos>(which * 4)); dp_impl->istrm->read(reinterpret_cast<char *>(&offset), 4); dp_impl->istrm->read(reinterpret_cast<char *>(&len), 4); } } else if (which == dp_impl->num4ByteElements) { // FIX: this code path is not yet tested if (!dp_impl->df_lazy) { offset = *reinterpret_cast<const boost::uint32_t *>( dp_impl->dp_idOffsets + which * 4); len = *reinterpret_cast<const boost::uint64_t *>(dp_impl->dp_idOffsets + (which + 1) * 4); } else { dp_impl->istrm->seekg(dp_impl->idDataOffset + static_cast<std::streampos>(which * 4)); dp_impl->istrm->read(reinterpret_cast<char *>(&offset), 4); dp_impl->istrm->read(reinterpret_cast<char *>(&len), 8); } } else { // FIX: this code path is not yet tested if (!dp_impl->df_lazy) { offset = *reinterpret_cast<const boost::uint64_t *>( dp_impl->dp_idOffsets + dp_impl->num4ByteElements * 4 + which * 8); len = *reinterpret_cast<const boost::uint64_t *>( dp_impl->dp_idOffsets + dp_impl->num4ByteElements * 4 + (which + 1) * 8); } else { dp_impl->istrm->seekg(dp_impl->idDataOffset + static_cast<std::streampos>( dp_impl->num4ByteElements * 4 + which * 8)); dp_impl->istrm->read(reinterpret_cast<char *>(&offset), 8); dp_impl->istrm->read(reinterpret_cast<char *>(&len), 8); } } len -= 
offset; if (!dp_impl->df_lazy) { res = std::string( reinterpret_cast<const char *>(dp_impl->dp_idChunk.get() + offset), len); } else { boost::shared_array<char> buff(new char[len + 1]); buff[len] = 0; dp_impl->istrm->seekg(dp_impl->idChunkOffset + static_cast<std::streampos>(offset)); dp_impl->istrm->read(reinterpret_cast<char *>(buff.get()), len); res = std::string(reinterpret_cast<const char *>(buff.get())); } return res; };
double *InfoBitRanker::getTopN(unsigned int num) { // this is a place holder to pass along to infogain function // the size of this container should nVals*d_classes, where nVals // is the number of values a variable can take. // since we are dealing with a binary bit vector nVals = 2 // in addition the infogain function pretends that this is a 2D matrix // with the number of rows equal to nVals and num of columns equal to // d_classes if(num>d_dims) throw ValueErrorException("attempt to rank more bits than present in the bit vectors"); if(dp_maskBits) CHECK_INVARIANT(num <= dp_maskBits->getNumOnBits(), "Can't rank more bits than the ensemble size"); RDKit::USHORT *resMat = new RDKit::USHORT[2*d_classes]; PR_QUEUE topN; for (unsigned int i = 0; i < d_dims; i++) { // we may want to ignore bits that are not turned on in any item of class // "ignoreNoClass" /* if ((0 <= ignoreNoClass) && (d_classes > ignoreNoClass)) { if (d_counts[ignoreNoClass][i] == 0) { continue; } }*/ if (dp_maskBits && !dp_maskBits->getBit(i)) { continue; } // fill up dmat for (unsigned int j = 0; j < d_classes; j++) { // we know that we have only two rows here resMat[j] = d_counts[j][i]; resMat[d_classes + j] = (d_clsCount[j] - d_counts[j][i]); } double info = 0.0; switch (d_type) { case ENTROPY: info = InfoEntropyGain(resMat, 2, d_classes); break; case BIASENTROPY: info = this->BiasInfoEntropyGain(resMat); break; case CHISQUARE: info = ChiSquare(resMat, 2, d_classes); break; case BIASCHISQUARE: info = BiasChiSquareGain(resMat); break; default: break; } PAIR_D_I entry(info, i); if (info >= 0.0) { if (topN.size() < num) { topN.push(entry); } else if (info > topN.top().first) { topN.pop(); topN.push(entry); } } } delete [] resMat; // now fill up the result matrix for the topN bits // the result from this function is a double * of size // num*4. 
The caller of this function interprets this // array as a two dimensional array of size num*(2+d_classes) with each row // containing the following entries // bitId, infogain, 1 additional column for number of hits for each class //double *res = new double[num*(2+d_classes)]; d_top = num; int ncols = 2+d_classes; delete [] dp_topBits; dp_topBits = new double[num*ncols]; int offset, bid; RDKit::INT_VECT maskBits; if (dp_maskBits && topN.size() < num) { dp_maskBits->getOnBits(maskBits); } for (int i = num - 1; i >= 0; i--) { offset = i*ncols; if (topN.size() == 0 ) { if (dp_maskBits) { bid = maskBits[i]; } else { bid = i; } dp_topBits[offset + 1] = 0.0; } else { bid = topN.top().second; // bit id dp_topBits[offset + 1] = topN.top().first; // value of the infogain topN.pop(); } dp_topBits[offset] = (double)bid; for (unsigned int j = 0; j < d_classes; j++) { dp_topBits[offset + 2 + j] = (double)d_counts[j][bid]; } } return dp_topBits; }
void BitVect::initFromText(const char *data,const unsigned int dataLen, bool isBase64,bool allowOldFormat){ std::stringstream ss(std::ios_base::binary|std::ios_base::in|std::ios_base::out); if(isBase64){ unsigned int actualLen; char *decoded; decoded = Base64Decode((const char *)data,&actualLen); ss.write(decoded,actualLen); delete [] decoded; } else { ss.write(data,dataLen); } boost::int32_t format=0; boost::uint32_t nOn=0; boost::int32_t size; boost::int32_t version=0; // earlier versions of the code did not have the version number encoded, so // we'll use that to distinguish version 0 RDKit::streamRead(ss,size); if(size<0){ version = -1*size; if (version == 16) { format=1; } else if (version == 32) { format=2; } else { throw ValueErrorException("bad version in BitVect pickle"); } RDKit::streamRead(ss,size); } else if( !allowOldFormat ) { throw ValueErrorException("invalid BitVect pickle"); } RDKit::streamRead(ss,nOn); _initForSize(static_cast<int>(size)); // if the either have older version or or version 16 with ints for on bits if( (format==0) || ( (format == 1) && (size >= std::numeric_limits<unsigned short>::max()) ) ) { boost::uint32_t tmp; for(unsigned int i=0; i<nOn; i++){ RDKit::streamRead(ss,tmp); setBit(tmp); } } else if (format == 1) { // version 16 and on bits stored as short ints boost::uint16_t tmp; for(unsigned int i=0; i<nOn; i++){ RDKit::streamRead(ss,tmp); setBit(tmp); } } else if (format == 2) { // run length encoded format boost::uint32_t curr=0; for (unsigned int i=0; i<nOn; i++) { curr += RDKit::readPackedIntFromStream(ss); setBit(curr); curr++; } } }
void canonicalDFSTraversal(ROMol &mol,int atomIdx,int inBondIdx, std::vector<AtomColors> &colors, VECT_INT_VECT &cycles, INT_VECT &ranks, INT_VECT &cyclesAvailable, MolStack &molStack, INT_VECT &atomOrders, INT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector<INT_LIST> &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector<std::string> *bondSymbols ){ PRECONDITION(colors.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(ranks.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(atomOrders.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(bondVisitOrders.size()>=mol.getNumBonds(),"vector too small"); PRECONDITION(atomRingClosures.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(atomTraversalBondOrder.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(!bondsInPlay || bondsInPlay->size()>=mol.getNumBonds(),"bondsInPlay too small"); PRECONDITION(!bondSymbols || bondSymbols->size()>=mol.getNumBonds(),"bondSymbols too small"); int nAttached=0; Atom *atom = mol.getAtomWithIdx(atomIdx); INT_LIST directTravList,cycleEndList; molStack.push_back(MolStackElem(atom)); atomOrders[atom->getIdx()] = molStack.size(); colors[atomIdx] = GREY_NODE; // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector< PossibleType > possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second-bondsPair.first); while(bondsPair.first != bondsPair.second){ BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){ int otherIdx = theBond->getOtherAtomIdx(atomIdx); long rank=ranks[otherIdx]; // --------------------- // // things are a bit more complicated if we are sitting on a // ring atom we would like to traverse first to the // 
ring-closure atoms, then to atoms outside the ring first, // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // Here's how the black magic works: // - non-ring atom neighbors have their original ranks // - ring atom neighbors have this added to their ranks: // (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS // - ring-closure neighbors lose a factor of: // (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS // // This tactic biases us to traverse to non-ring neighbors first, // original ordering if bond orders are all equal... crafty, neh? // // --------------------- if( colors[otherIdx] == GREY_NODE ) { rank -= static_cast<int>(Bond::OTHER+1) * MAX_NATOMS*MAX_NATOMS; if(!bondSymbols){ rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) * MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS) * MAX_NATOMS; } } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){ if(!bondSymbols){ rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) * MAX_NATOMS*MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS; } } possibles.push_back(PossibleType(rank,otherIdx,theBond.get())); } } // --------------------- // // Sort on ranks // // --------------------- std::sort(possibles.begin(),possibles.end(),_possibleComp); // --------------------- // // Now work the children // // --------------------- std::vector<MolStack> subStacks; for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin(); possiblesIt!=possibles.end(); possiblesIt++){ MolStack subStack; #if 0 int possibleIdx = possiblesIt->second.first; Bond *bond = possiblesIt->second.second; #endif int possibleIdx = possiblesIt->get<1>(); Bond *bond = possiblesIt->get<2>(); Atom 
*otherAtom=mol.getAtomWithIdx(possibleIdx); unsigned int lowestRingIdx; INT_VECT::const_iterator cAIt; switch(colors[possibleIdx]){ case WHITE_NODE: // ----- // we haven't seen this node at all before // ----- // it might have some residual data from earlier calls, clean that up: if(otherAtom->hasProp("_TraversalBondIndexOrder")){ otherAtom->clearProp("_TraversalBondIndexOrder"); } directTravList.push_back(bond->getIdx()); subStack.push_back(MolStackElem(bond,atomIdx)); canonicalDFSTraversal(mol,possibleIdx,bond->getIdx(),colors, cycles,ranks,cyclesAvailable,subStack, atomOrders,bondVisitOrders,atomRingClosures,atomTraversalBondOrder, bondsInPlay,bondSymbols); subStacks.push_back(subStack); nAttached += 1; break; case GREY_NODE: // ----- // we've seen this, but haven't finished it (we're finishing a ring) // ----- cycleEndList.push_back(bond->getIdx()); cAIt=std::find(cyclesAvailable.begin(), cyclesAvailable.end(),1); if(cAIt==cyclesAvailable.end()){ throw ValueErrorException("Too many rings open at once. SMILES cannot be generated."); } lowestRingIdx = cAIt-cyclesAvailable.begin(); cyclesAvailable[lowestRingIdx] = 0; cycles[possibleIdx].push_back(lowestRingIdx); ++lowestRingIdx; bond->setProp("_TraversalRingClosureBond",lowestRingIdx); molStack.push_back(MolStackElem(bond, atom->getIdx())); molStack.push_back(MolStackElem(lowestRingIdx)); // we need to add this bond (which closes the ring) to the traversal list for the // other atom as well: atomTraversalBondOrder[otherAtom->getIdx()].push_back(bond->getIdx()); atomRingClosures[otherAtom->getIdx()].push_back(bond->getIdx()); break; default: // ----- // this node has been finished. don't do anything. 
// ----- break; } } INT_VECT &ringClosures=atomRingClosures[atom->getIdx()]; CHECK_INVARIANT(ringClosures.size()==cycles[atomIdx].size(), "ring closure mismatch"); for(unsigned int i=0;i<ringClosures.size();i++){ int ringIdx=cycles[atomIdx][i]; ringIdx += 1; molStack.push_back(MolStackElem(ringIdx)); } cycles[atomIdx].resize(0); MolStack::const_iterator ciMS; for(int i=0;i<nAttached;i++){ if(i<nAttached-1){ int branchIdx=0; if(subStacks[i].begin()->type==MOL_STACK_ATOM){ branchIdx=subStacks[i].begin()->obj.atom->getIdx(); } else if(subStacks[i].begin()->type==MOL_STACK_BOND){ branchIdx=-1*subStacks[i].begin()->obj.bond->getIdx(); } else { ASSERT_INVARIANT(0,"branch started with something other than an atom or bond"); } molStack.push_back(MolStackElem("(",branchIdx)); for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){ molStack.push_back(*ciMS); switch(ciMS->type){ case MOL_STACK_ATOM: atomOrders[ciMS->obj.atom->getIdx()] = molStack.size(); break; case MOL_STACK_BOND: bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size(); break; default: break; } } molStack.push_back(MolStackElem(")",branchIdx)); } else { for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){ molStack.push_back(*ciMS); switch(ciMS->type){ case MOL_STACK_ATOM: atomOrders[ciMS->obj.atom->getIdx()] = molStack.size(); break; case MOL_STACK_BOND: bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size(); break; default: break; } } } } //std::cerr<<"*****>>>>>> Traversal results for atom: "<<atom->getIdx()<<"> "; INT_LIST travList; // first push on the incoming bond: if(inBondIdx >= 0){ //std::cerr<<" "<<inBondIdx; travList.push_back(inBondIdx); } // ... ring closures that end here: for(INT_LIST_CI ilci=cycleEndList.begin();ilci!=cycleEndList.end();++ilci){ //std::cerr<<" ["<<*ilci<<"]"; travList.push_back(*ilci); } // ... 
ring closures that start here: // if(atom->hasProp("_TraversalBondIndexOrder")){ // INT_LIST indirectTravList; // atom->getProp("_TraversalBondIndexOrder",indirectTravList); // for(INT_LIST_CI ilci=indirectTravList.begin();ilci!=indirectTravList.end();++ilci){ // //std::cerr<<" ("<<*ilci<<")"; // travList.push_back(*ilci); // } // } BOOST_FOREACH(int ili,atomTraversalBondOrder[atom->getIdx()]){ travList.push_back(ili); }