//! Convert a SparseBitVector to an ExplicitBitVector ExplicitBitVect *convertToExplicit(const SparseBitVect *sbv) { unsigned int sl = sbv->getNumBits(); ExplicitBitVect *ebv = new ExplicitBitVect(sl); const IntSet *bset = sbv->getBitSet(); for (IntSetConstIter it = bset->begin(); it != bset->end(); it++) { ebv->setBit(*it); } return ebv; }
//! Unpack the byte-wise fingerprint returned by getFp() into an ExplicitBitVect
/*!
  \param molPtr     molecule handed to getFp(); must be non-null
  \param res        bit vector that receives the bits; must provide at least
                    \c nBytes * 8 bits
  \param bitFlags   forwarded unchanged to getFp()
  \param isQuery    forwarded unchanged to getFp()
  \param resetVect  when true, \c res is cleared before any bit is set;
                    when false, bits already set in \c res are left alone
  \param nBytes     number of fingerprint bytes requested from getFp() and
                    unpacked into \c res

  Bit \c j (counting from the least-significant bit) of fingerprint byte
  \c i is stored as bit <tt>i * 8 + j</tt> of \c res.
*/
void reaccsToFingerprint(struct reaccs_molecule_t *molPtr, ExplicitBitVect &res,
                         unsigned int bitFlags = 32767U, bool isQuery = false,
                         bool resetVect = true, unsigned int nBytes = 64) {
  PRECONDITION(molPtr, "bad molecule");
  PRECONDITION(res.getNumBits() >= nBytes * 8U, "res too small");
  if (resetVect) {
    res.clearBits();
  }

  // NOTE(review): the buffer returned by getFp() is used without a null
  // check - confirm that getFp() cannot fail for a non-null molecule.
  char *fingerprint = getFp(molPtr, bitFlags, isQuery, nBytes);
  for (unsigned int i = 0; i < nBytes; ++i) {
    // Examine the byte as an unsigned value: with a plain (possibly signed)
    // char the mask for bit 7 becomes negative, and shifting that negative
    // mask once more is undefined behaviour before C++20.
    const unsigned char byte = static_cast<unsigned char>(fingerprint[i]);
    if (!byte) {
      continue;  // nothing set in this byte
    }
    for (unsigned int j = 0; j < 8; ++j) {
      if (byte & (1U << j)) {
        res.setBit(i * 8 + j);
      }
    }
  }
  // The buffer came from getFp(); release it with the matching MyFree().
  MyFree(fingerprint);
}
// caller owns the result, it must be deleted ExplicitBitVect *PatternFingerprintMol(const ROMol &mol, unsigned int fpSize, std::vector<unsigned int> *atomCounts, ExplicitBitVect *setOnlyBits) { PRECONDITION(fpSize != 0, "fpSize==0"); PRECONDITION(!atomCounts || atomCounts->size() >= mol.getNumAtoms(), "bad atomCounts size"); PRECONDITION(!setOnlyBits || setOnlyBits->getNumBits() == fpSize, "bad setOnlyBits size"); std::vector<const ROMol *> patts; patts.reserve(10); unsigned int idx = 0; while (1) { std::string pq = pqs[idx]; if (pq == "") break; ++idx; const ROMol *matcher = pattern_flyweight(pq).get().getMatcher(); CHECK_INVARIANT(matcher, "bad smarts"); patts.push_back(matcher); } if (!mol.getRingInfo()->isInitialized()) { MolOps::fastFindRings(mol); } boost::dynamic_bitset<> isQueryAtom(mol.getNumAtoms()), isQueryBond(mol.getNumBonds()); ROMol::VERTEX_ITER firstA, lastA; boost::tie(firstA, lastA) = mol.getVertices(); while (firstA != lastA) { const Atom *at = mol[*firstA].get(); if (isComplexQuery(at)) { isQueryAtom.set(at->getIdx()); // std::cerr<<" complex atom: "<<at->getIdx()<<std::endl; } ++firstA; } ROMol::EDGE_ITER firstB, lastB; boost::tie(firstB, lastB) = mol.getEdges(); while (firstB != lastB) { const Bond *bond = mol[*firstB].get(); // if( isComplexQuery(bond) ){ if (isPatternComplexQuery(bond)) { isQueryBond.set(bond->getIdx()); // std::cerr<<" complex bond: "<<bond->getIdx()<<std::endl; } ++firstB; } ExplicitBitVect *res = new ExplicitBitVect(fpSize); unsigned int pIdx = 0; BOOST_FOREACH (const ROMol *patt, patts) { ++pIdx; std::vector<MatchVectType> matches; // uniquify matches? 
// time for 10K molecules w/ uniquify: 5.24s // time for 10K molecules w/o uniquify: 4.87s SubstructMatch(mol, *patt, matches, false); boost::uint32_t mIdx = pIdx + patt->getNumAtoms() + patt->getNumBonds(); BOOST_FOREACH (MatchVectType &mv, matches) { #ifdef VERBOSE_FINGERPRINTING std::cerr << "\nPatt: " << pIdx << " | "; #endif // collect bits counting the number of occurances of the pattern: gboost::hash_combine(mIdx, 0xBEEF); res->setBit(mIdx % fpSize); #ifdef VERBOSE_FINGERPRINTING std::cerr << "count: " << mIdx % fpSize << " | "; #endif bool isQuery = false; boost::uint32_t bitId = pIdx; std::vector<unsigned int> amap(mv.size(), 0); BOOST_FOREACH (MatchVectType::value_type &p, mv) { #ifdef VERBOSE_FINGERPRINTING std::cerr << p.second << " "; #endif if (isQueryAtom[p.second]) { isQuery = true; #ifdef VERBOSE_FINGERPRINTING std::cerr << "atom query."; #endif break; } gboost::hash_combine(bitId, mol.getAtomWithIdx(p.second)->getAtomicNum()); amap[p.first] = p.second; }