//! Convert a SparseBitVector to an ExplicitBitVector ExplicitBitVect *convertToExplicit(const SparseBitVect *sbv) { unsigned int sl = sbv->getNumBits(); ExplicitBitVect *ebv = new ExplicitBitVect(sl); const IntSet *bset = sbv->getBitSet(); for (IntSetConstIter it = bset->begin(); it != bset->end(); it++) { ebv->setBit(*it); } return ebv; }
/*!
  Adds one labelled example to the ranker's running tallies.

  \param bv     bit vector for the example; its length must equal d_dims
  \param label  class of the example; must lie in [0, d_classes-1]

  The instance counter and the per-class counter are always bumped. A per-bit
  counter for (label, i) is bumped only when bit i is set in bv and, if a mask
  is present (dp_maskBits non-null), when the mask also has bit i set.
*/
void InfoBitRanker::accumulateVotes(const ExplicitBitVect &bv,
                                    unsigned int label) {
  RANGE_CHECK(0, label, d_classes-1);
  CHECK_INVARIANT(bv.getNumBits() == d_dims, "Incorrect bit vector size");

  d_nInst += 1;
  d_clsCount[label] += 1;

  const unsigned int nBits = bv.getNumBits();
  for (unsigned int bitIdx = 0; bitIdx < nBits; ++bitIdx) {
    // skip bits that are off in the example
    if (!(*bv.dp_bits)[bitIdx]) {
      continue;
    }
    // skip bits excluded by the optional mask
    if (dp_maskBits && !dp_maskBits->getBit(bitIdx)) {
      continue;
    }
    d_counts[label][bitIdx] += 1;
  }
}
// Pickle support: produces the constructor arguments used to rebuild `self`.
//
// The string returned by self.toString() is copied into a Python bytes
// object (explicit length, so embedded NUL characters are preserved) and
// returned as a one-element tuple.
static python::tuple getinitargs(const ExplicitBitVect& self) {
  const std::string serialized = self.toString();
  PyObject *rawBytes =
      PyBytes_FromStringAndSize(serialized.c_str(), serialized.length());
  // handle<> takes over the new reference returned by the C API call
  python::handle<> ownedBytes(rawBytes);
  python::object payload = python::object(ownedBytes);
  return python::make_tuple(payload);
};
//' map a molecule to a maccs fingerprints //' //' @param xp a molecule //' @return a vector // [[Rcpp::export]] IntegerVector mol2maccs( SEXP xp){ int i; RWMol *mol = p_getMol(xp); if( mol ){ ExplicitBitVect *bv = MACCSFingerprints::getFingerprintAsBitVect ( *mol ); if(bv){ IntegerVector v = IntegerVector( bv->getNumBits ()); for( i = 0; i< bv->getNumBits ();i++ ){ v(i) = bv->getBit(i)?1:0; } return v; } } return IntegerVector(0); }
void testMols(std::vector<std::unique_ptr<ROMol>> &mols, FragFPGenerator &fpGen, FragCatalog &fcat) { int nDone = 0; for (auto &&mi : mols) { ExplicitBitVect *fp = fpGen.getFPForMol(*mi, fcat); switch (nDone) { case 0: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[2]); break; case 1: TEST_ASSERT(fp->getNumOnBits() == 2); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[3]); break; case 2: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[4]); TEST_ASSERT((*fp)[5]); break; case 3: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[6]); TEST_ASSERT((*fp)[7]); break; case 4: TEST_ASSERT(fp->getNumOnBits() == 2); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[8]); break; case 5: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[6]); TEST_ASSERT((*fp)[9]); break; case 6: TEST_ASSERT(fp->getNumOnBits() == 5); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[2]); TEST_ASSERT((*fp)[3]); TEST_ASSERT((*fp)[10]); break; } delete fp; nDone += 1; } }
void testMols(std::vector<ROMol *> &mols, FragFPGenerator &fpGen, FragCatalog &fcat) { std::vector<ROMol *>::iterator mi; int nDone = 0; for (mi = mols.begin(); mi != mols.end(); mi++) { ExplicitBitVect *fp = fpGen.getFPForMol(*(*mi), fcat); switch (nDone) { case 0: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[2]); break; case 1: TEST_ASSERT(fp->getNumOnBits() == 2); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[3]); break; case 2: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[4]); TEST_ASSERT((*fp)[5]); break; case 3: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[6]); TEST_ASSERT((*fp)[7]); break; case 4: TEST_ASSERT(fp->getNumOnBits() == 2); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[8]); break; case 5: TEST_ASSERT(fp->getNumOnBits() == 3); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[6]); TEST_ASSERT((*fp)[9]); break; case 6: TEST_ASSERT(fp->getNumOnBits() == 5); TEST_ASSERT((*fp)[0]); TEST_ASSERT((*fp)[1]); TEST_ASSERT((*fp)[2]); TEST_ASSERT((*fp)[3]); TEST_ASSERT((*fp)[10]); break; } nDone += 1; } }
//' map a molecule to a morgan fingerprints (atom environement) //' //' @param xp a molecule //' @param radius radius of scaffold //' @param nBits final length //' @param useFeatures useFeatures //' @return a vector // [[Rcpp::export]] IntegerVector mol2morgan( SEXP xp , unsigned int radius=2,unsigned int nBits=2048, bool useFeatures=false){ // radius = 2 => ecfp4 2^2 // radius = 2 & useFeatures=True => fcfp4 int i; RWMol *mol = p_getMol(xp); if( mol ){ ExplicitBitVect *bv = MorganFingerprints::getFingerprintAsBitVect ( *mol , radius,nBits, 0,0,false,useFeatures,false,0); if(bv){ IntegerVector v = IntegerVector( bv->getNumBits ()); for( i = 0; i< bv->getNumBits ();i++ ){ v(i) = bv->getBit(i)?1:0; } delete bv; return v; } } return IntegerVector(0); }
// Computes a fingerprint for `molPtr` via getFp() and copies it into `res`.
//
//   molPtr    - molecule to fingerprint; must be non-null
//   res       - destination bit vector; must hold at least nBytes * 8 bits
//   bitFlags  - forwarded unchanged to getFp()
//   isQuery   - forwarded unchanged to getFp()
//   resetVect - when true, res is cleared before any bit is set
//   nBytes    - number of fingerprint bytes requested and scanned
//
// Bit j (counting from the least-significant bit) of byte i maps to bit
// i * 8 + j of `res`. Bits are only ever set here, never cleared, so with
// resetVect == false the existing contents of `res` are kept. The buffer
// returned by getFp() is released with MyFree() before returning.
void reaccsToFingerprint(struct reaccs_molecule_t *molPtr, ExplicitBitVect &res,
                         unsigned int bitFlags = 32767U, bool isQuery = false,
                         bool resetVect = true, unsigned int nBytes = 64) {
  PRECONDITION(molPtr, "bad molecule");
  PRECONDITION(res.getNumBits() >= nBytes * 8U, "res too small");
  if (resetVect) {
    res.clearBits();
  }

  char *fingerprint = getFp(molPtr, bitFlags, isQuery, nBytes);
  for (unsigned int byteIdx = 0; byteIdx < nBytes; ++byteIdx) {
    const unsigned char bits = static_cast<unsigned char>(fingerprint[byteIdx]);
    if (!bits) {
      continue;  // nothing set in this byte
    }
    for (unsigned int bitPos = 0; bitPos < 8; ++bitPos) {
      if ((bits >> bitPos) & 1U) {
        res.setBit(byteIdx * 8 + bitPos);
      }
    }
  }
  MyFree(fingerprint);
};
// Returns the number of positions at which bv1 and bv2 hold the same value
// (both set or both unset): the length of bv1 minus the number of positions
// where the two vectors differ.
//
// Only bv1's length is consulted; nothing here checks that bv2 has the same
// number of bits.
int NumBitsInCommon(const ExplicitBitVect& bv1, const ExplicitBitVect& bv2) {
  // XOR leaves a 1 exactly where the two vectors disagree
  const std::size_t numDiffering =
      ((*bv1.dp_bits) ^ (*bv2.dp_bits)).count();
  return bv1.getNumBits() - numDiffering;
}
// Pickle support: produces the constructor arguments used to rebuild `self`.
//
// The string returned by self.toString() is wrapped in a Python bytes object
// before being placed in the one-element result tuple. Passing the
// std::string straight to make_tuple() lets Boost.Python convert it to a
// Python text string, which is not a safe carrier for arbitrary binary data
// under Python 3. The explicit length keeps embedded NUL characters intact.
static python::tuple getinitargs(const ExplicitBitVect& self) {
  const std::string res = self.toString();
  // handle<> takes over the new reference returned by the C API call
  python::object retval = python::object(
      python::handle<>(PyBytes_FromStringAndSize(res.c_str(), res.length())));
  return python::make_tuple(retval);
};
// caller owns the result, it must be deleted ExplicitBitVect *PatternFingerprintMol(const ROMol &mol, unsigned int fpSize, std::vector<unsigned int> *atomCounts, ExplicitBitVect *setOnlyBits) { PRECONDITION(fpSize != 0, "fpSize==0"); PRECONDITION(!atomCounts || atomCounts->size() >= mol.getNumAtoms(), "bad atomCounts size"); PRECONDITION(!setOnlyBits || setOnlyBits->getNumBits() == fpSize, "bad setOnlyBits size"); std::vector<const ROMol *> patts; patts.reserve(10); unsigned int idx = 0; while (1) { std::string pq = pqs[idx]; if (pq == "") break; ++idx; const ROMol *matcher = pattern_flyweight(pq).get().getMatcher(); CHECK_INVARIANT(matcher, "bad smarts"); patts.push_back(matcher); } if (!mol.getRingInfo()->isInitialized()) { MolOps::fastFindRings(mol); } boost::dynamic_bitset<> isQueryAtom(mol.getNumAtoms()), isQueryBond(mol.getNumBonds()); ROMol::VERTEX_ITER firstA, lastA; boost::tie(firstA, lastA) = mol.getVertices(); while (firstA != lastA) { const Atom *at = mol[*firstA].get(); if (isComplexQuery(at)) { isQueryAtom.set(at->getIdx()); // std::cerr<<" complex atom: "<<at->getIdx()<<std::endl; } ++firstA; } ROMol::EDGE_ITER firstB, lastB; boost::tie(firstB, lastB) = mol.getEdges(); while (firstB != lastB) { const Bond *bond = mol[*firstB].get(); // if( isComplexQuery(bond) ){ if (isPatternComplexQuery(bond)) { isQueryBond.set(bond->getIdx()); // std::cerr<<" complex bond: "<<bond->getIdx()<<std::endl; } ++firstB; } ExplicitBitVect *res = new ExplicitBitVect(fpSize); unsigned int pIdx = 0; BOOST_FOREACH (const ROMol *patt, patts) { ++pIdx; std::vector<MatchVectType> matches; // uniquify matches? 
// time for 10K molecules w/ uniquify: 5.24s // time for 10K molecules w/o uniquify: 4.87s SubstructMatch(mol, *patt, matches, false); boost::uint32_t mIdx = pIdx + patt->getNumAtoms() + patt->getNumBonds(); BOOST_FOREACH (MatchVectType &mv, matches) { #ifdef VERBOSE_FINGERPRINTING std::cerr << "\nPatt: " << pIdx << " | "; #endif // collect bits counting the number of occurances of the pattern: gboost::hash_combine(mIdx, 0xBEEF); res->setBit(mIdx % fpSize); #ifdef VERBOSE_FINGERPRINTING std::cerr << "count: " << mIdx % fpSize << " | "; #endif bool isQuery = false; boost::uint32_t bitId = pIdx; std::vector<unsigned int> amap(mv.size(), 0); BOOST_FOREACH (MatchVectType::value_type &p, mv) { #ifdef VERBOSE_FINGERPRINTING std::cerr << p.second << " "; #endif if (isQueryAtom[p.second]) { isQuery = true; #ifdef VERBOSE_FINGERPRINTING std::cerr << "atom query."; #endif break; } gboost::hash_combine(bitId, mol.getAtomWithIdx(p.second)->getAtomicNum()); amap[p.first] = p.second; }