Пример #1
0
    void Debloom_check1 ()
    {
        size_t kmerSize = 11;
        size_t miniSize = 8;
        size_t nks      = 1;

        const char* seqs[] = {
            "CGCTACAGCAGCTAGTTCATCATTGTTTATCAATGATAAAATATAATAAGCTAAAAGGAAACTATAAATA"
            "ACCATGTATAATTATAAGTAGGTACCTATTTTTTTATTTTAAACTGAAATTCAATATTATATAGGCAAAG"
        } ;

        /** We configure parameters for a SortingCountAlgorithm object. */
        IProperties* params = SortingCountAlgorithm<>::getDefaultProperties();  LOCAL (params);
        params->setInt (STR_KMER_SIZE,          kmerSize);
        params->setInt (STR_MINIMIZER_SIZE,     miniSize);
        params->setInt (STR_MAX_MEMORY,         MAX_MEMORY);
        params->setInt (STR_KMER_ABUNDANCE_MIN, nks);
        params->setStr (STR_URI_OUTPUT,         "foo");

        /** We create a SortingCountAlgorithm object. */
        SortingCountAlgorithm<> sortingCount (new BankStrings (seqs, ARRAY_SIZE(seqs)), params);

        /** We launch DSK. */
        sortingCount.execute();

        CPPUNIT_ASSERT (sortingCount.getSolidCounts()->getNbItems() == (int64_t)(strlen(seqs[0]) - kmerSize + 1) );

        /** We get the storage instance. */
        Storage* storage = sortingCount.getStorage();
        LOCAL (storage);

        Partition<SortingCountAlgorithm<>::Count>& counts = storage->getGroup("dsk").getPartition<Kmer<>::Count> ("solid");

        /** We create a bloom instance. */
        float nbitsPerKmer = DebloomAlgorithm<>::getNbBitsPerKmer (kmerSize, DEBLOOM_ORIGINAL);
        BloomAlgorithm<> bloom (*storage, &counts, kmerSize, nbitsPerKmer, 0, BLOOM_BASIC);
        bloom.execute ();

        /** We create a debloom instance. */
        DebloomAlgorithm<> debloom (
            storage->getGroup("bloom"),
            storage->getGroup("debloom"),
            &counts, kmerSize, miniSize, 1000, 0, BLOOM_BASIC, DEBLOOM_ORIGINAL
        );

        /** We launch the debloom. */
        debloom.execute();

        /** The following values have been computed with the original minia. */
        u_int64_t values[] =
        {
            0xc0620,    0x288f40,   0x188f40,   0x2aaa29,   0x8000b,    0x200881,   0x288081,   0x820db,    0x52e23,    0x2888f,
            0xaaa8b,    0x28838d,   0x20000,    0xa93ab,    0x2c18d,    0x2ba89,    0x183600,   0xea00b,    0x1a4ea0,   0xf8585
        };
        set<Kmer<>::Type> okValues;
        
        for (unsigned int i = 0; i < ARRAY_SIZE(values); i++)
        {
            Kmer<>::Type val; val.setVal(values[i]);
            okValues.insert(val);
        }

        CPPUNIT_ASSERT (debloom.getCriticalKmers()->getNbItems() == ARRAY_SIZE(values));

        /** We iterate the cFP kmers. */
        set<Kmer<>::Type> checkValues;
        Iterator<Kmer<>::Type>* iter = debloom.getCriticalKmers()->iterator();
        LOCAL (iter);

        for (iter->first(); !iter->isDone(); iter->next())
        {
            set<Kmer<>::Type>::iterator lookup = okValues.find (iter->item());
            CPPUNIT_ASSERT (lookup != okValues.end());

            checkValues.insert (iter->item());
        }

        CPPUNIT_ASSERT (checkValues.size() == okValues.size());
    }