void testRevcompRepresentative() { list<Sequence> reads = Fasta("../../data/representative_revcomp.fa").getAll(); KmerRepresentativeComputer krc(reads, "##############"); krc.setOptions(false, 3, 0.5); krc.setCoverageReferenceLength(50); krc.compute(); Sequence representative = krc.getRepresentative(); // Computing reads revcomp for (list <Sequence>::iterator it = reads.begin(); it != reads.end(); it++) { it->sequence = revcomp(it->sequence); } KmerRepresentativeComputer krc2(reads, "##############"); krc2.setOptions(false, 3, 0.5); krc2.setCoverageReferenceLength(50); krc2.compute(); Sequence representative2 = krc2.getRepresentative(); // Check position of [ in label, so that we remove that part, and then we // can compare the labels size_t pos1 = representative.label.find_first_of('['); size_t pos2 = representative2.label.find_first_of('['); TAP_TEST(representative.label.substr(0, pos1) == representative2.label.substr(0, pos2), TEST_KMER_REPRESENTATIVE_REVCOMP, "The two representatives should have the same label"); TAP_TEST(revcomp(representative.sequence) == representative2.sequence, TEST_KMER_REPRESENTATIVE_REVCOMP, "The two representatives should have the same sequence (but revcomp-ed)"); }
void testFastaNbSequences() { TAP_TEST(nb_sequences_in_fasta("../../germline/IGHV.fa") == 348, TEST_FASTA_NB_SEQUENCES, "ccc"); int a1 = approx_nb_sequences_in_fasta("../../germline/IGHV.fa"); TAP_TEST(a1 >= 340 && a1 <= 348, TEST_FASTA_NB_SEQUENCES, ""); int a2 = nb_sequences_in_fasta("../../data/Stanford_S22.fasta", true); TAP_TEST(a2 >= 13100 && a2 <= 13200, TEST_FASTA_NB_SEQUENCES, ""); }
/**
 * Checks operator<< on a Sequence: with a quality string the expected
 * output is the four-line '@' form, and once the quality is emptied the
 * expected output is the two-line '>' form.
 */
void testSequenceOutputOperator() {
  ostringstream oss;
  Sequence seq = {"a b c", "a", "GATTACA", "AIIIIIH", NULL};
  oss << seq;

  TAP_TEST(oss.str() == "@a\nGATTACA\n+\nAIIIIIH\n", TEST_SEQUENCE_OUT, oss.str());

  ostringstream oss2;
  seq.quality = "";
  oss2 << seq;

  // Report the stream that is actually being checked (oss2, not oss).
  TAP_TEST(oss2.str() == ">a\nGATTACA\n", TEST_SEQUENCE_OUT, oss2.str());
}
/**
 * Loads test1.fa once into fa1 and twice into fa2 (constructor + add),
 * then checks that fa2 holds every entry of fa1 twice: once at the same
 * index and once shifted by fa1.size().
 */
void testFastaAdd() {
  Fasta fa1("../../data/test1.fa");
  Fasta fa2("../../data/test1.fa");
  fa2.add("../../data/test1.fa");

  TAP_TEST(fa1.size() * 2 == fa2.size(), TEST_FASTA_ADD, "");

  for (int i = 0; i < fa1.size(); i++) {
    // Index of the copy of entry i that came from the add() call
    const auto twin = i + fa1.size();

    TAP_TEST(fa1.label(i) == fa2.label(i)
             && fa1.label(i) == fa2.label(twin), TEST_FASTA_ADD, "");
    TAP_TEST(fa1.label_full(i) == fa2.label_full(i)
             && fa1.label_full(i) == fa2.label_full(twin), TEST_FASTA_ADD, "");
    TAP_TEST(fa1.sequence(i) == fa2.sequence(i)
             && fa1.sequence(i) == fa2.sequence(twin), TEST_FASTA_ADD, "");
  }
}
void testScore() { // ReadLengthScore testing ReadLengthScore rls; Sequence seq1 = {"seq", "seq", "", "", 0}; TAP_TEST_EQUAL(rls.getScore(seq1), 0., TEST_LENGTH_SCORE, "score should be 0, is " << rls.getScore(seq1)); Sequence seq2 = {"seq", "seq", "ATCGTTTACGTC", "", 0}; TAP_TEST_EQUAL(rls.getScore(seq2), 12., TEST_LENGTH_SCORE, "score should be 12, is " << rls.getScore(seq2)); Sequence seq3 = {"seq", "seq", "A", "", 0}; TAP_TEST_EQUAL(rls.getScore(seq3), 1., TEST_LENGTH_SCORE, "score should be 1, is " << rls.getScore(seq3)); // ReadQualityScore testing ReadQualityScore rqs; Sequence seq4 = {"s", "s", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", 0}; TAP_TEST((int) rqs.getScore(seq4) == (int) (41 * 120/ GOOD_QUALITY), TEST_QUALITY_SCORE, "score should be " << (int) (41 * 120/ GOOD_QUALITY) << " not " << rqs.getScore(seq4)); // Changing the quality, put the percentile should not change yet. seq4.quality[10] = '-'; TAP_TEST((int) rqs.getScore(seq4) == (int) (41 * 120/ GOOD_QUALITY), TEST_QUALITY_SCORE, "score should be " << (int) (41 * 120/ GOOD_QUALITY) << " not " << rqs.getScore(seq4)); // Now the percentile value should change and should correspond to '-' seq4.quality[22] = '!'; TAP_TEST((int) rqs.getScore(seq4) == (int) (('-' - ' ') * 120/ GOOD_QUALITY), TEST_QUALITY_SCORE, "score should be " << (int) (('-' - ' ') * 120/ GOOD_QUALITY) << " not " << rqs.getScore(seq4)); // Quality does not exist anymore → the score is the length seq4.quality = ""; TAP_TEST_EQUAL(rqs.getScore(seq4), 120, TEST_QUALITY_SCORE, "score should be 120 not " << rqs.getScore(seq4)); }
/**
 * Checks the extended-nucleotide predicates, first on single characters
 * and then on whole strings.
 */
void testExtendedNucleotides() {
  // Single characters: 'A' and 'a' are not extended, 'N' and ' ' are
  TAP_TEST(is_extended_nucleotide('A') == false, TEST_EXTENDED_NUCL, "");
  TAP_TEST(is_extended_nucleotide('a') == false, TEST_EXTENDED_NUCL, "");
  TAP_TEST(is_extended_nucleotide('N') == true, TEST_EXTENDED_NUCL, "");
  TAP_TEST(is_extended_nucleotide(' ') == true, TEST_EXTENDED_NUCL, "");

  // Whole strings: only the one containing 'n' has extended nucleotides
  TAP_TEST(has_extended_nucleotides("") == false, TEST_EXTENDED_NUCL, "");
  TAP_TEST(has_extended_nucleotides("ACGTacgt") == false, TEST_EXTENDED_NUCL, "");
  TAP_TEST(has_extended_nucleotides("ACGTnacgt") == true, TEST_EXTENDED_NUCL, "");
}
void testChooser() { list<Sequence> reads = BioReader("data/test1.fa").getAll(); ReadLengthScore rls; ReadChooser rc(reads, rls); TAP_TEST(rc.getithBest(1).label == "seq4", TEST_READ_CHOOSER_SORTED, "First sequence is " << rc.getithBest(1).label); TAP_TEST(rc.getithBest(1).label == rc.getBest().label && rc.getithBest(1).sequence == rc.getBest().sequence, TEST_READ_CHOOSER_BEST,""); TAP_TEST(rc.getithBest(2).label == "seq2", TEST_READ_CHOOSER_SORTED, "Second sequence is " << rc.getithBest(2).label); TAP_TEST(rc.getithBest(3).label == "seq1", TEST_READ_CHOOSER_SORTED, "Third sequence is " << rc.getithBest(3).label); TAP_TEST(rc.getithBest(4).label == "", TEST_READ_CHOOSER_SORTED, "First sequence is " << rc.getithBest(4).label); }
/**
 * Loads the same data from a FASTA file and a FASTQ file and checks that
 * both readers expose identical labels, full labels and sequences, then
 * spot-checks two entries of the FASTA file.
 */
void testFasta1() {
  Fasta fa("../../data/test1.fa");
  Fasta fq("../../data/test1.fq");

  TAP_TEST(fa.size() == fq.size(), TEST_FASTA_SIZE, "");

  // Entry-by-entry comparison of the two files
  for (int i = 0; i < fa.size(); i++) {
    TAP_TEST(fa.label(i) == fq.label(i), TEST_FASTA_LABEL, "");
    TAP_TEST(fa.label_full(i) == fq.label_full(i), TEST_FASTA_LABEL_FULL, "");
    TAP_TEST(fa.sequence(i) == fq.sequence(i), TEST_FASTA_SEQUENCE, "");
  }

  // Third entry
  TAP_TEST(fa.label(2) == "seq3", TEST_FASTA_LABEL, "");
  TAP_TEST(fa.sequence(2) == "A", TEST_FASTA_SEQUENCE, "");

  // Fifth entry: empty label
  TAP_TEST(fa.label(4) == "", TEST_FASTA_LABEL, "");
  TAP_TEST(fa.sequence(4) == "AATN", TEST_FASTA_SEQUENCE, "");
}
void testCreateSequence() { Sequence seq1 = create_sequence("label", "l", "AAAAAAAAAA", "!!!!!!!!!!"); Sequence seq2 = create_sequence("", "", "", ""); TAP_TEST(seq1.label_full == "label", TEST_CREATE_SEQUENCE_LABEL_FULL, ""); TAP_TEST(seq2.label_full == "", TEST_CREATE_SEQUENCE_LABEL_FULL, ""); TAP_TEST(seq1.label == "l", TEST_CREATE_SEQUENCE_LABEL, ""); TAP_TEST(seq2.label == "", TEST_CREATE_SEQUENCE_LABEL, ""); TAP_TEST(seq1.sequence == "AAAAAAAAAA", TEST_CREATE_SEQUENCE_SEQUENCE, ""); TAP_TEST(seq2.sequence == "", TEST_CREATE_SEQUENCE_SEQUENCE, ""); TAP_TEST(seq1.quality == "!!!!!!!!!!", TEST_CREATE_SEQUENCE_QUALITY, ""); TAP_TEST(seq2.quality == "", TEST_CREATE_SEQUENCE_QUALITY, ""); }
void testRCInsertAcAutomaton() { PointerACAutomaton<KmerAffect> aho(true); aho.insert("ACAGTC", "V", true, 0, "##-##"); aho.build_failure_functions(); // Will insert AC-GT → ACAGT, ACCGT, ACGGT, ACTGT // and CA-TC → CAATC, CACTC, CAGTC, CATTC // plus the revcomps: // Will insert GA-TG → GAATG, GACTG, GAGTG, GATTG // and AC-GT → ACAGT, ACCGT, ACGGT, ACTGT pointer_state<KmerAffect> *state = aho.goto_state("ACCGT"); TAP_TEST_EQUAL(state->informations.size(), 1, TEST_AC_GET, ""); TAP_TEST_EQUAL(state->informations.front(), AFFECT_AMBIGUOUS, TEST_AC_GET, ""); TAP_TEST(state->is_final, TEST_AC_FINAL, ""); TAP_TEST(! aho.goto_state("CAAT")->is_final, TEST_AC_FINAL, ""); TAP_TEST(aho.goto_state("CAAT")->informations.size() == 1, TEST_AC_GET, ""); TAP_TEST(aho.goto_state("CAAT")->informations.front() == AFFECT_UNKNOWN, TEST_AC_GET, ""); TAP_TEST(aho.goto_state("GAGTG")->informations.front() == AFFECT_V_BWD, TEST_AC_GET, ""); TAP_TEST(aho.goto_state("GAGTG")->is_final, TEST_AC_FINAL, ""); TAP_TEST(aho.goto_state("GAGTG")->transitions[A] == aho.goto_state("GA"), TEST_AC_TRANSITIONS, ""); vector<KmerAffect> results = aho.getResults("ACCGTgaatgCATTCA"); vector<KmerAffect> expected = {AFFECT_AMBIGUOUS, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_V_BWD, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_V, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN }; TAP_TEST(results.size() == expected.size(), TEST_AC_GET_RESULTS, ""); TAP_TEST(results == expected, TEST_AC_GET_RESULTS, ""); }
void testExtractBasename() { TAP_TEST(extract_basename("/var/toto/titi/tutu/bla.bli.bluc", true) == "bla.bli", TEST_EXTRACT_BASENAME, extract_basename("/var/toto/titi/tutu/bla.bli.bluc", true)); TAP_TEST(extract_basename("/var/toto/titi/tutu/bla.bli.bluc", false) == "bla.bli.bluc", TEST_EXTRACT_BASENAME, extract_basename("/var/toto/titi/tutu/bla.bli.bluc", false)); TAP_TEST(extract_basename("bla.bli.bluc", true) == "bla.bli", TEST_EXTRACT_BASENAME, extract_basename("bla.bli.bluc", true)); TAP_TEST(extract_basename("bla.bli.bluc", false) == "bla.bli.bluc", TEST_EXTRACT_BASENAME, extract_basename("bla.bli.bluc", false)); TAP_TEST(extract_basename("a_filename_without_extension", true) == "a_filename_without_extension", TEST_EXTRACT_BASENAME, extract_basename("a_filename_without_extension", true)); TAP_TEST(extract_basename("/", true) == "", TEST_EXTRACT_BASENAME, extract_basename("/", true)); }
void testRepresentative() { list<Sequence> reads = Fasta("../../data/representative.fa").getAll(); KmerRepresentativeComputer krc(reads, "##############"); krc.setStabilityLimit(0); krc.setRevcomp(false); krc.setMinCover(1); krc.setPercentCoverage(0.5); krc.setCoverageReferenceLength(50); krc.compute(); Sequence representative = krc.getRepresentative(); // Seq3 is the longest it should be taken when performing 0 extra iteration TAP_TEST(representative.label.find("seq3-[37,73]") == 0, TEST_KMER_REPRESENTATIVE, "If we take the first representative we should have seq3, and not at the beginning (" << representative.label << " instead)"); krc.setStabilityLimit(1); krc.compute(); representative = krc.getRepresentative(); TAP_TEST(representative.label.find("seq3-[37,73]") == 0, TEST_KMER_REPRESENTATIVE, "When allowing one step before stability, we should still have seq3 (" << representative.label << " instead)"); krc.setStabilityLimit(2); krc.compute(); representative = krc.getRepresentative(); TAP_TEST(representative.label.find("seq1-[0,41]") == 0, TEST_KMER_REPRESENTATIVE, "When allowing two steps before stability, we should reach seq1 (" << representative.label << " instead)"); krc.setRevcomp(true); krc.setRequiredSequence("ATCGCGCCCT"); // revcomp krc.compute(); representative = krc.getRepresentative(); TAP_TEST(representative.label.find("seq2-[33,52]") == 0, TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ, "When requiring sequence ATCGCGCCCT, we should have seq2 (" << representative.label << " instead)"); krc.setRevcomp(false); krc.compute(); TAP_TEST(! krc.hasRepresentative(), TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ, "When requiring sequence AGGGCGCGAT and revcomp=false, we shouldn't find anything (the sequence is revcomp-ed)"); krc.setRequiredSequence(""); krc.setMinCover(4); krc.compute(); TAP_TEST(! krc.hasRepresentative(), TEST_KMER_REPRESENTATIVE, "When requiring 4 reads to support the representative, we should not find any solution."); }
void testLongest() { list<Sequence> seqs; seqs.push_back(create_sequence("seq1", "seq1", "AAAAAAAAA", "")); seqs.push_back(create_sequence("seq2", "seq2", "AAAAA", "")); seqs.push_back(create_sequence("seq3", "seq3", "AAAAAAAA", "")); seqs.push_back(create_sequence("seq4", "seq4", "AAAAAAAAAA", "")); seqs.push_back(create_sequence("seq5", "seq5", "AAAAAA", "")); seqs.push_back(create_sequence("seq6", "seq6", "AAAAAAA", "")); SequenceSampler s(seqs); list<Sequence> l1 = s.getLongest(6, 11); size_t *distrib = s.getLengthDistribution(); TAP_TEST(distrib[0] == 0 && distrib[1] == 0 && distrib[2] == 0 && distrib[3] == 0 && distrib[4] == 0 && distrib[5] == 1 && distrib[6] == 1 && distrib[7] == 1 && distrib[8] == 1 && distrib[9] == 1 && distrib[10] == 1, TEST_SAMPLER_LENGTH, ""); char id = '1'; TAP_TEST(l1.size() == 6, TEST_SAMPLER_LONGEST, ""); for (list<Sequence>::const_iterator it = l1.begin(); it != l1.end(); it++) { TAP_TEST(it->label[3] == id, TEST_SAMPLER_LONGEST, ""); id++; } // With only 10 buckets, the two longest sequences share the same bucket. // Due to their insertion order, the shorter will be sampled first l1 = s.getLongest(2, 10); distrib = s.getLengthDistribution(); TAP_TEST(distrib[0] == 0 && distrib[1] == 0 && distrib[2] == 0 && distrib[3] == 0 && distrib[4] == 0 && distrib[5] == 1 && distrib[6] == 1 && distrib[7] == 1 && distrib[8] == 1 && distrib[9] == 2, TEST_SAMPLER_LENGTH, ""); TAP_TEST(l1.size() == 2, TEST_SAMPLER_LONGEST, ""); TAP_TEST(l1.front().sequence.size() == 9, TEST_SAMPLER_LONGEST, ""); Sequence next = *(++l1.begin()); TAP_TEST(next.sequence.size() == 10, TEST_SAMPLER_LONGEST, "label = " << next.label); }
// Generate 10 sequences, and launch 10 times getRandom(1). // We should not have the same sequence 10 times (p < 10^{-10}) void testRandom() { list<Sequence> seqs; string seg_name = "seq"; char id = '0'; string sequence = "AA"; for (int i = 0; i < 10; i++) { seqs.push_back(create_sequence("seq" + string_of_int(id), "seq" + string_of_int(id), sequence, "")); sequence += "A"; id++; } SequenceSampler sampler(seqs); string first_random = sampler.getRandom(1).front().label; bool all_equal = true; for (int i = 0; i < 9 && all_equal; i++) { if (first_random != sampler.getRandom(1).front().label) all_equal = false; } TAP_TEST(all_equal == false, TEST_SAMPLER_RANDOM, "On the 10 trials, we drawn 10 times the same sequence"); }
void testOnlineFasta1() { OnlineFasta fa("../../data/test1.fa"); OnlineFasta fq("../../data/test1.fq"); int nb_seq = 0; TAP_TEST(fa.getLineNb() == 1, TEST_O_FASTA_LINE_NB, ""); TAP_TEST(fq.getLineNb() == 1, TEST_O_FASTA_LINE_NB, ""); while (fa.hasNext()) { TAP_TEST(fq.hasNext(), TEST_O_FASTA_HAS_NEXT, ""); fa.next(); fq.next(); Sequence s1 = fa.getSequence(); Sequence s2 = fq.getSequence(); TAP_TEST(s1.label == s2.label && s1.label_full == s2.label_full && s1.sequence == s2.sequence, TEST_O_FASTA_GET_SEQUENCE, "fa: " << fa.getSequence() << endl << "fq: " << fq.getSequence()); nb_seq++; } TAP_TEST(fq.getLineNb() == 20, TEST_O_FASTA_LINE_NB, ""); TAP_TEST(! fq.hasNext(), TEST_O_FASTA_HAS_NEXT, ""); TAP_TEST(nb_seq == 5, TEST_O_FASTA_HAS_NEXT, ""); }
void testFastaAddThrows() { bool caught = false; try { Fasta fa1("mlkdkklflskjfskldfj.fa"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Error in opening file") != string::npos, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); Fasta fa1("../../data/test1.fa"); caught = false; try { fa1.add("ljk:lkjsdfsdlfjsdlfkjs.fa"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Error in opening file") != string::npos, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { fa1.add("testTools.cpp"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("The file seems to be malformed") != string::npos, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { OnlineFasta fa("lkjdflkdfjglkdfjg.fa"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Error in opening file") != string::npos, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { Fasta fa1("../../data/malformed1.fq"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Expected line starting with +") != string::npos, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { Fasta fa1("../../data/malformed2.fq"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { Fasta fa1("../../data/malformed3.fq"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Quality and sequence don't have the same length") == 0, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { Fasta fa1("../../data/malformed4.fq"); } 
catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { Fasta fa1("../../data/malformed5.fq"); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { // Can't test empty file with Fasta since we // don't complain for empty files explicitly in Fasta constructor. OnlineFasta fa1("../../data/malformed6.fq"); fa1.next(); } catch (invalid_argument e) { TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); caught = false; try { Fasta fa1("../../data/malformed7.fq"); } catch(invalid_argument e) { TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, ""); caught = true; } TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, ""); }
/**
 * Checks nChoosek() on four (n, k) pairs, including one with k > n,
 * for which 0 is expected.
 */
void testNChooseK() {
  // k larger than n
  TAP_TEST(nChoosek(1, 10) == 0, TEST_N_CHOOSE_K, "");

  // Regular cases
  TAP_TEST(nChoosek(1, 1) == 1, TEST_N_CHOOSE_K, "");
  TAP_TEST(nChoosek(5, 2) == 10, TEST_N_CHOOSE_K, "");
  TAP_TEST(nChoosek(8, 4) == 70, TEST_N_CHOOSE_K, "");
}
/* This test check the integrity of the getMultiResults function in AbstractACAutomaton class and its inherited classes. */ void testGetMultiResults(){ map<KmerAffect,int> results; PointerACAutomaton<KmerAffect> aho(false); const string errorOccurence = "KmerAffect doesn't have the good number of occurences."; const string errorSize = "Map has too many Kmers."; seqtype seq = "TTTTAATTAAGGGGCTACCCCCAATGTCCGTGGAGCTCTGGGGGGTTA"; affect_t affect[10]; seqtype seqs[10]; char c = 'a'; for(int i = 0; i < 10; ++i){ affect[i].c = c; c++; } seqs[0] = "AGCTCT"; seqs[1] = "TTTT"; seqs[2] = "AATT"; seqs[3] = "CGTGG"; seqs[4] = "CAATGTC"; seqs[5] = "AGGG"; seqs[6] = "GGGG"; seqs[7] = "TTAA"; seqs[8] = "GCTAC"; seqs[9] = "CCCC"; for(int i = 0;i < 10; ++i){ aho.insert(seqs[i], KmerAffect(affect[i])); } aho.build_failure_functions(); results = aho.getMultiResults(seq); /* Best situation: every sequences is found at least once in automaton. */ TAP_TEST(results.size() <= 11, TEST_AC_OCCURENCES, errorSize); TAP_TEST_EQUAL(results.at(aho.get(seqs[0])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[1])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[2])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[3])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[4])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[5])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[6])), 4, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[7])), 2, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[8])), 1, TEST_AC_OCCURENCES, errorOccurence); TAP_TEST_EQUAL(results.at(aho.get(seqs[9])), 2, TEST_AC_OCCURENCES, errorOccurence); /* Situation: Only one K-mer is in the sequence, appearing once. 
*/ seqtype seq2 = "AAAAAAAAAAAAAAAAAATTCAAAAAAAAA"; results = aho.getMultiResults(seq2); TAP_TEST(results.size() <= 2, TEST_AC_OCCURENCES, errorSize); TAP_TEST_EQUAL(results.at(aho.get(seqs[2])), 1, TEST_AC_OCCURENCES, errorOccurence); /* Situation: Only one K-mer is the sequence, appearing many times. */ seqtype seq3 = "GCTACGCTACGCTACGCTACGCTA"; results = aho.getMultiResults(seq3); TAP_TEST(results.size() <= 2, TEST_AC_OCCURENCES, errorSize); TAP_TEST_EQUAL(results.at(aho.get(seqs[8])), 4, TEST_AC_OCCURENCES, errorOccurence); /* Situation: No K-mer appear in the sequence. */ seqtype seq4 = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; results = aho.getMultiResults(seq4); TAP_TEST(results.size() <= 1, TEST_AC_OCCURENCES, errorSize); /* If there is K-mers in automaton doesn't match the sequence, the map must return only unknown K-mers. */ pair<KmerAffect, int> singleResult = *(results.begin()); KmerAffect unknownKmerAffect = singleResult.first; TAP_TEST_EQUAL(unknownKmerAffect, AFFECT_UNKNOWN, TEST_AC_OCCURENCES, "Unknown Kmer not found"); }
void testSimpleInsertACAutomaton() { PointerACAutomaton<Kmer> aho; Kmer count_acag = Kmer("ACAG"); count_acag.count = 5; Kmer count_caga = Kmer("CAGA"); count_caga.count = 2; Kmer count_caca = Kmer("CACA"); count_caca.count = 3; Kmer count_gca = Kmer("GCA"); count_gca.count = 1; aho.insert("ACAG", count_acag); aho.insert("CAGA", count_caga); aho.insert("CACA", count_caca); aho.insert("GCA", count_gca); aho.build_failure_functions(); pointer_state<Kmer> *state_ac = aho.goto_state("ac"); pointer_state<Kmer> *state_aca = aho.goto_state("aca"); pointer_state<Kmer> *state_c = aho.goto_state("c"); pointer_state<Kmer> *state_g = aho.goto_state("g"); pointer_state<Kmer> *state_cag = aho.goto_state("cag"); TAP_TEST(aho.getInitialState()->transitions[T] == aho.getInitialState() && aho.getInitialState()->transitions[N] == aho.getInitialState(), TEST_AC_TRANSITIONS, ""); TAP_TEST(state_ac->transitions[A] == state_aca, TEST_AC_TRANSITIONS, ""); TAP_TEST(state_ac->transitions[C] == state_c, TEST_AC_TRANSITIONS, ""); TAP_TEST(state_aca->transitions[A] == state_g->transitions[A], TEST_AC_TRANSITIONS, ""); TAP_TEST(state_c->transitions[C] == state_c, TEST_AC_TRANSITIONS, ""); TAP_TEST(state_g->transitions[G] == state_g, TEST_AC_TRANSITIONS, ""); TAP_TEST(state_g->transitions[C]->transitions[C] == state_c, TEST_AC_TRANSITIONS, ""); TAP_TEST(state_cag->transitions[G] == state_g, TEST_AC_TRANSITIONS, ""); TAP_TEST(state_cag->transitions[A]->is_final, TEST_AC_FINAL, ""); string caga = "caga"; string caca = "caca"; string acag = "acag"; TAP_TEST(aho.get(caga).count == 2, TEST_AC_GET, ""); TAP_TEST(aho.get(caca).count == 3, TEST_AC_GET, ""); TAP_TEST(aho.get(acag).count == 5, TEST_AC_GET, ""); }
void testFastaOutputOperator(){ ostringstream oss; Fasta fa("../../data/test1.fa"); oss << fa; TAP_TEST(oss.str() == ">seq1\nACAAC\n>seq2\nCGACCCCCAA\n>seq3\nA\n>seq4\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n>\nAATN\n", TEST_FASTA_OUT, oss.str()); }
void testRevcompInt() { TAP_TEST(revcomp_int(dna_to_int("AA", 2), 2) == dna_to_int("TT", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("AC", 2), 2) == dna_to_int("GT", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("AG", 2), 2) == dna_to_int("CT", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("AT", 2), 2) == dna_to_int("AT", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("CA", 2), 2) == dna_to_int("TG", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("CC", 2), 2) == dna_to_int("GG", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("CG", 2), 2) == dna_to_int("CG", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("CT", 2), 2) == dna_to_int("AG", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("GA", 2), 2) == dna_to_int("TC", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("GC", 2), 2) == dna_to_int("GC", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("GT", 2), 2) == dna_to_int("AC", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("TA", 2), 2) == dna_to_int("TA", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("TC", 2), 2) == dna_to_int("GA", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("TG", 2), 2) == dna_to_int("CA", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("TT", 2), 2) == dna_to_int("AA", 2), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("AAAAAAA", 7), 7) == dna_to_int("TTTTTTT", 7), TEST_REVCOMP_INT, ""); TAP_TEST(revcomp_int(dna_to_int("ATTAGGA", 7), 7) == dna_to_int("TCCTAAT", 7), TEST_REVCOMP_INT, "revcomp: " << revcomp_int(dna_to_int("ATTAGGA", 7), 7) <<", dna_to_int: " << dna_to_int("TCCTAAT", 7)); }
/**
 * Checks dna_to_int() on four words. The expected values match a
 * base-4 reading with A=0, C=1, G=2, T=3 and the first letter as the
 * most significant digit (e.g. TTTT -> 255).
 */
void testDNAToInt() {
  // Words made of A only map to 0 whatever their length
  TAP_TEST(dna_to_int("A", 1) == 0, TEST_DNA_TO_INT, "");
  TAP_TEST(dna_to_int("AAAAAAA", 7) == 0, TEST_DNA_TO_INT, "");

  // Mixed word, then the largest 4-letter word
  TAP_TEST(dna_to_int("ATTAGGA", 7) == 3880, TEST_DNA_TO_INT, "");
  TAP_TEST(dna_to_int("TTTT", 4) == 255, TEST_DNA_TO_INT, "");
}
/**
 * Checks the integer code returned by nuc_to_int() for each of the four
 * uppercase nucleotides: A=0, C=1, G=2, T=3.
 */
void testNucToInt() {
  TAP_TEST(nuc_to_int('A') == 0, TEST_NUC_TO_INT, "");
  TAP_TEST(nuc_to_int('C') == 1, TEST_NUC_TO_INT, "");
  TAP_TEST(nuc_to_int('G') == 2, TEST_NUC_TO_INT, "");
  TAP_TEST(nuc_to_int('T') == 3, TEST_NUC_TO_INT, "");
}
void testRevcomp() { TAP_TEST(complement("AATCAGactgactagATCGAn") == "TTAGTCTGACTGATCTAGCTN", TEST_REVCOMP, ""); TAP_TEST(revcomp("AATCAGactgactagATCGAn") == "NTCGATCTAGTCAGTCTGATT", TEST_REVCOMP, ""); TAP_TEST(revcomp("") == "", TEST_REVCOMP, ""); TAP_TEST(revcomp("aaaaaa") == "TTTTTT", TEST_REVCOMP, ""); }