void testRevcompRepresentative() {
  list<Sequence> reads = Fasta("../../data/representative_revcomp.fa").getAll();

  KmerRepresentativeComputer krc(reads, "##############");
  krc.setOptions(false, 3, 0.5);
  krc.setCoverageReferenceLength(50);
  krc.compute();
  Sequence representative = krc.getRepresentative();

  // Computing reads revcomp
  for (list <Sequence>::iterator it = reads.begin(); it != reads.end(); it++) {
    it->sequence = revcomp(it->sequence);
  }

  KmerRepresentativeComputer krc2(reads, "##############");
  krc2.setOptions(false, 3, 0.5);
  krc2.setCoverageReferenceLength(50);
  krc2.compute();
  Sequence representative2 = krc2.getRepresentative();

  // Check position of [ in label, so that we remove that part, and then we
  // can compare the labels
  size_t pos1 = representative.label.find_first_of('[');
  size_t pos2 = representative2.label.find_first_of('[');

  TAP_TEST(representative.label.substr(0, pos1) == representative2.label.substr(0, pos2), TEST_KMER_REPRESENTATIVE_REVCOMP,
           "The two representatives should have the same label");

  TAP_TEST(revcomp(representative.sequence) == representative2.sequence, TEST_KMER_REPRESENTATIVE_REVCOMP,
           "The two representatives should have the same sequence (but revcomp-ed)");

}
Beispiel #2
0
/*
  Checks the sequence counters on two files: an exact count on the IGHV
  germline file, and range checks on the other two calls.
*/
void testFastaNbSequences() {
  // Exact count on the germline file
  TAP_TEST(nb_sequences_in_fasta("../../germline/IGHV.fa") == 348, TEST_FASTA_NB_SEQUENCES, "ccc");

  // The approximate counter must land in [340, 348] on the same file
  const int approx_germline = approx_nb_sequences_in_fasta("../../germline/IGHV.fa");
  const bool germline_in_range = (approx_germline >= 340) && (approx_germline <= 348);
  TAP_TEST(germline_in_range, TEST_FASTA_NB_SEQUENCES, "");

  // NOTE(review): the second argument presumably selects an approximate count -- confirm
  const int stanford_count = nb_sequences_in_fasta("../../data/Stanford_S22.fasta", true);
  const bool stanford_in_range = (stanford_count >= 13100) && (stanford_count <= 13200);
  TAP_TEST(stanford_in_range, TEST_FASTA_NB_SEQUENCES, "");
}
Beispiel #3
0
/*
  Checks operator<< on a Sequence, first with a quality string and then
  with an empty one. In both cases the diagnostic message is the text that
  was actually written to the stream under test.
*/
void testSequenceOutputOperator() {
  ostringstream oss;
  Sequence seq = {"a b c", "a", "GATTACA", "AIIIIIH", NULL};
  oss << seq;

  // With a quality string: '@' header, sequence, '+' line, quality
  TAP_TEST(oss.str() == "@a\nGATTACA\n+\nAIIIIIH\n", TEST_SEQUENCE_OUT, oss.str());

  ostringstream oss2;
  seq.quality = "";
  oss2 << seq;

  // With an empty quality string: '>' header and sequence only.
  // The diagnostic reports oss2, the stream checked here (it used to report oss).
  TAP_TEST(oss2.str() == ">a\nGATTACA\n", TEST_SEQUENCE_OUT, oss2.str());
}
Beispiel #4
0
void testFastaAdd() {
  Fasta fa1("../../data/test1.fa");
  Fasta fa2("../../data/test1.fa");
  fa2.add("../../data/test1.fa");

  TAP_TEST(fa1.size() * 2 == fa2.size(), TEST_FASTA_ADD, "");
  for (int i=0; i < fa1.size(); i++) {
    TAP_TEST(fa1.label(i) == fa2.label(i)
             && fa1.label(i) == fa2.label(i+fa1.size()), TEST_FASTA_ADD, "");
    TAP_TEST(fa1.label_full(i) == fa2.label_full(i)
             && fa1.label_full(i) == fa2.label_full(i+fa1.size()), 
             TEST_FASTA_ADD, "");
    TAP_TEST(fa1.sequence(i) == fa2.sequence(i)
             && fa1.sequence(i) == fa2.sequence(i+fa1.size()), 
             TEST_FASTA_ADD, "");
  }
}
Beispiel #5
0
void testScore() {
  // ReadLengthScore testing
  ReadLengthScore rls;

  Sequence seq1 = {"seq", "seq", "", "", 0};
  TAP_TEST_EQUAL(rls.getScore(seq1), 0., TEST_LENGTH_SCORE,
           "score should be 0, is " << rls.getScore(seq1));

  Sequence seq2 = {"seq", "seq", "ATCGTTTACGTC", "", 0};
  TAP_TEST_EQUAL(rls.getScore(seq2), 12., TEST_LENGTH_SCORE,
           "score should be 12, is " << rls.getScore(seq2));

  Sequence seq3 = {"seq", "seq", "A", "", 0};
  TAP_TEST_EQUAL(rls.getScore(seq3), 1., TEST_LENGTH_SCORE,
           "score should be 1, is " << rls.getScore(seq3));


  // ReadQualityScore testing
  ReadQualityScore rqs;

  Sequence seq4 = {"s", "s", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", 0};
  TAP_TEST((int) rqs.getScore(seq4) == (int) (41 * 120/ GOOD_QUALITY), TEST_QUALITY_SCORE,
           "score should be " << (int) (41 * 120/ GOOD_QUALITY) << " not " << rqs.getScore(seq4));

  // Changing the quality, put the percentile should not change yet.
  seq4.quality[10] = '-';
  TAP_TEST((int) rqs.getScore(seq4) == (int) (41 * 120/ GOOD_QUALITY), TEST_QUALITY_SCORE,
           "score should be " << (int) (41 * 120/ GOOD_QUALITY) << " not " << rqs.getScore(seq4));


  // Now the percentile value should change and should correspond to '-'
  seq4.quality[22] = '!';
  TAP_TEST((int) rqs.getScore(seq4) == (int) (('-' - ' ') * 120/ GOOD_QUALITY), TEST_QUALITY_SCORE,
           "score should be " << (int) (('-' - ' ') * 120/ GOOD_QUALITY) << " not " << rqs.getScore(seq4));

  // Quality does not exist anymore → the score is the length
  seq4.quality = "";
  TAP_TEST_EQUAL(rqs.getScore(seq4), 120, TEST_QUALITY_SCORE,
           "score should be 120 not " << rqs.getScore(seq4));

}
Beispiel #6
0
/*
  Checks is_extended_nucleotide() on single characters and
  has_extended_nucleotides() on whole strings.
*/
void testExtendedNucleotides() {
  // Single characters and whether they are expected to be reported as extended
  const struct { char nucleotide; bool extended; } char_cases[] = {
    {'A', false},
    {'a', false},
    {'N', true},
    {' ', true}
  };
  for (const auto &c : char_cases)
    TAP_TEST(is_extended_nucleotide(c.nucleotide) == c.extended, TEST_EXTENDED_NUCL, "");

  // Whole strings and whether they are expected to contain an extended nucleotide
  const struct { const char *text; bool extended; } string_cases[] = {
    {"", false},
    {"ACGTacgt", false},
    {"ACGTnacgt", true}
  };
  for (const auto &c : string_cases)
    TAP_TEST(has_extended_nucleotides(c.text) == c.extended, TEST_EXTENDED_NUCL, "");
 }
Beispiel #7
0
/*
  Checks ReadChooser with a ReadLengthScore on the reads of test1.fa:
  getithBest(i) must return the reads in the expected rank order and
  getBest() must agree with getithBest(1).
*/
void testChooser() {
  list<Sequence> reads = BioReader("data/test1.fa").getAll();

  ReadLengthScore rls;
  ReadChooser rc(reads, rls);

  TAP_TEST(rc.getithBest(1).label == "seq4", TEST_READ_CHOOSER_SORTED,
           "First sequence is " << rc.getithBest(1).label);

  // getBest() must be the same read as the first-ranked one
  TAP_TEST(rc.getithBest(1).label == rc.getBest().label
           && rc.getithBest(1).sequence == rc.getBest().sequence, 
           TEST_READ_CHOOSER_BEST,"");

  TAP_TEST(rc.getithBest(2).label == "seq2", TEST_READ_CHOOSER_SORTED,
           "Second sequence is " << rc.getithBest(2).label);

  TAP_TEST(rc.getithBest(3).label == "seq1", TEST_READ_CHOOSER_SORTED,
           "Third sequence is " << rc.getithBest(3).label);

  // The fourth-ranked read is expected to have an empty label.
  // The diagnostic names the fourth rank (it used to say "First").
  TAP_TEST(rc.getithBest(4).label == "", TEST_READ_CHOOSER_SORTED,
           "Fourth sequence is " << rc.getithBest(4).label);

}
Beispiel #8
0
void testFasta1() {
  Fasta fa("../../data/test1.fa");
  Fasta fq("../../data/test1.fq");

  TAP_TEST(fa.size() == fq.size(), TEST_FASTA_SIZE, "");
  for (int i=0; i < fa.size(); i++) {
    TAP_TEST(fa.label(i) == fq.label(i), TEST_FASTA_LABEL, "");
    TAP_TEST(fa.label_full(i) == fq.label_full(i), TEST_FASTA_LABEL_FULL, "");
    TAP_TEST(fa.sequence(i) == fq.sequence(i), TEST_FASTA_SEQUENCE, "");
  }
  TAP_TEST(fa.label(2) == "seq3", TEST_FASTA_LABEL, "");
  TAP_TEST(fa.sequence(2) == "A", TEST_FASTA_SEQUENCE, "");
  TAP_TEST(fa.label(4) == "", TEST_FASTA_LABEL, "");
  TAP_TEST(fa.sequence(4) == "AATN", TEST_FASTA_SEQUENCE, "");
}
Beispiel #9
0
void testCreateSequence() {
  Sequence seq1 = create_sequence("label", "l", "AAAAAAAAAA", "!!!!!!!!!!");
  Sequence seq2 = create_sequence("", "", "", "");

  TAP_TEST(seq1.label_full == "label", TEST_CREATE_SEQUENCE_LABEL_FULL, "");
  TAP_TEST(seq2.label_full == "", TEST_CREATE_SEQUENCE_LABEL_FULL, "");

  TAP_TEST(seq1.label == "l", TEST_CREATE_SEQUENCE_LABEL, "");
  TAP_TEST(seq2.label == "", TEST_CREATE_SEQUENCE_LABEL, "");

  TAP_TEST(seq1.sequence == "AAAAAAAAAA", TEST_CREATE_SEQUENCE_SEQUENCE, "");
  TAP_TEST(seq2.sequence == "", TEST_CREATE_SEQUENCE_SEQUENCE, "");

  TAP_TEST(seq1.quality == "!!!!!!!!!!", TEST_CREATE_SEQUENCE_QUALITY, "");
  TAP_TEST(seq2.quality == "", TEST_CREATE_SEQUENCE_QUALITY, "");
}
Beispiel #10
0
void testRCInsertAcAutomaton() {
  PointerACAutomaton<KmerAffect> aho(true);

  aho.insert("ACAGTC", "V", true, 0, "##-##");
  aho.build_failure_functions();
  // Will insert AC-GT → ACAGT, ACCGT, ACGGT, ACTGT
  //         and CA-TC → CAATC, CACTC, CAGTC, CATTC
  // plus the revcomps:
  // Will insert GA-TG → GAATG, GACTG, GAGTG, GATTG

  //         and AC-GT → ACAGT, ACCGT, ACGGT, ACTGT

  pointer_state<KmerAffect> *state = aho.goto_state("ACCGT");

  TAP_TEST_EQUAL(state->informations.size(), 1, TEST_AC_GET, "");
  TAP_TEST_EQUAL(state->informations.front(), AFFECT_AMBIGUOUS, TEST_AC_GET, "");
  TAP_TEST(state->is_final, TEST_AC_FINAL, "");

  TAP_TEST(! aho.goto_state("CAAT")->is_final, TEST_AC_FINAL, "");
  TAP_TEST(aho.goto_state("CAAT")->informations.size() == 1, TEST_AC_GET, "");
  TAP_TEST(aho.goto_state("CAAT")->informations.front() == AFFECT_UNKNOWN, TEST_AC_GET, "");

  TAP_TEST(aho.goto_state("GAGTG")->informations.front() == AFFECT_V_BWD, TEST_AC_GET, "");
  TAP_TEST(aho.goto_state("GAGTG")->is_final, TEST_AC_FINAL, "");

  TAP_TEST(aho.goto_state("GAGTG")->transitions[A] == aho.goto_state("GA"), TEST_AC_TRANSITIONS, "");

  vector<KmerAffect> results = aho.getResults("ACCGTgaatgCATTCA");
  vector<KmerAffect> expected = {AFFECT_AMBIGUOUS,
                                 AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN,
                                 AFFECT_UNKNOWN, AFFECT_V_BWD, AFFECT_UNKNOWN,
                                 AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN,
                                 AFFECT_V, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN, AFFECT_UNKNOWN,
                                 AFFECT_UNKNOWN };

  TAP_TEST(results.size() == expected.size(), TEST_AC_GET_RESULTS, "");
  TAP_TEST(results == expected, TEST_AC_GET_RESULTS, "");
}
Beispiel #11
0
/*
  Checks extract_basename() on paths with and without directories, with the
  second argument set to true (the last extension is expected to be dropped)
  or false (the extension is expected to be kept).
*/
void testExtractBasename() {
  const struct { const char *path; bool remove_ext; const char *expected; } cases[] = {
    {"/var/toto/titi/tutu/bla.bli.bluc", true,  "bla.bli"},
    {"/var/toto/titi/tutu/bla.bli.bluc", false, "bla.bli.bluc"},
    {"bla.bli.bluc",                     true,  "bla.bli"},
    {"bla.bli.bluc",                     false, "bla.bli.bluc"},
    {"a_filename_without_extension",     true,  "a_filename_without_extension"},
    {"/",                                true,  ""}
  };

  // On failure the diagnostic is the value that was actually returned
  for (const auto &c : cases)
    TAP_TEST(extract_basename(c.path, c.remove_ext) == c.expected,
             TEST_EXTRACT_BASENAME, extract_basename(c.path, c.remove_ext));
}
void testRepresentative() {
  list<Sequence> reads = Fasta("../../data/representative.fa").getAll();

  KmerRepresentativeComputer krc(reads, "##############");

  krc.setStabilityLimit(0);
  krc.setRevcomp(false);
  krc.setMinCover(1);
  krc.setPercentCoverage(0.5);
  krc.setCoverageReferenceLength(50);

  krc.compute();
  Sequence representative = krc.getRepresentative();

  // Seq3 is the longest it should be taken when performing 0 extra iteration
  TAP_TEST(representative.label.find("seq3-[37,73]") == 0, TEST_KMER_REPRESENTATIVE,
           "If we take the first representative we should have seq3, and not at the beginning (" << representative.label << " instead)");

  krc.setStabilityLimit(1);
  krc.compute();
  representative = krc.getRepresentative();
  TAP_TEST(representative.label.find("seq3-[37,73]") == 0, TEST_KMER_REPRESENTATIVE,
           "When allowing one step before stability, we should still have seq3 (" << representative.label << " instead)");

  krc.setStabilityLimit(2);
  krc.compute();
  representative = krc.getRepresentative();
  TAP_TEST(representative.label.find("seq1-[0,41]") == 0, TEST_KMER_REPRESENTATIVE,
           "When allowing two steps before stability, we should reach seq1 (" << representative.label << " instead)");

  krc.setRevcomp(true);
  krc.setRequiredSequence("ATCGCGCCCT"); // revcomp 
  krc.compute();
  representative = krc.getRepresentative();
  TAP_TEST(representative.label.find("seq2-[33,52]") == 0, TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ,
           "When requiring sequence ATCGCGCCCT, we should have seq2 (" << representative.label << " instead)");

  krc.setRevcomp(false);
  krc.compute();
  TAP_TEST(! krc.hasRepresentative(), TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ,
           "When requiring sequence AGGGCGCGAT and revcomp=false, we shouldn't find anything (the sequence is revcomp-ed)");

  krc.setRequiredSequence("");

  krc.setMinCover(4);
  krc.compute();
  TAP_TEST(! krc.hasRepresentative(), TEST_KMER_REPRESENTATIVE,
           "When requiring 4 reads to support the representative, we should not find any solution.");
}
Beispiel #13
0
void testLongest() {
  list<Sequence> seqs;

  seqs.push_back(create_sequence("seq1", "seq1", "AAAAAAAAA", ""));
  seqs.push_back(create_sequence("seq2", "seq2", "AAAAA", ""));
  seqs.push_back(create_sequence("seq3", "seq3", "AAAAAAAA", ""));
  seqs.push_back(create_sequence("seq4", "seq4", "AAAAAAAAAA", ""));
  seqs.push_back(create_sequence("seq5", "seq5", "AAAAAA", ""));
  seqs.push_back(create_sequence("seq6", "seq6", "AAAAAAA", ""));

  SequenceSampler s(seqs);

  list<Sequence> l1 = s.getLongest(6, 11);
  size_t *distrib = s.getLengthDistribution();

  TAP_TEST(distrib[0] == 0
           && distrib[1] == 0 && distrib[2] == 0 && distrib[3] == 0 && distrib[4] == 0
           && distrib[5] == 1 && distrib[6] == 1 && distrib[7] == 1 && distrib[8] == 1
           && distrib[9] == 1 && distrib[10] == 1,
           TEST_SAMPLER_LENGTH, "");

  char id = '1';
  TAP_TEST(l1.size() == 6, TEST_SAMPLER_LONGEST, "");
  for (list<Sequence>::const_iterator it = l1.begin(); it != l1.end(); it++) {
    TAP_TEST(it->label[3] == id, TEST_SAMPLER_LONGEST, "");
    id++;
  }

  // With only 10 buckets, the two longest sequences share the same bucket.
  // Due to their insertion order, the shorter will be sampled first
  l1 = s.getLongest(2, 10);
  distrib = s.getLengthDistribution();

  TAP_TEST(distrib[0] == 0
           && distrib[1] == 0 && distrib[2] == 0 && distrib[3] == 0 && distrib[4] == 0
           && distrib[5] == 1 && distrib[6] == 1 && distrib[7] == 1 && distrib[8] == 1
           && distrib[9] == 2, TEST_SAMPLER_LENGTH, "");

  TAP_TEST(l1.size() == 2, TEST_SAMPLER_LONGEST, "");
  TAP_TEST(l1.front().sequence.size() == 9, TEST_SAMPLER_LONGEST, "");
  Sequence next = *(++l1.begin());
  TAP_TEST(next.sequence.size() == 10, TEST_SAMPLER_LONGEST, "label = " << next.label);
}
Beispiel #14
0
// Generate 10 sequences, and launch 10 times getRandom(1).
// We should not have the same sequence 10 times (p < 10^{-10})
//
// The sequences are poly-A strings of growing length (2 to 11 bases); each
// label is "seq" followed by string_of_int() of a counter.
// NOTE(review): the counter is a char starting at '0'; check that
// string_of_int() yields the intended text for it. The labels are only
// compared with each other here.
void testRandom() {
  list<Sequence> seqs;
  char id = '0';
  string sequence = "AA";

  for (int i = 0; i < 10; i++) {
    seqs.push_back(create_sequence("seq" + string_of_int(id), "seq" + string_of_int(id), sequence, ""));
    sequence += "A";
    id++;
  }

  SequenceSampler sampler(seqs);

  // First draw, then up to 9 more; stop as soon as one draw differs
  string first_random = sampler.getRandom(1).front().label;
  bool all_equal = true;
  for (int i = 0; i < 9 && all_equal; i++) {
    if (first_random != sampler.getRandom(1).front().label)
      all_equal = false;
  }

  TAP_TEST(all_equal == false, TEST_SAMPLER_RANDOM, "On the 10 trials, we drawn 10 times the same sequence");
}
Beispiel #15
0
void testOnlineFasta1() {
  OnlineFasta fa("../../data/test1.fa");
  OnlineFasta fq("../../data/test1.fq");
  int nb_seq = 0;

  TAP_TEST(fa.getLineNb() == 1, TEST_O_FASTA_LINE_NB, "");
  TAP_TEST(fq.getLineNb() == 1, TEST_O_FASTA_LINE_NB, "");

  while (fa.hasNext()) {
    TAP_TEST(fq.hasNext(), TEST_O_FASTA_HAS_NEXT, "");
    fa.next();
    fq.next();
    Sequence s1 = fa.getSequence();
    Sequence s2 = fq.getSequence();
    TAP_TEST(s1.label == s2.label && s1.label_full == s2.label_full
             && s1.sequence == s2.sequence, TEST_O_FASTA_GET_SEQUENCE, "fa: " << fa.getSequence() << endl << "fq: " << fq.getSequence());
    nb_seq++;
  }
  TAP_TEST(fq.getLineNb() == 20, TEST_O_FASTA_LINE_NB, "");
  TAP_TEST(! fq.hasNext(), TEST_O_FASTA_HAS_NEXT, "");
  TAP_TEST(nb_seq == 5, TEST_O_FASTA_HAS_NEXT, "");
}
Beispiel #16
0
void testFastaAddThrows() {
  bool caught = false;
  try {
    Fasta fa1("mlkdkklflskjfskldfj.fa");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Error in opening file") != string::npos, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");

  Fasta fa1("../../data/test1.fa");

  caught = false;
  try {
    fa1.add("ljk:lkjsdfsdlfjsdlfkjs.fa");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Error in opening file") != string::npos, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");

  caught = false;
  try {
    fa1.add("testTools.cpp");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("The file seems to be malformed") != string::npos, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");

  caught = false;
  try {
    OnlineFasta fa("lkjdflkdfjglkdfjg.fa");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Error in opening file") != string::npos, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");

  caught = false;
  try {
    Fasta fa1("../../data/malformed1.fq");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Expected line starting with +") != string::npos, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");
  caught = false;
  try {
    Fasta fa1("../../data/malformed2.fq");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");
  caught = false;
  try {
    Fasta fa1("../../data/malformed3.fq");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Quality and sequence don't have the same length") == 0, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");
  caught = false;
  try {
    Fasta fa1("../../data/malformed4.fq");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");
  caught = false;
  try {
    Fasta fa1("../../data/malformed5.fq");
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");
  caught = false;
  try {
    // Can't test empty file with Fasta since we
    // don't complain for empty files explicitly in Fasta constructor.
    OnlineFasta fa1("../../data/malformed6.fq");
    fa1.next();
  } catch (invalid_argument e) {
    TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");

  caught = false;
  try {
    Fasta fa1("../../data/malformed7.fq");
  } catch(invalid_argument e) {
    TAP_TEST(string(e.what()).find("Unexpected EOF") == 0, TEST_FASTA_INVALID_FILE, "");
    caught = true;
  }
  TAP_TEST(caught == true, TEST_FASTA_INVALID_FILE, "");
}
Beispiel #17
0
/*
  Checks nChoosek() on a few (n, k) pairs, including one with k > n for
  which 0 is expected.
*/
void testNChooseK() {
  const struct { int n; int k; int expected; } cases[] = {
    {1, 10, 0},
    {1, 1, 1},
    {5, 2, 10},
    {8, 4, 70}
  };

  for (const auto &c : cases)
    TAP_TEST(nChoosek(c.n, c.k) == c.expected, TEST_N_CHOOSE_K, "");
}
Beispiel #18
0
/* 
  This test checks the integrity of the getMultiResults function in
  the AbstractACAutomaton class and its inherited classes.

  Ten words, each with its own affectation, are inserted in a
  PointerACAutomaton<KmerAffect>. getMultiResults() is then called on four
  sequences and the returned map (KmerAffect -> number of occurrences) is
  checked: number of entries and count for each expected word.
*/
void testGetMultiResults(){
  map<KmerAffect,int> results;
  PointerACAutomaton<KmerAffect> aho(false);
  const string errorOccurence = "KmerAffect doesn't have the good number of occurences.";
  const string errorSize = "Map has too many Kmers.";
  seqtype seq = "TTTTAATTAAGGGGCTACCCCCAATGTCCGTGGAGCTCTGGGGGGTTA";
  affect_t affect[10];
  seqtype seqs[10];

  // One distinct affectation character per word: 'a', 'b', ... 'j'
  char c = 'a';
  for(int i = 0; i < 10; ++i){
    affect[i].c = c;
    c++;
  }
  seqs[0] = "AGCTCT";
  seqs[1] = "TTTT";
  seqs[2] = "AATT";
  seqs[3] = "CGTGG";
  seqs[4] = "CAATGTC";
  seqs[5] = "AGGG";
  seqs[6] = "GGGG";
  seqs[7] = "TTAA";
  seqs[8] = "GCTAC";
  seqs[9] = "CCCC";
  
  for(int i = 0;i < 10; ++i){
    aho.insert(seqs[i], KmerAffect(affect[i]));
  }
  aho.build_failure_functions();
  results = aho.getMultiResults(seq);

  /* Best situation: every sequence is found at least once in the automaton. */
  TAP_TEST(results.size() <= 11, TEST_AC_OCCURENCES, errorSize);
  /* Expected number of occurrences of seqs[i] in seq. */
  const int expectedCounts[10] = {1, 1, 1, 1, 1, 1, 4, 2, 1, 2};
  for(int i = 0; i < 10; ++i){
    TAP_TEST_EQUAL(results.at(aho.get(seqs[i])), expectedCounts[i], TEST_AC_OCCURENCES, errorOccurence);
  }
  
  /* Situation: only one K-mer is in the sequence, appearing once. */
  seqtype seq2 = "AAAAAAAAAAAAAAAAAATTCAAAAAAAAA";
  results = aho.getMultiResults(seq2);
  TAP_TEST(results.size() <= 2, TEST_AC_OCCURENCES, errorSize);
  TAP_TEST_EQUAL(results.at(aho.get(seqs[2])), 1, TEST_AC_OCCURENCES, errorOccurence);

  /* Situation: only one K-mer is in the sequence, appearing many times. */
  seqtype seq3 = "GCTACGCTACGCTACGCTACGCTA";
  results = aho.getMultiResults(seq3);
  TAP_TEST(results.size() <= 2, TEST_AC_OCCURENCES, errorSize);
  TAP_TEST_EQUAL(results.at(aho.get(seqs[8])), 4, TEST_AC_OCCURENCES, errorOccurence);
  
  /* Situation: no K-mer appears in the sequence. */
  seqtype seq4 = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
  results = aho.getMultiResults(seq4);
  TAP_TEST(results.size() <= 1, TEST_AC_OCCURENCES, errorSize);
  /*
    If no K-mer of the automaton matches the sequence, the map must
    contain only the unknown K-mer.

    The size check above also accepts an empty map; dereferencing begin()
    would then be undefined behaviour, so that case is reported as a failure
    of this check instead.
  */
  if (results.empty()) {
    TAP_TEST(false, TEST_AC_OCCURENCES, "Unknown Kmer not found");
  } else {
    KmerAffect unknownKmerAffect = results.begin()->first;
    TAP_TEST_EQUAL(unknownKmerAffect, AFFECT_UNKNOWN, TEST_AC_OCCURENCES, "Unknown Kmer not found");
  }
}
Beispiel #19
0
void testSimpleInsertACAutomaton() {
  PointerACAutomaton<Kmer> aho;

  Kmer count_acag = Kmer("ACAG");
  count_acag.count = 5;
  Kmer count_caga = Kmer("CAGA");
  count_caga.count = 2;
  Kmer count_caca = Kmer("CACA");
  count_caca.count = 3;
  Kmer count_gca = Kmer("GCA");
  count_gca.count = 1;

  aho.insert("ACAG", count_acag);
  aho.insert("CAGA", count_caga);
  aho.insert("CACA", count_caca);
  aho.insert("GCA", count_gca);

  aho.build_failure_functions();

  pointer_state<Kmer> *state_ac = aho.goto_state("ac");
  pointer_state<Kmer> *state_aca = aho.goto_state("aca");
  pointer_state<Kmer> *state_c = aho.goto_state("c");
  pointer_state<Kmer> *state_g = aho.goto_state("g");
  pointer_state<Kmer> *state_cag = aho.goto_state("cag");

  TAP_TEST(aho.getInitialState()->transitions[T] == aho.getInitialState()
           && aho.getInitialState()->transitions[N] == aho.getInitialState(),
           TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_ac->transitions[A] == state_aca, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_ac->transitions[C] == state_c, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_aca->transitions[A] == state_g->transitions[A], TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_c->transitions[C] == state_c, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_g->transitions[G] == state_g, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_g->transitions[C]->transitions[C] == state_c, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_cag->transitions[G] == state_g, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_cag->transitions[A]->is_final, TEST_AC_FINAL, "");

  string caga = "caga";
  string caca = "caca";
  string acag = "acag";
  TAP_TEST(aho.get(caga).count == 2, TEST_AC_GET, "");
  TAP_TEST(aho.get(caca).count == 3, TEST_AC_GET, "");
  TAP_TEST(aho.get(acag).count == 5, TEST_AC_GET, "");
}
Beispiel #20
0
void testFastaOutputOperator(){
  ostringstream oss;
  Fasta fa("../../data/test1.fa");
  oss << fa;
  TAP_TEST(oss.str() == ">seq1\nACAAC\n>seq2\nCGACCCCCAA\n>seq3\nA\n>seq4\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n>\nAATN\n", TEST_FASTA_OUT, oss.str());
}
Beispiel #21
0
/*
  Checks revcomp_int() against dna_to_int() of the reverse complement, for
  the sixteen 2-mers and for two 7-mers.
*/
void testRevcompInt() {
  // word, its reverse complement, and their common length
  const struct { const char *word; const char *reversed; int length; } cases[] = {
    {"AA", "TT", 2},
    {"AC", "GT", 2},
    {"AG", "CT", 2},
    {"AT", "AT", 2},
    {"CA", "TG", 2},
    {"CC", "GG", 2},
    {"CG", "CG", 2},
    {"CT", "AG", 2},
    {"GA", "TC", 2},
    {"GC", "GC", 2},
    {"GT", "AC", 2},
    {"TA", "TA", 2},
    {"TC", "GA", 2},
    {"TG", "CA", 2},
    {"TT", "AA", 2},
    {"AAAAAAA", "TTTTTTT", 7}
  };

  for (const auto &c : cases)
    TAP_TEST(revcomp_int(dna_to_int(c.word, c.length), c.length) == dna_to_int(c.reversed, c.length),
             TEST_REVCOMP_INT, "");

  // Last case kept apart: it is the only one with a detailed diagnostic
  TAP_TEST(revcomp_int(dna_to_int("ATTAGGA", 7), 7) == dna_to_int("TCCTAAT", 7),
           TEST_REVCOMP_INT, "revcomp: " << revcomp_int(dna_to_int("ATTAGGA", 7), 7) <<", dna_to_int: " << dna_to_int("TCCTAAT", 7));
}
Beispiel #22
0
/*
  Checks dna_to_int() on a few words; the expected values are consistent
  with A=0 and T=3 read as base-4 digits (e.g. "TTTT" -> 255).
*/
void testDNAToInt() {
  const struct { const char *word; int length; int expected; } cases[] = {
    {"A", 1, 0},
    {"AAAAAAA", 7, 0},
    {"ATTAGGA", 7, 3880},
    {"TTTT", 4, 255}
  };

  for (const auto &c : cases)
    TAP_TEST(dna_to_int(c.word, c.length) == c.expected, TEST_DNA_TO_INT, "");
}
Beispiel #23
0
/*
  Checks that nuc_to_int() maps 'A', 'C', 'G' and 'T' to 0, 1, 2 and 3.
*/
void testNucToInt() {
  // The position of each nucleotide in this array is its expected code
  const char nucleotides[] = {'A', 'C', 'G', 'T'};

  for (int code = 0; code < 4; code++)
    TAP_TEST(nuc_to_int(nucleotides[code]) == code, TEST_NUC_TO_INT, "");
}
Beispiel #24
0
/*
  Checks complement() and revcomp() on a mixed-case sequence ending with
  'n', on the empty string and on a lowercase poly-A. The expected outputs
  are all uppercase.
*/
void testRevcomp() {
  const char *mixed_case = "AATCAGactgactagATCGAn";

  // Same input for both functions: complement only, then reverse complement
  TAP_TEST(complement(mixed_case) == "TTAGTCTGACTGATCTAGCTN", TEST_REVCOMP, "");
  TAP_TEST(revcomp(mixed_case) == "NTCGATCTAGTCAGTCTGATT", TEST_REVCOMP, "");

  // Edge cases: empty input and lowercase-only input
  TAP_TEST(revcomp("") == "", TEST_REVCOMP, "");
  TAP_TEST(revcomp("aaaaaa") == "TTTTTT", TEST_REVCOMP, "");
}