void testCluster() {

  int w=40;
   string forced_edges = "" ;

   map<string, string> labels;
   WindowsStorage windows = WindowsStorage(labels);

   Sequence seq = {"", "", "", "", NULL};

   windows.add("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAT", seq, 0, 0);
   windows.add("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", seq, 0, 0);
   windows.add("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG", seq, 0, 0);
   windows.add("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC", seq, 0, 0);
   
   windows.add("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", seq, 0, 0);
   windows.add("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAAA", seq, 0, 0);
   windows.add("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGG", seq, 0, 0);
   windows.add("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCC", seq, 0, 0);
   
   windows.add("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGTTTTTTTT", seq, 0, 0);
   windows.add("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAA", seq, 0, 0);
   windows.add("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG", seq, 0, 0);
   windows.add("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCCCCC", seq, 0, 0);
   
   windows.add("CCCCCCCCCCCCCCCCCCCCCCCCCTTTTTTTTTTTTTTT", seq, 0, 0);
   windows.add("CCCCCCCCCCCCCCCCCCCCCCCCCAAAAAAGCTAAAAAA", seq, 0, 0);
   windows.add("CCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGTCTAGGGGG", seq, 0, 0);
   windows.add("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCATGCCCCCC", seq, 0, 0);

   windows.sort();
   list<pair <junction, size_t> > sort_clones = windows.getSortedList();
   
   comp_matrix comp=comp_matrix(sort_clones);
   
   //create matrix using junctions 
   comp.compare( cout, Cluster);
   
   //save matrix file 
   comp.save("comp_matrix.data");
   
   //reset matrix 
   comp.del();
   
   //create matrix using matrix file 
   comp.load("comp_matrix.data");
 
   //save matrix file 
   comp.save("comp_matrix2.data");
  
   //test clustering

   list <list <junction> > cluster ;

   //0 différence admise / taille mini 10 / 0 cluster possible
   cluster = comp.cluster(forced_edges, w, cout, 0, 10) ;
   TAP_TEST(cluster.size()==0, TEST_CLUSTER, 
            "no cluster here (cluster.size()=" <<cluster.size() << ")" ) ;

   //epsilon 0// taille mini 1 / 16 cluster possible
   cluster = comp.cluster(forced_edges, w, cout, 0, 1) ;
   TAP_TEST(cluster.size()==16, TEST_CLUSTER, "expected 16 clusters (cluster.size()=" <<cluster.size() << ")") ;

   //epsilon 1// taille mini 3 / 1 cluster possible
   cluster = comp.cluster(forced_edges, w, cout, 1, 3) ;
   TAP_TEST(cluster.size()==1, TEST_CLUSTER, "expected 1 cluster (cluster.size()=" <<cluster.size() << ")") ;

   //epsilon 3// taille mini 3 / 2 cluster possible
   cluster = comp.cluster(forced_edges, w, cout, 3, 3) ;
   TAP_TEST(cluster.size()==2, TEST_CLUSTER, "expected 2 clusters (cluster.size()=" <<cluster.size() << ")") ;
   
   //epsilon 20 // taille mini 3 / 4 cluster possible
   cluster = comp.cluster(forced_edges, w, cout, 20, 3) ;
   TAP_TEST(cluster.size()==4, TEST_CLUSTER, "expected 4 clusters (cluster.size()=" <<cluster.size() << ")") ;

   //del matrix 
   comp.del();

}
Example #2
0
/**
 * Reads every sequence from `reads`, segments it, and stores the resulting
 * windows in a newly allocated WindowsStorage.
 *
 * For each read, a KmerMultiSegmenter is built and its selected KmerSegmenter
 * (`the_kseg`) is queried. Segmented reads whose junction of length `w` is
 * empty are re-flagged UNSEG_TOO_SHORT_FOR_WINDOW. Statistics (`stats`,
 * `stats_reads`) and the optional output streams (`out_affects`,
 * `out_segmented`, `out_unsegmented`, `out_unsegmented_detail`) are updated
 * along the way, and a progress indicator is printed on cout.
 *
 * If fetching a read throws std::invalid_argument, a warning is printed and
 * the loop stops; the windows gathered so far are still returned.
 *
 * @param reads                     source of reads, consumed with hasNext()/next()
 * @param w                         window length passed to getJunction(); also the
 *                                  minimal read length for keeping an unsegmented read
 * @param windows_labels            labels handed to the WindowsStorage constructor
 * @param only_labeled_windows      when true, a junction is stored only if
 *                                  isInterestingJunction() accepts it
 * @param keep_unsegmented_as_clone when true, unsegmented reads of length >= w are
 *                                  stored with their full sequence as the junction
 * @param nb_expected               forwarded to KmerMultiSegmenter
 * @param nb_reads_for_evalue       forwarded to KmerMultiSegmenter
 * @return the WindowsStorage allocated with `new` in this function
 *         (NOTE(review): the caller presumably owns and deletes it -- confirm)
 */
WindowsStorage *WindowExtractor::extract(OnlineFasta *reads,
                                         size_t w,
                                         map<string, string> &windows_labels, bool only_labeled_windows,
                                         bool keep_unsegmented_as_clone,
                                         double nb_expected, int nb_reads_for_evalue) {
  init_stats();

  WindowsStorage *windowsStorage = new WindowsStorage(windows_labels);
  windowsStorage->setMaximalNbReadsPerWindow(max_reads_per_window);

  unsigned long long int bp_total = 0;

  while (reads->hasNext()) {

    try {
      reads->next();
    }
    // Catch by const reference: avoids copying (and potentially slicing) the exception
    catch (const invalid_argument &e) {
      cout << endl;
      cerr << WARNING_STRING << "Error in getting a new read: " << e.what() << endl;
      cerr << WARNING_STRING << "Vidjil stops the analysis here, after " << nb_reads << " reads." << endl;
      break ;
    }

    nb_reads++;

    if (out_affects) {
      *out_affects << reads->getSequence();
    }

    KmerMultiSegmenter kmseg(reads->getSequence(), multigermline, out_affects, nb_expected, nb_reads_for_evalue);

    KmerSegmenter *seg = kmseg.the_kseg ;

    // Window length threshold: a segmented read with an empty junction
    // is re-flagged as unsegmented (too short for the window)
    junction junc ;
    if (seg->isSegmented()) {
      junc = seg->getJunction(w);
      if (!junc.size()) {
        seg->setSegmentationStatus(UNSEG_TOO_SHORT_FOR_WINDOW);
      }
    }

    int read_length = seg->getSequence().sequence.length();

    // Update stats
    stats[seg->getSegmentationStatus()].insert(read_length);

    // isSegmented() is queried again: the status may have been changed just above
    if (seg->isSegmented()) {

      // Filter: the condition guards only the storage of the window;
      // the stats and the output below apply to every segmented read
      if (!only_labeled_windows || windowsStorage->isInterestingJunction(junc)) {
        // Store the window
        windowsStorage->add(junc, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
      }

      // Update stats
      stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ;
      stats_reads[seg->system].addScore(read_length);

      if (out_segmented) {
        *out_segmented << *seg ; // KmerSegmenter output (V/N/J)
      }
    } else {
      if (keep_unsegmented_as_clone && (reads->getSequence().sequence.length() >= w)) {
        // Keep the unsegmented read, taking the full sequence as the junction
        windowsStorage->add(reads->getSequence().sequence, reads->getSequence(), seg->getSegmentationStatus(), seg->segmented_germline);
        stats[TOTAL_SEG_AND_WINDOW].insert(read_length) ; // TODO: rather count that in a pseudo-germline such as 'TRG!'
      }

      if (out_unsegmented) {
        *out_unsegmented << *seg ;
      }

      // Per-status detailed output; unless unsegmented_detail_full is set,
      // UNSEG_TOO_FEW_ZERO and UNSEG_TOO_SHORT reads are not written
      if (out_unsegmented_detail && (seg->getSegmentationStatus() >= STATS_FIRST_UNSEG)) {
        if (unsegmented_detail_full || (seg->getSegmentationStatus() != UNSEG_TOO_FEW_ZERO && seg->getSegmentationStatus() != UNSEG_TOO_SHORT)) {
          *out_unsegmented_detail[seg->getSegmentationStatus()] << *seg ;
        }
      }
    }

    // Last line of detailed affects output
    if (out_affects) {
      *out_affects << "#>" << seg->label << " " <<  seg->getInfoLine() << endl << endl;
    }

    // Progress bar: one dot every PROGRESS_POINT reads,
    // and a summary line every PROGRESS_POINT * PROGRESS_LINE reads
    bp_total += read_length;

    if (!(nb_reads % PROGRESS_POINT)) {
      cout << "." ;

      if (!(nb_reads % (PROGRESS_POINT * PROGRESS_LINE))) {
        cout << setw(10) << nb_reads / 1000 << "k reads " << fixed << setprecision(2) << setw(14) << bp_total / 1E6 << " Mbp" << endl ;
      }

      cout.flush() ;
    }
  }

  cout << endl ;

  fillStatsClones(windowsStorage);

  return windowsStorage;
}