Пример #1
0
/**
 * Aligns one read against selected V and J germline genes and prints the
 * colored read followed by the two reference alignments.
 *
 * Command-line arguments (presumably validated by usage(), which is not
 * visible here):
 *   argv[1]  the read sequence, or "-" to read it from stdin
 *   argv[2]  germline prefix; "V.fa" and "J.fa" are appended to it
 *   argv[3]  name used to select the V genes of interest
 *   argv[4]  name used to select the J genes of interest
 *
 * Exits with status 2 when no V or no J gene is selected, 0 otherwise.
 */
int main(int argc, const char** argv)
{
  usage(argc, argv);

  string read = argv[1];
  string germline = argv[2];

  Fasta Vgenes(germline+"V.fa", 2, "|");
  Fasta Jgenes(germline+"J.fa", 2, "|");

  Fasta interestingV = extractInterestingGenes(Vgenes, argv[3]);
  Fasta interestingJ = extractInterestingGenes(Jgenes, argv[4]);

  if (interestingV.size() == 0) {
    cerr << "No interesting V found" << endl;
    exit(2);
  }
  if (interestingJ.size() == 0) {
    cerr << "No interesting J found" << endl;
    exit(2);
  }

  AlignBox box_V("5", V_COLOR);
  AlignBox box_J("3", J_COLOR);

  if (read == "-") {
    // Read on stdin
    read = read_sequence(cin);
  }

  align_against_collection(read, interestingV, -1, false, false, false, &box_V, VDJ);
  align_against_collection(read, interestingJ, -1, false, true, false, &box_J, VDJ);
  // This should be handled directly into align_against_collection:
  // for J, the position reported as 'end' is where the gene starts in the
  // read, and the J box is then extended to the last position of the read.
  box_J.start = box_J.end ;
  box_J.del_left = box_J.del_right;
  box_J.end = read.size() - 1;

  // Only the last (for V) / first (for J) GENE_ALIGN positions are displayed.
  int align_V_length = min(GENE_ALIGN, box_V.end - box_V.start + 1);
  // NOTE(review): box_J.end is read.size() - 1, so the J span would be
  // read.size() - box_J.start; the extra +1 looks like an off-by-one
  // (substr() clamps it, but end_J can then equal read.size()).
  // Left unchanged -- confirm against the coordinate convention of refToString().
  int align_J_length = min(GENE_ALIGN, (int)read.size() - box_J.start + 1);
  int start_V = box_V.end - align_V_length + 1;
  int end_J = box_J.start + align_J_length - 1;

  // Length of the stretch strictly between the end of V and the start of J.
  // When both alignments touch or overlap, this is zero or negative; a
  // negative value converted to string::size_type would make substr() return
  // the whole remainder of the read, so it is clamped to 0.
  int between_length = (box_J.start - 1) - (box_V.end + 1) + 1;
  if (between_length < 0)
    between_length = 0;

  cout << "read        \t" << start_V << "\t" ;

  cout << V_COLOR << read.substr(start_V, align_V_length)
       << NO_COLOR
       << read.substr(box_V.end+1, between_length)
       << J_COLOR
       << read.substr(box_J.start, align_J_length)
       << NO_COLOR
       << "\t" << end_J << endl ;

  cout << box_V.refToString(start_V, end_J) << "\t" << box_V << endl ;
  cout << box_J.refToString(start_V, end_J) << "\t" << box_J << endl ;

  exit (0);
}
Пример #2
0
/**
 * Builds a new collection holding every entry of `repertoire` whose label
 * contains `name` as a substring. Entries keep their original order.
 *
 * @param repertoire  collection to filter (only queried here)
 * @param name        substring searched for in each label
 * @return the matching entries; empty when no label contains `name`
 */
Fasta extractInterestingGenes(Fasta &repertoire, string name) {
  Fasta matching;

  const int total = repertoire.size();
  int idx = 0;
  while (idx < total) {
    const bool label_matches = (repertoire.label(idx).find(name) != string::npos);
    if (label_matches)
      matching.add(repertoire.read(idx));
    ++idx;
  }

  return matching;
}
Пример #3
0
/**
 * Aligns `read` against every sequence of `rep` and records the best-scoring
 * reference in `box`.
 *
 * @param read              sequence to align
 * @param rep               collection of reference sequences
 * @param forbidden_rep_id  index in `rep` to skip; a negative value skips nothing
 * @param reverse_ref       passed to revcomp() together with the read; when
 *                          true, box->end is mapped back with
 *                          read.length() - best_i - 1
 * @param reverse_both      forwarded (twice) to the DynProg constructor; also
 *                          selects how box->del_right is derived
 * @param local             true: DynProg::Local mode, backtrack on every
 *                          reference; false: DynProg::LocalEndWithSomeDeletions,
 *                          computed with the bottom-triangle restriction and
 *                          backtracked only when a new best score is found
 * @param box               output: ref_nb, ref_label, marked_pos, ref,
 *                          del_left, del_right, start, end and score are written
 * @param segment_cost      cost passed to DynProg
 *
 * box->score always receives the (score, index) pairs of all evaluated
 * references, sorted with comp_pair. When no reference could be selected
 * (empty collection, only the forbidden entry, or no score above MINUS_INF),
 * box->ref_nb is left at MINUS_INF and the other fields are not updated.
 */
void align_against_collection(string &read, Fasta &rep, int forbidden_rep_id,
                              bool reverse_ref, bool reverse_both, bool local,
                              AlignBox *box, Cost segment_cost)
{

  int best_score = MINUS_INF ;
  box->ref_nb = MINUS_INF ;
  bool found_ref = false ;
  int best_best_i = (int) string::npos ;
  int best_best_j = (int) string::npos ;
  int best_first_i = (int) string::npos ;
  int best_first_j = (int) string::npos ;

  vector<pair<int, int> > score_r;

  DynProg::DynProgMode dpMode = DynProg::LocalEndWithSomeDeletions;
  if (local)
    dpMode = DynProg::Local;

  // With reverse_ref, the read is reversed to prevent calling revcomp on each reference sequence
  string sequence_or_rc = revcomp(read, reverse_ref);

  for (int r = 0 ; r < rep.size() ; r++)
    {
      if (r == forbidden_rep_id)
        continue;

      DynProg dp = DynProg(sequence_or_rc, rep.sequence(r),
                           dpMode, // DynProg::SemiGlobalTrans,
                           segment_cost, // DNA
                           reverse_both, reverse_both,
                           rep.read(r).marked_pos);

      bool onlyBottomTriangle = !local ;
      int score = dp.compute(onlyBottomTriangle, BOTTOM_TRIANGLE_SHIFT);

      if (local)
        dp.backtrack();

      if (score > best_score)
        {
          found_ref = true ;
          best_score = score ;
          best_best_i = dp.best_i ;
          best_best_j = dp.best_j ;
          // NOTE(review): in non-local mode, first_i / first_j are read
          // before dp.backtrack() is called below -- confirm that compute()
          // already sets them.
          best_first_i = dp.first_i ;
          best_first_j = dp.first_j ;
          box->ref_nb = r ;
          box->ref_label = rep.label(r) ;

          // In non-local mode, backtracking is only done for a new best alignment.
          if (!local)
            dp.backtrack();
          box->marked_pos = dp.marked_pos_i ;
        }

      score_r.push_back(make_pair(score, r));

      // #define DEBUG_SEGMENT

#ifdef DEBUG_SEGMENT
      cout << rep.label(r) << " " << score << " " << dp.best_i << endl ;
#endif

    }

  sort(score_r.begin(), score_r.end(), comp_pair);
  box->score = score_r;

  // No reference was selected: box->ref_nb is still MINUS_INF and must not be
  // used as an index into the collection.
  if (!found_ref)
    return;

  box->ref = rep.sequence(box->ref_nb);
  box->del_right = reverse_both ? best_best_j : box->ref.size() - best_best_j - 1;
  box->del_left = best_first_j;
  box->start = best_first_i;

#ifdef DEBUG_SEGMENT
  cout << "best: " << box->ref_label << " " << best_score ;
  cout << "del/del2/begin:" << (box->del_right) << "/" << (box->del_left) << "/" << (box->start) << endl;
  cout << endl;
#endif

  if (reverse_ref)
    // Why -1 here and +1 in dynprog.cpp /// best_i = m - best_i + 1 ;
    best_best_i = read.length() - best_best_i - 1 ;

  box->end = best_best_i ;
}