Пример #1
0
// Summarize a list of alignment boxes as a single string.
//
// The result is made of:
//   - the concatenation of every box's getInitial(), in order,
//   - a tab character,
//   - for every box, in order, " <start>" then " <end>", both shifted by FIRST_POS.
string posFromBoxes(vector <AlignBox*> boxes)
{
  string labels;
  string positions;

  for (vector <AlignBox*>::const_iterator it = boxes.begin(); it != boxes.end(); ++it) {
    AlignBox *box = *it;

    labels += box->getInitial();

    // Each position is preceded by a single space, including the very first one
    positions += " ";
    positions += string_of_int(box->start + FIRST_POS);
    positions += " ";
    positions += string_of_int(box->end + FIRST_POS);
  }

  return labels + "\t" + positions;
}
Пример #2
0
// Build the textual code describing a chain of alignment boxes.
//
// Every box contributes its ref_label. Between two consecutive boxes, the
// following junction is inserted:
//   " <del_right of left box>/<inserted nucleotides>/<del_left of right box> "
// where the inserted nucleotides are the part of 'sequence' lying strictly
// between the two boxes.
string codeFromBoxes(vector <AlignBox*> boxes, string sequence)
{
  string result;

  for (size_t k = 0; k < boxes.size(); ++k) {

    if (k != 0) {
      AlignBox *left = boxes[k-1];
      AlignBox *right = boxes[k];

      // From left->end + 1 to right->start - 1, both positions included.
      // NOTE(review): if the boxes overlap, the length below is negative
      // before its conversion for substr() -- presumably callers resolve
      // overlaps beforehand; confirm.
      string inserted = sequence.substr(left->end + 1, right->start - left->end - 1);

      result += " " + string_of_int(left->del_right) + "/"
        + inserted
        + "/" + string_of_int(right->del_left) + " ";
    }

    result += boxes[k]->ref_label;
  }

  return result;
}
Пример #3
0
// Generate 10 sequences, and launch 10 times getRandom(1).
// We should not have the same sequence 10 times (p < 10^{-10})
void testRandom() {
  list<Sequence> seqs;
  string seg_name = "seq";
  char id = '0';
  string sequence = "AA";

  for (int i = 0; i < 10; i++) {
    seqs.push_back(create_sequence("seq" + string_of_int(id), "seq" + string_of_int(id), sequence, ""));
    sequence += "A";
    id++;
  }

  SequenceSampler sampler(seqs);
  string first_random = sampler.getRandom(1).front().label;
  bool all_equal = true;
  for (int i = 0; i < 9 && all_equal; i++) {
    if (first_random != sampler.getRandom(1).front().label)
      all_equal = false;
  }

  TAP_TEST(all_equal == false, TEST_SAMPLER_RANDOM, "On the 10 trials, we drawn 10 times the same sequence");
}
Пример #4
0
// Build the one-line textual summary of this segmentation.
//
// Layout, in order:
//   - "! " prefix when the sequence is not segmented, then 'info'
//   - " " + info_extra
//   - " " + code of the segmented germline
//   - " " + message associated with 'because'
//   - the e-values that are above NO_LIMIT_VALUE (global, then left, then "/" right)
//   - when CDR3start > 0, a "{start(length)end u|p aa}" junction description
string Segmenter::getInfoLine() const
{
  string line;

  if (!segmented)
    line += "! ";
  line += info;

  line += " " + info_extra;
  line += " " + segmented_germline->code;
  line += " " + string(segmented_mesg[because]);

  if (evalue > NO_LIMIT_VALUE) {
    line += " " + scientific_string_of_double(evalue);
  }

  if (evalue_left > NO_LIMIT_VALUE) {
    line += " " + scientific_string_of_double(evalue_left);
  }

  // The right e-value is separated by "/" and is tested independently of the left one
  if (evalue_right > NO_LIMIT_VALUE) {
    line += "/" + scientific_string_of_double(evalue_right);
  }

  // NOTE(review): the guard tests CDR3start whereas the fields printed are
  // the JUNCTION ones -- confirm this is intended.
  if (CDR3start > 0) {
    const int junction_length = JUNCTIONend - JUNCTIONstart + 1;

    line += " {" + string_of_int(JUNCTIONstart) + "(" + string_of_int(junction_length) + ")" + string_of_int(JUNCTIONend) + " ";
    // Index 0 gives 'u', index 1 gives 'p'
    line += "up"[JUNCTIONproductive];
    line += " ";
    line += JUNCTIONaa;
    line += "}";
  }

  return line;
}
Пример #5
0
// Remove the overlap ("chevauchement") between box_V and box_J, if any.
//
// When box_V->end >= box_J->start, both boundaries are moved to the middle
// point: box_V ends at 'middle' and box_J starts at 'middle + 1'.
//
// Returns " !ov <n>" where n is the length of the overlap that was found,
// or an empty string when the boxes did not overlap.
// Must only be called on a segmented sequence (asserted).
string Segmenter::removeChevauchement()
{
  assert(isSegmented());

  // No overlap: nothing to change, nothing to report
  if (box_V->end < box_J->start)
    return "";

  // Both values are computed before the boxes are modified
  const int overlap_length = box_V->end - box_J->start + 1;
  const int middle = (box_V->end + box_J->start) / 2;

  box_V->end = middle;
  box_J->start = middle + 1;

  return " !ov " + string_of_int(overlap_length);
}
Пример #6
0
// Segment 'seq' against 'germline' by aligning it against the germline
// collections rep_5 and rep_3.
//
// Parameters:
//   seq        sequence to segment (its label and sequence are copied)
//   germline   germline used; stored in segmented_germline. With
//              SEG_METHOD_MAX12, override_rep5_rep3_from_labels() is called on it.
//   segment_c  cost used for the alignments and for the p-value computation
//   threshold  passed to checkLeftRightEvaluesThreshold()
//   multiplier factor applied to both e-values
//
// Outcome:
//   - on success, 'segmented' is true, 'because' is SEG_PLUS or SEG_MINUS,
//     'boxes' holds box_V and box_J, 'code' and 'info' are filled and
//     finishSegmentation() has been called;
//   - otherwise the constructor returns early with 'segmented' false.
FineSegmenter::FineSegmenter(Sequence seq, Germline *germline, Cost segment_c,  double threshold, double multiplier)
{
  // Boxes are created with the labels "5", "4" and "3"
  box_V = new AlignBox("5");
  box_D = new AlignBox("4");
  box_J = new AlignBox("3");

  // Default state: not segmented, not processed, no e-value limit
  segmented = false;
  dSegmented = false;
  because = NOT_PROCESSED ;
  segmented_germline = germline ;
  info_extra = "" ;
  label = seq.label ;
  sequence = seq.sequence ;
  segment_cost=segment_c;
  evalue = NO_LIMIT_VALUE;
  evalue_left = NO_LIMIT_VALUE;
  evalue_right = NO_LIMIT_VALUE;
  box_V->marked_pos = 0;
  box_J->marked_pos = 0;

  // -1 marks CDR3/JUNCTION positions that are not computed here
  CDR3start = -1;
  CDR3end = -1;

  JUNCTIONstart = -1;
  JUNCTIONend = -1;

  // Only changed in the MAX12/MAX1U branch below; passed to align_against_collection()
  bool reverse_V = false ;
  bool reverse_J = false ;

  if ((germline->seg_method == SEG_METHOD_MAX12) || (germline->seg_method == SEG_METHOD_MAX1U))
    {
      // We check whether this sequence is segmented with MAX12 or MAX1U (with default e-value parameters)
      KmerSegmenter *kseg = new KmerSegmenter(seq, germline, THRESHOLD_NB_EXPECTED, 1);
      if (kseg->isSegmented())
        {
          reversed = kseg->isReverse();

          // On a reversed sequence, 'before' and 'after' are swapped and rebuilt
          // through KmerAffect(affect, true) -- presumably the reversed affect; confirm
          KmerAffect left = reversed ? KmerAffect(kseg->after, true) : kseg->before ;
          KmerAffect right = reversed ? KmerAffect(kseg->before, true) : kseg->after ;

          // 'left' and 'right' are copies: kseg is not needed anymore
          delete kseg ;

          reverse_V = (left.getStrand() == -1);
          reverse_J = (right.getStrand() == -1);

          // code is "Unexpected <signs><left basename>/<signs><right basename>"
          code = "Unexpected ";

          code += left.toStringSigns() + germline->index->getLabel(left).basename;
          code += "/";
          code += right.toStringSigns() + germline->index->getLabel(right).basename;
          info_extra += " " + left.toString() + "/" + right.toString() + " (" + code + ")";

          // MAX1U stops here: 'segmented' stays false and 'because' stays NOT_PROCESSED
          if (germline->seg_method == SEG_METHOD_MAX1U)
            return ;

          // MAX12: the germline is updated from the two affects found, then the alignment goes on
          germline->override_rep5_rep3_from_labels(left, right);
        }
      else
        {
          // Not segmented by the KmerSegmenter: give up, leaving the default state
          delete kseg ;
          return ;
        }
    }

  // Strand determination, with KmerSegmenter (with default e-value parameters)
  // Note that we use only the 'strand' component
  // When the KmerSegmenter fails, continue with positive strand
  // TODO: flag to force a strand / to test both strands ?
  // Note that 'reversed' is assigned again here, also after the MAX12 branch above

  KmerSegmenter *kseg = new KmerSegmenter(seq, germline, THRESHOLD_NB_EXPECTED, 1);
  reversed = kseg->isReverse();
  delete kseg ;
  
  sequence_or_rc = revcomp(sequence, reversed); // sequence, possibly reversed


  /* Segmentation */
  // Result of the alignment against rep_5 goes into box_V
  align_against_collection(sequence_or_rc, germline->rep_5, NO_FORBIDDEN_ID, reverse_V, reverse_V, false,
                                        box_V, segment_cost);

  // Result of the alignment against rep_3 goes into box_J
  align_against_collection(sequence_or_rc, germline->rep_3, NO_FORBIDDEN_ID, reverse_J, !reverse_J, false,
                                          box_J, segment_cost);

  // J was run with '!reverseJ', we copy the box informations from right to left
  // Should this directly be handled in align_against_collection() ?
  box_J->start = box_J->end ;
  box_J->del_left = box_J->del_right;

  /* E-values */
  // multiplier * sequence size * total size of the collection * p-value of the first score
  // NOTE(review): score[0] is read without checking that 'score' is not empty
  evalue_left  = multiplier * sequence.size() * germline->rep_5.totalSize() * segment_cost.toPValue(box_V->score[0].first);
  evalue_right = multiplier * sequence.size() * germline->rep_3.totalSize() * segment_cost.toPValue(box_J->score[0].first);
  evalue = evalue_left + evalue_right ;

  /* Unsegmentation causes */
  // A position equal to string::npos yields BAD_EVALUE on that side.
  // 'evalue' was summed above and is not updated here.
  if (box_V->end == (int) string::npos)
    {
      evalue_left = BAD_EVALUE ;
    }
      
  if (box_J->start == (int) string::npos)
    {
      evalue_right = BAD_EVALUE ;
    }

  // The strand is given as -1 (reversed) or 1
  checkLeftRightEvaluesThreshold(threshold, reversed ? -1 : 1);

  // 'because' was NOT_PROCESSED before the check: any other value is an unsegmentation cause
  if (because != NOT_PROCESSED)
    {
      segmented = false;
      info = " @" + string_of_int (box_V->end + FIRST_POS) + "  @" + string_of_int(box_J->start + FIRST_POS) ;
      return ;
    }

  /* The sequence is segmented */
  segmented = true ;
  because = reversed ? SEG_MINUS : SEG_PLUS ;

    //overlap VJ
  seg_N = check_and_resolve_overlap(sequence_or_rc, 0, sequence_or_rc.length(),
                                    box_V, box_J, segment_cost);

  // Reset extreme positions
  box_V->start = 0;
  box_J->end = sequence.length()-1;

  // Why could this happen ?
  // Clamp the start of J to the last position of the sequence
      if (box_J->start>=(int) sequence.length())
	  box_J->start=sequence.length()-1;

  // seg_N will be recomputed in finishSegmentation()

  // Only V and J are put in 'boxes' here; box_D is not included
  boxes.clear();
  boxes.push_back(box_V);
  boxes.push_back(box_J);
  code = codeFromBoxes(boxes, sequence_or_rc);
  info = posFromBoxes(boxes);

  finishSegmentation();
}
Пример #7
0
// Format a number of deletions for display.
//
// Returns "(<deletions> del)" when 'deletions' is non-zero, and an empty
// string when it is zero.
string format_del(int deletions)
{
  if (deletions == 0)
    return "";

  // Plain string concatenation; the former code dereferenced the literal "("
  // to get a char, which produced the same text in a far less readable way.
  return "(" + string_of_int(deletions) + " del)";
}
Пример #8
0
void KmerSegmenter::computeSegmentation(int strand, KmerAffect before, KmerAffect after,
                                        double threshold, double multiplier) {
  // Try to segment, computing 'box_V->end' and 'box_J->start'
  // If not segmented, put the cause of unsegmentation in 'because'

  affect_infos max;
  max = kaa->getMaximum(before, after);

  // We did not find a good segmentation point
  if (!max.max_found) {
    // We labeled it detected if there were both enough affect_5 and enough affect_3
    bool detected_before = (max.nb_before_left + max.nb_before_right >= DETECT_THRESHOLD);
    bool detected_after = (max.nb_after_left + max.nb_after_right >= DETECT_THRESHOLD);

    if (detected_before && detected_after)
      because = UNSEG_AMBIGUOUS ;
    else if ((strand == 1 && detected_before) || (strand == -1 && detected_after))
      because = UNSEG_TOO_FEW_J ;
    else if ((strand == 1 && detected_after) || (strand == -1 && detected_before))
      because = UNSEG_TOO_FEW_V ;
    else
      because = UNSEG_TOO_FEW_ZERO ;

    return ;
  }


  // E-values
  pair <double, double> pvalues = kaa->getLeftRightProbabilityAtLeastOrAbove();
  evalue_left = pvalues.first * multiplier ;
  evalue_right = pvalues.second * multiplier ;
  evalue = evalue_left + evalue_right ;

  checkLeftRightEvaluesThreshold(threshold, strand);

  if (because != NOT_PROCESSED)
    return ;

   // There was a good segmentation point

   box_V->end = max.first_pos_max;
   box_J->start = max.last_pos_max + 1;
   if (strand == -1) {
     int tmp = sequence.size() - box_V->end - 1;
     box_V->end = sequence.size() - box_J->start - 1;
     box_J->start = tmp;
   }

  // Yes, it is segmented
  segmented = true;
  because = reversed ? SEG_MINUS : SEG_PLUS ;

  // TODO: this should also use possFromBoxes()... but 'boxes' is not defined here
  info = "VJ \t"
   + string_of_int(FIRST_POS) + " "
   + string_of_int(box_V->end + FIRST_POS) + " "
   + string_of_int(box_J->start + FIRST_POS) + " "
   + string_of_int(sequence.size() - 1 + FIRST_POS) ;
  
  // removeChevauchement is called once info was already computed: it is only to output info_extra
  info_extra += removeChevauchement();
  finishSegmentation();

  return ;
}