예제 #1
0
/**
 * Merges another affectation into this one.
 *
 * - When both affectations compare equal, nothing changes.
 * - When this affectation is unknown, it becomes a copy of `kmer`.
 * - In every other case this affectation becomes AFFECT_AMBIGUOUS and its
 *   length is taken from `kmer`.
 *
 * @param kmer the affectation to merge into this one
 * @return a reference to this (possibly modified) affectation
 */
KmerAffect &KmerAffect::operator+=(const KmerAffect &kmer) {
  // Nothing to merge when both sides already agree.
  if (!(kmer.affect != affect))
    return *this;

  // An unknown affectation simply adopts the incoming one.
  if (isUnknown()) {
    *this = kmer;
    return *this;
  }

  // Same label on opposite strands: only the strand is really ambiguous,
  // but one byte leaves no room to encode "label known, strand ambiguous",
  // so the whole affectation is marked ambiguous below.
  const bool only_strand_differs =
    affect_char(affect) == affect_char(kmer.affect)
    && affect_strand(affect) != affect_strand(kmer.affect);

  if (!only_strand_differs) {
    // Sanity check kept for the remaining conflicts.
    assert(affect.c != kmer.affect.c || getLength() == kmer.getLength());
  }

  *this = AFFECT_AMBIGUOUS;
  affect.length = kmer.getLength();
  return *this;
}
예제 #2
0
/**
 * Builds a k-mer based segmenter for one sequence against one germline.
 *
 * The constructor resets the segmentation state, analyses the sequence with
 * the germline's k-mer index, counts the affectations found on each strand,
 * selects the `before` / `after` affectations and finally calls
 * computeSegmentation(). Whenever it cannot go on, it stores the reason in
 * `because` and returns before computeSegmentation() is reached.
 *
 * @param seq         sequence to process (its label and sequence are copied)
 * @param germline    germline providing the index, code, seg_method and the
 *                    affect_5 / affect_3 labels; dereferenced unconditionally
 * @param threshold   forwarded unchanged to computeSegmentation()
 * @param multiplier  forwarded unchanged to computeSegmentation()
 */
KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold, double multiplier)
{
  box_V = new AlignBox();
  box_D = new AlignBox();
  box_J = new AlignBox();

  CDR3start = -1;
  CDR3end = -1;

  JUNCTIONstart = -1;
  JUNCTIONend = -1;

  label = seq.label ;
  sequence = seq.sequence ;
  info = "" ;
  info_extra = "seed";
  segmented = false;
  segmented_germline = germline ;
  system = germline->code; // useful ?
  reversed = false;
  because = NOT_PROCESSED ; // Cause of unsegmentation
  score = 0 ;
  evalue = NO_LIMIT_VALUE;
  evalue_left = NO_LIMIT_VALUE;
  evalue_right = NO_LIMIT_VALUE;

  int s = static_cast<int>(germline->index->getS()) ;
  int length = sequence.length() ;

  // A sequence shorter than the value returned by getS() is not analysed.
  if (length < s)
    {
      because = UNSEG_TOO_SHORT;
      kaa = NULL;
      return ;
    }

  kaa = new KmerAffectAnalyser(*(germline->index), sequence);

  // Check strand consistency among the affectations.
  int strand;
  int nb_strand[2] = {0,0};     // In cell 0 we'll put the number of negative
                                // strand, while in cell 1 we'll put the
                                // positives
  for (int i = 0; i < kaa->count(); i++) {
    KmerAffect it = kaa->getAffectation(i);
    if (! it.isAmbiguous() && ! it.isUnknown()) {
      strand = affect_strand(it.affect);
      nb_strand[(strand + 1) / 2] ++; // (strand+1) / 2 → 0 if strand == -1; 1 if strand == 1
    }
  }

  score = nb_strand[0] + nb_strand[1] ; // Used only for non-segmented germlines

  reversed = (nb_strand[0] > nb_strand[1]) ;

  if ((germline->seg_method == SEG_METHOD_MAX12)
      || (germline->seg_method == SEG_METHOD_MAX1U))
    { // Pseudo-germline, MAX12 and MAX1U
      pair <KmerAffect, KmerAffect> max12 ;

      // A single counting analyser serves both methods and the final
      // left/right sorting.
      CountKmerAffectAnalyser ckaa(*(germline->index), sequence);

      // Ambiguous and unknown affectations are never candidates.
      set<KmerAffect> forbidden;
      forbidden.insert(KmerAffect::getAmbiguous());
      forbidden.insert(KmerAffect::getUnknown());

      if (germline->seg_method == SEG_METHOD_MAX12)
        // MAX12: two maximum k-mers (no unknown)
        {
          max12 = ckaa.max12(forbidden);

          if (max12.first.isUnknown() || max12.second.isUnknown())
            {
              because = UNSEG_TOO_FEW_ZERO ;
              return ;
            }
        }

      else
        // MAX1U: the maximum k-mers (no unknown) + unknown
        {
          KmerAffect max1 = ckaa.max(forbidden);

          if (max1.isUnknown())
            {
              because = UNSEG_TOO_FEW_ZERO ;
              return ;
            }
          max12 = make_pair(max1, KmerAffect::getUnknown());
        }

      pair <KmerAffect, KmerAffect> before_after =  ckaa.sortLeftRight(max12);

      before = before_after.first ;
      after = before_after.second ;

      // This strand computation is only a heuristic, especially for chimera +/- reads
      // Anyway, it allows to gather such reads and their reverse complement into a unique window...
      // ... except when the read is quite different outside the window
      strand = reversed ? -1 : 1 ;
    }

  else
    { // Regular germline

      // Test on which strand we are, select the before and after KmerAffects
      if (nb_strand[0] == 0 && nb_strand[1] == 0) {
        because = UNSEG_TOO_FEW_ZERO ;
        return ;
      } else if (nb_strand[0] > RATIO_STRAND * nb_strand[1]) {
        strand = -1;
        before = KmerAffect(germline->affect_3, -1);
        after = KmerAffect(germline->affect_5, -1);
      } else if (nb_strand[1] > RATIO_STRAND * nb_strand[0]) {
        strand = 1;
        before = KmerAffect(germline->affect_5, 1);
        after = KmerAffect(germline->affect_3, 1);
      } else {
        // Ambiguous information: we have positive and negative strands
        // and there is not enough difference to put them apart.
        if (nb_strand[0] + nb_strand[1] >= DETECT_THRESHOLD_STRAND)
          because = UNSEG_STRAND_NOT_CONSISTENT ;
        else
          because = UNSEG_TOO_FEW_ZERO ;
        return ;
      }

    } // endif Pseudo-germline

  computeSegmentation(strand, before, after, threshold, multiplier);
}
예제 #3
0
/**
 * Gives the one-character strand sign of an affectation.
 *
 * @param a the affectation to describe
 * @return " " when `a` equals the unknown or the ambiguous affectation,
 *         "+" when affect_strand(a) is 1, and "-" otherwise
 */
string toStringSigns(const affect_t &a){
  const bool no_sign = (a == AFFECT_UNKNOWN.affect) || (a == AFFECT_AMBIGUOUS.affect);
  if (no_sign)
    return " ";

  if (affect_strand(a) == 1)
    return "+";

  return "-";
}
예제 #4
0
/**
 * Tells whether this affectation is the ambiguous one.
 *
 * @return true when the strand is 1 and the label is AFFECT_AMBIGUOUS_CHAR
 */
bool KmerAffect::isAmbiguous() const {
  if (affect_strand(affect) != 1)
    return false;

  return affect_char(affect) == AFFECT_AMBIGUOUS_CHAR;
}
예제 #5
0
/**
 * Gives the strand of this affectation.
 *
 * @return 0 when the affectation is unknown or ambiguous, otherwise the
 *         value of affect_strand() for it
 */
int KmerAffect::getStrand() const{
  const bool has_strand = !isUnknown() && !isAmbiguous();
  return has_strand ? affect_strand(affect) : 0;
}