/**
 * Merges another affectation into this one.
 *
 * - Identical affectations: nothing changes.
 * - This affectation is unknown: it becomes a copy of `kmer`.
 * - Anything else: the result is AFFECT_AMBIGUOUS, carrying the length of `kmer`.
 *
 * @param kmer affectation to merge in
 * @return reference to this (possibly updated) affectation
 */
KmerAffect &KmerAffect::operator+=(const KmerAffect &kmer) {
  if (kmer.affect != affect) {
    if (isUnknown()) {
      *this = kmer;
      return *this;
    }

    // Same label but different strand -> we put ambiguous. We could have
    // something to say that the strand is ambiguous but not the label, but
    // we don't have enough space in 1 byte…
    const bool same_label_opposite_strand =
      affect_char(affect) == affect_char(kmer.affect)
      && (affect_strand(affect) != affect_strand(kmer.affect));

    if (! same_label_opposite_strand) {
      // Sanity check kept only for the genuinely conflicting case: when the
      // raw `c` fields are equal, both lengths are expected to be equal too.
      assert(affect.c != kmer.affect.c || getLength() == kmer.getLength());
    }

    // Both conflicting cases end up ambiguous, with the incoming length.
    *this = AFFECT_AMBIGUOUS;
    affect.length = kmer.getLength();
  }
  return *this;
}
/**
 * Builds a k-mer based segmenter for the read `seq` against `germline`.
 *
 * Steps performed:
 *  1. reset every result field (boxes, CDR3/JUNCTION positions, e-values, ...);
 *  2. give up with UNSEG_TOO_SHORT (and `kaa == NULL`) when the read is shorter
 *     than the value returned by the index's getS();
 *  3. count, per strand, the affectations that are neither ambiguous nor unknown;
 *  4. choose the `before` / `after` affectations and the strand, depending on
 *     the germline segmentation method (MAX12 / MAX1U pseudo-germlines, or a
 *     regular germline using affect_5 / affect_3);
 *  5. delegate to computeSegmentation().
 *
 * On every early return, `because` holds the reason of the failure and
 * computeSegmentation() is not called.
 *
 * @param seq        read to analyse (its label and sequence are copied)
 * @param germline   germline providing the index, code, seg_method, affect_5 and affect_3
 * @param threshold  forwarded unchanged to computeSegmentation()
 * @param multiplier forwarded unchanged to computeSegmentation()
 */
KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold, double multiplier)
{
  box_V = new AlignBox();
  box_D = new AlignBox();
  box_J = new AlignBox();

  CDR3start = -1;
  CDR3end = -1;

  JUNCTIONstart = -1;
  JUNCTIONend = -1;

  label = seq.label ;
  sequence = seq.sequence ;
  info = "" ;
  info_extra = "seed";
  segmented = false;
  segmented_germline = germline ;
  system = germline->code; // useful ?
  reversed = false;
  because = NOT_PROCESSED ; // Cause of unsegmentation
  score = 0 ;
  evalue = NO_LIMIT_VALUE;
  evalue_left = NO_LIMIT_VALUE;
  evalue_right = NO_LIMIT_VALUE;

  const int s = static_cast<int>(germline->index->getS()) ;
  const int length = sequence.length() ;

  if (length < s)
    {
      because = UNSEG_TOO_SHORT;
      kaa = NULL;
      return ;
    }

  kaa = new KmerAffectAnalyser(*(germline->index), sequence);

  // Check strand consistency among the affectations.
  // In cell 0 we put the number of negative strands, in cell 1 the positives.
  int nb_strand[2] = {0,0};

  for (int i = 0; i < kaa->count(); i++) {
    KmerAffect it = kaa->getAffectation(i);
    if (! it.isAmbiguous() && ! it.isUnknown()) {
      const int affect_sign = affect_strand(it.affect);
      nb_strand[(affect_sign + 1) / 2] ++; // (strand+1) / 2 → 0 if strand == -1; 1 if strand == 1
    }
  }

  score = nb_strand[0] + nb_strand[1] ; // Used only for non-segmented germlines

  reversed = (nb_strand[0] > nb_strand[1]) ;

  int strand;

  if ((germline->seg_method == SEG_METHOD_MAX12)
      || (germline->seg_method == SEG_METHOD_MAX1U))
    { // Pseudo-germline, MAX12 and MAX1U
      pair <KmerAffect, KmerAffect> max12 ;

      // A single analyser serves both methods and the left/right sorting below.
      // NOTE(review): this relies on max() / max12() being read-only queries
      // on the analyser — confirm against CountKmerAffectAnalyser.
      CountKmerAffectAnalyser ckaa(*(germline->index), sequence);

      set<KmerAffect> forbidden;
      forbidden.insert(KmerAffect::getAmbiguous());
      forbidden.insert(KmerAffect::getUnknown());

      if (germline->seg_method == SEG_METHOD_MAX12)
        // MAX12: two maximum k-mers (no unknown)
        {
          max12 = ckaa.max12(forbidden);

          if (max12.first.isUnknown() || max12.second.isUnknown())
            {
              because = UNSEG_TOO_FEW_ZERO ;
              return ;
            }
        }
      else
        // MAX1U: the maximum k-mers (no unknown) + unknown
        {
          KmerAffect max = ckaa.max(forbidden);

          if (max.isUnknown())
            {
              because = UNSEG_TOO_FEW_ZERO ;
              return ;
            }
          max12 = make_pair(max, KmerAffect::getUnknown());
        }

      pair <KmerAffect, KmerAffect> before_after = ckaa.sortLeftRight(max12);

      before = before_after.first ;
      after = before_after.second ;

      // This strand computation is only a heuristic, especially for chimera +/- reads
      // Anyway, it allows to gather such reads and their reverse complement into a unique window...
      // ... except when the read is quite different outside the window
      strand = reversed ? -1 : 1 ;
    }
  else
    { // Regular germline

      // Test on which strand we are, select the before and after KmerAffects
      if (nb_strand[0] == 0 && nb_strand[1] == 0) {
        because = UNSEG_TOO_FEW_ZERO ;
        return ;
      } else if (nb_strand[0] > RATIO_STRAND * nb_strand[1]) {
        strand = -1;
        before = KmerAffect(germline->affect_3, -1);
        after = KmerAffect(germline->affect_5, -1);
      } else if (nb_strand[1] > RATIO_STRAND * nb_strand[0]) {
        strand = 1;
        before = KmerAffect(germline->affect_5, 1);
        after = KmerAffect(germline->affect_3, 1);
      } else {
        // Ambiguous information: we have positive and negative strands
        // and there is not enough difference to put them apart.
        if (nb_strand[0] + nb_strand[1] >= DETECT_THRESHOLD_STRAND)
          because = UNSEG_STRAND_NOT_CONSISTENT ;
        else
          because = UNSEG_TOO_FEW_ZERO ;
        return ;
      }

    } // endif Pseudo-germline

  computeSegmentation(strand, before, after, threshold, multiplier);
}
/**
 * Gives the one-character strand sign of an affectation.
 *
 * @param a affectation to describe
 * @return " " for the unknown or ambiguous affectation, "+" when
 *         affect_strand(a) is 1, "-" otherwise
 */
string toStringSigns(const affect_t &a){
  // Unknown and ambiguous affectations carry no strand to display.
  if ((a == AFFECT_UNKNOWN.affect) || (a == AFFECT_AMBIGUOUS.affect)) {
    return " ";
  }

  if (affect_strand(a) == 1) {
    return "+";
  }
  return "-";
}
/**
 * @return true when this affectation has strand 1 and its label character is
 *         AFFECT_AMBIGUOUS_CHAR, false otherwise
 */
bool KmerAffect::isAmbiguous() const {
  // The strand is tested first; the label is only looked at when it is 1.
  if (affect_strand(affect) != 1) {
    return false;
  }
  return affect_char(affect) == AFFECT_AMBIGUOUS_CHAR;
}
/**
 * @return 0 when the affectation is unknown or ambiguous, otherwise the value
 *         of affect_strand() for this affectation
 */
int KmerAffect::getStrand() const{
  const bool has_strand = ! isUnknown() && ! isAmbiguous();
  return has_strand ? affect_strand(affect) : 0;
}