// Hit needs to be from the collapsed (non_redundant) list to match indexing double BiasCorrectionHelper::get_cond_prob(const MateHit& hit) { shared_ptr<ReadGroupProperties const> rgp = hit.read_group_props(); int i = get_index(rgp); int start; int end; int frag_len; int trans_len = _transcript->length(); _transcript->map_frag(hit, start, end, frag_len); shared_ptr<const EmpDist> fld = rgp->frag_len_dist(); double cond_prob = 1.0; cond_prob *= _start_biases[i][start]; cond_prob *= _end_biases[i][end]; double frag_prob = (bias_mode == POS || bias_mode == POS_VLMM || bias_mode == POS_SITE) ? fld->npdf(frag_len, trans_len-start) : fld->pdf(frag_len); cond_prob *= frag_prob; if (cond_prob==0.0) return 0.0; if (hit.is_pair() || hit.read_group_props()->complete_fragments()) { if (frag_len >= (int)_tot_biases_for_len[i].size()) cond_prob = 0.0; else cond_prob /= _tot_biases_for_len[i][frag_len]; } else if (start!=trans_len && end==trans_len) // The hit is a singleton at the start of a fragment cond_prob /= _start_biases_for_len[i][frag_len]; else if (start==trans_len && end!=trans_len) // The hit is a singleton at the end of a fragment cond_prob /= _end_biases_for_len[i][frag_len]; else if (frag_len==trans_len) // We don't actually know where we start or end and can't subtract off the frag_len or we'll get inf cond_prob /= trans_len; else { if (trans_len < frag_len) { cond_prob = 0; } else { // Single-end read w/ library type FF or RR cond_prob /= trans_len-frag_len; } } if (cond_prob > 0 && hit.collapse_mass() > 0) { _rg_masses[i] += hit.collapse_mass(); _mapped = true; } #if DEBUG if (isinf(cond_prob)) { double cond_prob = 1.0; cond_prob *= _start_biases[i][start]; cond_prob *= _end_biases[i][end]; double frag_prob = (bias_mode == POS || bias_mode == POS_VLMM || bias_mode == POS_SITE) ? 
fld->npdf(frag_len, trans_len-start) : fld->pdf(frag_len); cond_prob *= frag_prob; if (cond_prob==0.0) return 0.0; if (hit.is_pair()) { if (frag_len >= _tot_biases_for_len[i].size()) cond_prob = 0.0; else cond_prob /= _tot_biases_for_len[i][frag_len]; } else if (start!=trans_len && end==trans_len) // The hit is a singleton at the start of a fragment cond_prob /= _start_biases_for_len[i][frag_len]; else if (start==trans_len && end!=trans_len) // The hit is a singleton at the end of a fragment cond_prob /= _end_biases_for_len[i][frag_len]; else if (frag_len==trans_len) // We don't actually know where we start or end and can't subtract off the frag_len or we'll get inf cond_prob /= trans_len; else { if (trans_len < frag_len) { cond_prob = 0; } else { // Single-end read w/ library type FF or RR cond_prob /= trans_len-frag_len; } } } #endif assert(!isinf(cond_prob)); assert(!isnan(cond_prob)); if (isinf(cond_prob) || isnan(cond_prob)) cond_prob = 0.0; return cond_prob; }
// Places multi-reads to the right of reads they match bool mate_hit_lt(const MateHit& lhs, const MateHit& rhs) { if (lhs.left() != rhs.left()) return lhs.left() < rhs.left(); if (lhs.right() != rhs.right()) return lhs.right() > rhs.right(); if ((lhs.left_alignment() == NULL) != (rhs.left_alignment() == NULL)) return (lhs.left_alignment() == NULL) < (rhs.left_alignment() == NULL); if ((lhs.right_alignment() == NULL) != (rhs.right_alignment() == NULL)) return (lhs.right_alignment() == NULL) < (rhs.right_alignment() == NULL); assert ((lhs.right_alignment() == NULL) == (rhs.right_alignment() == NULL)); assert ((lhs.left_alignment() == NULL) == (rhs.left_alignment() == NULL)); const ReadHit* lhs_l = lhs.left_alignment(); const ReadHit* lhs_r = lhs.right_alignment(); const ReadHit* rhs_l = rhs.left_alignment(); const ReadHit* rhs_r = rhs.right_alignment(); if (lhs_l && rhs_l) { if (lhs_l->cigar().size() != rhs_l->cigar().size()) return lhs_l->cigar().size() < rhs_l->cigar().size(); for (size_t i = 0; i < lhs_l->cigar().size(); ++i) { if (lhs_l->cigar()[i].opcode != rhs_l->cigar()[i].opcode) return lhs_l->cigar()[i].opcode < rhs_l->cigar()[i].opcode; if (lhs_l->cigar()[i].length != rhs_l->cigar()[i].length) return lhs_l->cigar()[i].length < rhs_l->cigar()[i].length; } } if (lhs_r && rhs_r) { if (lhs_r->cigar().size() != rhs_r->cigar().size()) return lhs_r->cigar().size() < rhs_r->cigar().size(); for (size_t i = 0; i < lhs_r->cigar().size(); ++i) { if (lhs_r->cigar()[i].opcode != rhs_r->cigar()[i].opcode) return lhs_r->cigar()[i].opcode < rhs_r->cigar()[i].opcode; if (lhs_r->cigar()[i].length != rhs_r->cigar()[i].length) return lhs_r->cigar()[i].length < rhs_r->cigar()[i].length; } } if (lhs.is_multi() != rhs.is_multi()) { return rhs.is_multi(); } return false; }
// Predicate: true when the hit's collapse mass is exactly zero.
bool has_no_collapse_mass(const MateHit& hit)
{
    const bool massless = (hit.collapse_mass() == 0);
    return massless;
}
// Does NOT care about the read group this hit came from. bool hits_equals(const MateHit& lhs, const MateHit& rhs) { if (lhs.ref_id() != rhs.ref_id()) return false; if ((lhs.left_alignment() == NULL) != (rhs.left_alignment() == NULL)) return false; if ((lhs.right_alignment() == NULL) != (rhs.right_alignment() == NULL)) return false; if (lhs.left_alignment()) { if (!(hits_eq_mod_id(*lhs.left_alignment(),*(rhs.left_alignment())))) return false; } if (lhs.right_alignment()) { if (!(hits_eq_mod_id(*lhs.right_alignment(),*(rhs.right_alignment())))) return false; } return true; }
// Compares for structural equality, but won't declare multihits equal to one another // and won't return true for hits from different read groups (e.g. replicate samples) bool hits_eq_non_multi_non_replicate(const MateHit& lhs, const MateHit& rhs) { if ((lhs.is_multi() || rhs.is_multi() || lhs.read_group_props() != rhs.read_group_props()) && lhs.insert_id() != rhs.insert_id()) return false; return hits_equals(lhs, rhs); }
// Compares for structural equality, but won't declare multihits equal to one another bool hits_eq_non_multi(const MateHit& lhs, const MateHit& rhs) { if ((lhs.is_multi() || rhs.is_multi() ) && lhs.insert_id() != rhs.insert_id()) return false; return hits_equals(lhs, rhs); }