std::size_t get_ref_seq_known_size(const reference_contig_segment& ref, const pos_range pr) { pos_t b(0); pos_t end(ref.end()); if (pr.is_begin_pos && (pr.begin_pos>0)) b=pr.begin_pos; if (pr.is_end_pos && (pr.end_pos>0)) end=std::min(end,pr.end_pos); std::size_t size(0); for (; b<end; ++b) { if (ref.get_base(b) != 'N') size++; } return size; }
static void set_repeat_info(const indel_key& ik, const reference_contig_segment& ref, starling_indel_report_info& iri) { iri.is_repeat_unit = false; iri.repeat_unit = "N/A"; iri.ref_repeat_count = 0; iri.indel_repeat_count = 0; if(! ((iri.it == INDEL::INSERT) || (iri.it == INDEL::DELETE) || (iri.it == INDEL::SWAP))) return; unsigned insert_repeat_count(0); unsigned delete_repeat_count(0); if (iri.it == INDEL::INSERT) { get_seq_repeat_unit(iri.indel_seq,iri.repeat_unit,insert_repeat_count); } else if(iri.it == INDEL::DELETE) { get_seq_repeat_unit(iri.ref_seq,iri.repeat_unit,delete_repeat_count); } else if(iri.it == INDEL::SWAP) { std::string insert_ru; std::string delete_ru; get_seq_repeat_unit(iri.indel_seq,insert_ru,insert_repeat_count); get_seq_repeat_unit(iri.ref_seq,delete_ru,delete_repeat_count); if((insert_ru != delete_ru) || insert_ru.empty()) return; iri.repeat_unit=insert_ru; } else { assert(0); } // count repeats in contextual sequence: unsigned indel_context_repeat_count(0); { const pos_t indel_begin_pos(ik.pos); const pos_t indel_end_pos(ik.right_pos()); const int repeat_unit_size(static_cast<int>(iri.repeat_unit.size())); // count upstream repeats: for(pos_t i(indel_begin_pos-repeat_unit_size); i>=0; i-=repeat_unit_size) { bool is_repeat(true); for(int j(0); j<repeat_unit_size; ++j) { if(ref.get_base(i+j) != iri.repeat_unit[j]) { is_repeat = false; break; } } if(! is_repeat) break; indel_context_repeat_count += 1; } // count downstream repeats: const pos_t rs(ref.end()); for(pos_t i(indel_end_pos); (i+static_cast<pos_t>(repeat_unit_size)-1)<rs; i+=repeat_unit_size) { bool is_repeat(true); for(int j(0); j<repeat_unit_size; ++j) { if(ref.get_base(i+j) != iri.repeat_unit[j]) { is_repeat = false; break; } } if(! is_repeat) break; indel_context_repeat_count += 1; } } iri.is_repeat_unit = true; iri.ref_repeat_count = indel_context_repeat_count+delete_repeat_count; iri.indel_repeat_count = indel_context_repeat_count+insert_repeat_count; }