static void set_delete_seq(const indel_key& ik, const reference_contig_segment& ref, std::string& seq) { copy_ref_subseq(ref,ik.pos,ik.right_pos(),seq); }
bool is_range_adjacent_indel_breakpoints(const known_pos_range read_pr, const indel_key& ik) { if(read_pr.is_range_intersect(pos_range(ik.pos-1,ik.pos+1))) return true; const pos_t rpos(ik.right_pos()); if(ik.pos==rpos) return false; return (read_pr.is_range_intersect(pos_range(rpos-1,rpos+1))); }
void get_starling_indel_report_info(const indel_key& ik, const indel_data& id, const reference_contig_segment& ref, starling_indel_report_info& iri) { // indel summary info get_indel_summary_strings(ik,id,ref,iri.desc,iri.indel_seq,iri.ref_seq); get_vcf_summary_strings(ik,id,ref,iri.vcf_indel_seq,iri.vcf_ref_seq); iri.it=ik.type; const pos_t indel_begin_pos(ik.pos); const pos_t indel_end_pos(ik.right_pos()); // reference context: { static const unsigned INDEL_CONTEXT_SIZE(10); if(ik.type != INDEL::BP_RIGHT) { iri.ref_upstream.clear(); for(pos_t i(indel_begin_pos-static_cast<pos_t>(INDEL_CONTEXT_SIZE)); i<indel_begin_pos; ++i) { iri.ref_upstream += ref.get_base(i); } } else { iri.ref_upstream = "N/A"; } if(ik.type != INDEL::BP_LEFT) { iri.ref_downstream.clear(); for(pos_t i(indel_end_pos); i<(indel_end_pos+static_cast<pos_t>(INDEL_CONTEXT_SIZE)); ++i) { iri.ref_downstream += ref.get_base(i); } } else { iri.ref_downstream = "N/A"; } } // repeat analysis: set_repeat_info(ik,ref,iri); // interupted hpol compuation: iri.ihpol=get_interupted_hpol_size(indel_begin_pos-1,ref); iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_begin_pos,ref)); if(indel_begin_pos != indel_end_pos) { iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_end_pos-1,ref)); iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_end_pos,ref)); } }
static void set_repeat_info(const indel_key& ik, const reference_contig_segment& ref, starling_indel_report_info& iri) { iri.is_repeat_unit = false; iri.repeat_unit = "N/A"; iri.ref_repeat_count = 0; iri.indel_repeat_count = 0; if(! ((iri.it == INDEL::INSERT) || (iri.it == INDEL::DELETE) || (iri.it == INDEL::SWAP))) return; unsigned insert_repeat_count(0); unsigned delete_repeat_count(0); if (iri.it == INDEL::INSERT) { get_seq_repeat_unit(iri.indel_seq,iri.repeat_unit,insert_repeat_count); } else if(iri.it == INDEL::DELETE) { get_seq_repeat_unit(iri.ref_seq,iri.repeat_unit,delete_repeat_count); } else if(iri.it == INDEL::SWAP) { std::string insert_ru; std::string delete_ru; get_seq_repeat_unit(iri.indel_seq,insert_ru,insert_repeat_count); get_seq_repeat_unit(iri.ref_seq,delete_ru,delete_repeat_count); if((insert_ru != delete_ru) || insert_ru.empty()) return; iri.repeat_unit=insert_ru; } else { assert(0); } // count repeats in contextual sequence: unsigned indel_context_repeat_count(0); { const pos_t indel_begin_pos(ik.pos); const pos_t indel_end_pos(ik.right_pos()); const int repeat_unit_size(static_cast<int>(iri.repeat_unit.size())); // count upstream repeats: for(pos_t i(indel_begin_pos-repeat_unit_size); i>=0; i-=repeat_unit_size) { bool is_repeat(true); for(int j(0); j<repeat_unit_size; ++j) { if(ref.get_base(i+j) != iri.repeat_unit[j]) { is_repeat = false; break; } } if(! is_repeat) break; indel_context_repeat_count += 1; } // count downstream repeats: const pos_t rs(ref.end()); for(pos_t i(indel_end_pos); (i+static_cast<pos_t>(repeat_unit_size)-1)<rs; i+=repeat_unit_size) { bool is_repeat(true); for(int j(0); j<repeat_unit_size; ++j) { if(ref.get_base(i+j) != iri.repeat_unit[j]) { is_repeat = false; break; } } if(! is_repeat) break; indel_context_repeat_count += 1; } } iri.is_repeat_unit = true; iri.ref_repeat_count = indel_context_repeat_count+delete_repeat_count; iri.indel_repeat_count = indel_context_repeat_count+insert_repeat_count; }