/// Fill \p seq with the reference subsequence associated with indel \p ik.
///
/// Thin wrapper: forwards ik.pos and ik.right_pos() as the two position
/// arguments of copy_ref_subseq(), which writes its result into \p seq.
/// (Presumably this is the sequence removed by a deletion -- the exact
/// interval convention is defined by copy_ref_subseq; confirm there.)
///
/// \param ik  indel key supplying the two positions
/// \param ref reference segment the bases are read from
/// \param seq output string, written by copy_ref_subseq
static void
set_delete_seq(const indel_key& ik,
               const reference_contig_segment& ref,
               std::string& seq)
{
    copy_ref_subseq(ref, ik.pos, ik.right_pos(), seq);
}
Пример #2
0
bool
is_range_adjacent_indel_breakpoints(const known_pos_range read_pr,
                                    const indel_key& ik) {

    if(read_pr.is_range_intersect(pos_range(ik.pos-1,ik.pos+1))) return true;
    const pos_t rpos(ik.right_pos());
    if(ik.pos==rpos) return false;
    return (read_pr.is_range_intersect(pos_range(rpos-1,rpos+1)));
}
void
get_starling_indel_report_info(const indel_key& ik,
                               const indel_data& id,
                               const reference_contig_segment& ref,
                               starling_indel_report_info& iri) {

    // indel summary info
    get_indel_summary_strings(ik,id,ref,iri.desc,iri.indel_seq,iri.ref_seq);
    get_vcf_summary_strings(ik,id,ref,iri.vcf_indel_seq,iri.vcf_ref_seq);

    iri.it=ik.type;

    const pos_t indel_begin_pos(ik.pos);
    const pos_t indel_end_pos(ik.right_pos());

    // reference context:
    {
        static const unsigned INDEL_CONTEXT_SIZE(10);

        if(ik.type != INDEL::BP_RIGHT) {
            iri.ref_upstream.clear();
            for(pos_t i(indel_begin_pos-static_cast<pos_t>(INDEL_CONTEXT_SIZE)); i<indel_begin_pos; ++i) {
                iri.ref_upstream += ref.get_base(i);
            }
        } else {
            iri.ref_upstream = "N/A";
        }
        if(ik.type != INDEL::BP_LEFT) {
            iri.ref_downstream.clear();
            for(pos_t i(indel_end_pos); i<(indel_end_pos+static_cast<pos_t>(INDEL_CONTEXT_SIZE)); ++i) {
                iri.ref_downstream += ref.get_base(i);
            }
        } else {
            iri.ref_downstream = "N/A";
        }
    }

    // repeat analysis:
    set_repeat_info(ik,ref,iri);

    // interupted hpol compuation:
    iri.ihpol=get_interupted_hpol_size(indel_begin_pos-1,ref);
    iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_begin_pos,ref));
    if(indel_begin_pos != indel_end_pos) {
        iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_end_pos-1,ref));
        iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_end_pos,ref));
    }
}
/// Return true when the reference bases starting at \p pos are equal,
/// base for base, to \p unit.
static
bool
is_ref_repeat_unit_match(const reference_contig_segment& ref,
                         const pos_t pos,
                         const std::string& unit)
{
    const int unit_size(static_cast<int>(unit.size()));
    for(int j(0); j<unit_size; ++j) {
        if(ref.get_base(pos+j) != unit[j]) return false;
    }
    return true;
}

/// Fill the repeat-related fields of \p iri for indel \p ik.
///
/// The fields are first reset to the "no repeat" state
/// (is_repeat_unit=false, repeat_unit="N/A", both counts 0). Only INSERT,
/// DELETE and SWAP indels (as recorded in iri.it) are analysed further:
///  - INSERT: the repeat unit is derived from iri.indel_seq;
///  - DELETE: the repeat unit is derived from iri.ref_seq;
///  - SWAP:   units are derived from both sequences and must be equal,
///            otherwise the "no repeat" state is kept.
/// Copies of the unit directly upstream of ik.pos and directly downstream of
/// ik.right_pos() are then counted in \p ref and added to the counts returned
/// by get_seq_repeat_unit().
///
/// An empty repeat unit is treated as "no repeat" for every indel type. The
/// counting loops advance by the unit length, so a zero-length unit would
/// otherwise never terminate.
///
/// \param ik  indel key supplying the two flanking positions
/// \param ref reference segment used for context counting
/// \param iri report record; reads it/indel_seq/ref_seq, writes the repeat
///            fields
static
void
set_repeat_info(const indel_key& ik,
                const reference_contig_segment& ref,
                starling_indel_report_info& iri)
{
    iri.is_repeat_unit = false;
    iri.repeat_unit = "N/A";
    iri.ref_repeat_count = 0;
    iri.indel_repeat_count = 0;

    unsigned insert_repeat_count(0);
    unsigned delete_repeat_count(0);

    if       (iri.it == INDEL::INSERT) {
        get_seq_repeat_unit(iri.indel_seq,iri.repeat_unit,insert_repeat_count);
    } else if(iri.it == INDEL::DELETE) {
        get_seq_repeat_unit(iri.ref_seq,iri.repeat_unit,delete_repeat_count);
    } else if(iri.it == INDEL::SWAP) {
        std::string insert_ru;
        std::string delete_ru;
        get_seq_repeat_unit(iri.indel_seq,insert_ru,insert_repeat_count);
        get_seq_repeat_unit(iri.ref_seq,delete_ru,delete_repeat_count);
        if(insert_ru != delete_ru) return;

        iri.repeat_unit=insert_ru;
    } else {
        // all other indel types carry no repeat information
        return;
    }

    // A zero-length unit would give the loops below a stride of 0; report
    // "no repeat" instead (this was previously only checked for SWAP).
    if(iri.repeat_unit.empty()) {
        iri.repeat_unit = "N/A";
        return;
    }

    // count repeats in contextual sequence:
    unsigned indel_context_repeat_count(0);
    {
        const pos_t indel_begin_pos(ik.pos);
        const pos_t indel_end_pos(ik.right_pos());
        const int repeat_unit_size(static_cast<int>(iri.repeat_unit.size()));

        // count upstream repeats, walking away from the indel one unit at a
        // time until a mismatch or position 0 is passed:
        for(pos_t i(indel_begin_pos-repeat_unit_size); i>=0; i-=repeat_unit_size) {
            if(! is_ref_repeat_unit_match(ref,i,iri.repeat_unit)) break;
            indel_context_repeat_count += 1;
        }

        // count downstream repeats, stopping once a whole unit no longer
        // fits before ref.end():
        const pos_t rs(ref.end());
        for(pos_t i(indel_end_pos); (i+static_cast<pos_t>(repeat_unit_size)-1)<rs; i+=repeat_unit_size) {
            if(! is_ref_repeat_unit_match(ref,i,iri.repeat_unit)) break;
            indel_context_repeat_count += 1;
        }
    }

    iri.is_repeat_unit = true;
    iri.ref_repeat_count = indel_context_repeat_count+delete_repeat_count;
    iri.indel_repeat_count = indel_context_repeat_count+insert_repeat_count;
}