예제 #1
0
파일: seq_util.cpp 프로젝트: BadSeby/manta
std::size_t
get_ref_seq_known_size(const reference_contig_segment& ref,
                       const pos_range pr)
{
    pos_t b(0);
    pos_t end(ref.end());
    if (pr.is_begin_pos && (pr.begin_pos>0)) b=pr.begin_pos;
    if (pr.is_end_pos && (pr.end_pos>0)) end=std::min(end,pr.end_pos);
    std::size_t size(0);
    for (; b<end; ++b)
    {
        if (ref.get_base(b) != 'N') size++;
    }
    return size;
}
/// Return true when every character of repeat_unit equals the base that
/// ref.get_base() reports at the matching offset starting from pos.
static
bool
is_repeat_unit_at(const reference_contig_segment& ref,
                  const pos_t pos,
                  const std::string& repeat_unit)
{
    const int unit_size(static_cast<int>(repeat_unit.size()));
    for(int j(0); j<unit_size; ++j) {
        if(ref.get_base(pos+j) != repeat_unit[j]) return false;
    }
    return true;
}

/// Fill in the repeat-unit fields of iri for an insert, delete or swap indel.
///
/// The repeat fields are first reset to their "no repeat" defaults
/// (is_repeat_unit=false, repeat_unit="N/A", both counts 0). For any other
/// indel type the function returns with those defaults in place. Otherwise
/// the repeat unit is obtained from get_seq_repeat_unit() (defined elsewhere;
/// presumably it reports the unit and how often it occurs in the sequence),
/// and the reference is scanned on both sides of the indel for further
/// back-to-back copies of that unit.
///
/// \param ik  indel key; ik.pos and ik.right_pos() delimit the indel
/// \param ref reference segment used for the flanking-repeat scan
/// \param iri report record; iri.it, iri.indel_seq and iri.ref_seq are read,
///            the repeat fields are written
static
void
set_repeat_info(const indel_key& ik,
                const reference_contig_segment& ref,
                starling_indel_report_info& iri)
{
    iri.is_repeat_unit = false;
    iri.repeat_unit = "N/A";
    iri.ref_repeat_count = 0;
    iri.indel_repeat_count = 0;

    if(! ((iri.it == INDEL::INSERT) ||
          (iri.it == INDEL::DELETE) ||
          (iri.it == INDEL::SWAP))) return;

    unsigned insert_repeat_count(0);
    unsigned delete_repeat_count(0);

    if       (iri.it == INDEL::INSERT) {
        get_seq_repeat_unit(iri.indel_seq,iri.repeat_unit,insert_repeat_count);
    } else if(iri.it == INDEL::DELETE) {
        get_seq_repeat_unit(iri.ref_seq,iri.repeat_unit,delete_repeat_count);
    } else if(iri.it == INDEL::SWAP) {
        // a swap only counts as a repeat change when the inserted and the
        // deleted sequence share the same non-empty repeat unit:
        std::string insert_ru;
        std::string delete_ru;
        get_seq_repeat_unit(iri.indel_seq,insert_ru,insert_repeat_count);
        get_seq_repeat_unit(iri.ref_seq,delete_ru,delete_repeat_count);
        if((insert_ru != delete_ru) || insert_ru.empty()) return;

        iri.repeat_unit=insert_ru;
    } else {
        assert(0);
    }

    // An empty repeat unit would give the scans below a stride of zero, so
    // they would never terminate. Report "no repeat" instead.
    if(iri.repeat_unit.empty()) {
        iri.repeat_unit = "N/A";
        return;
    }

    // count repeats in contextual sequence:
    unsigned indel_context_repeat_count(0);
    {
        const pos_t indel_begin_pos(ik.pos);
        const pos_t indel_end_pos(ik.right_pos());
        const int repeat_unit_size(static_cast<int>(iri.repeat_unit.size()));

        // count upstream repeats: step backwards one unit at a time from the
        // indel start until a unit fails to match or position 0 is passed
        for(pos_t i(indel_begin_pos-repeat_unit_size); i>=0; i-=repeat_unit_size) {
            if(! is_repeat_unit_at(ref,i,iri.repeat_unit)) break;
            indel_context_repeat_count += 1;
        }

        // count downstream repeats: step forwards from the indel end while a
        // whole unit still fits before ref.end()
        const pos_t rs(ref.end());
        for(pos_t i(indel_end_pos); (i+static_cast<pos_t>(repeat_unit_size)-1)<rs; i+=repeat_unit_size) {
            if(! is_repeat_unit_at(ref,i,iri.repeat_unit)) break;
            indel_context_repeat_count += 1;
        }
    }

    // the flanking copies are common to both alleles; the deleted copies only
    // add to the reference count and the inserted copies only to the indel count
    iri.is_repeat_unit = true;
    iri.ref_repeat_count = indel_context_repeat_count+delete_repeat_count;
    iri.indel_repeat_count = indel_context_repeat_count+insert_repeat_count;
}