void
export_md_to_apath(const char* md,
                   const bool is_fwd_strand,
                   path_t& apath,
                   const bool is_edge_deletion_error)
{

    // to make best use of previous code, we parse the MD in the
    // alignment direction and then orient apath to the forward strand
    // as a second step if required
    //
    assert(NULL != md);

    apath.clear();
    export_md_to_apath_impl(md,apath);

    unsigned as(apath.size());

    if ( ((as>0) and (apath.front().type == DELETE)) or
         ((as>1) and (apath.back().type == DELETE)) )
    {
        std::ostringstream oss;
        if (is_edge_deletion_error)
        {
            oss << "ERROR: ";
        }
        else
        {
            oss << "WARNING: ";
        }
        oss << "alignment path: " << apath_to_cigar(apath) << " contains meaningless edge deletion.\n";
        if (is_edge_deletion_error)
        {
            throw blt_exception(oss.str().c_str());
        }
        else
        {
            log_os << oss.str();
            path_t apath2;
            for (unsigned i(0); i<as; ++i)
            {
                if (((i==0) or ((i+1)==as)) and
                    apath[i].type == DELETE) continue;
                apath2.push_back(apath[i]);
            }
            apath=apath2;
            as=apath.size();
        }
    }

    if ( (not is_fwd_strand) and (as>1) )
    {
        std::reverse(apath.begin(),apath.end());
    }
}
Beispiel #2
0
// Starting at segment index i, scan the uninterrupted run of INSERT/DELETE
// segments and report whether that run contains at least one of each type.
//
bool
is_segment_swap_start(const path_t& apath,
                      unsigned i)
{
    using namespace ALIGNPATH;

    bool saw_insert(false);
    bool saw_delete(false);

    const unsigned n_seg(apath.size());
    while (i<n_seg)
    {
        if (apath[i].type == INSERT)
        {
            saw_insert=true;
        }
        else if (apath[i].type == DELETE)
        {
            saw_delete=true;
        }
        else
        {
            // first segment that is neither insert nor delete ends the run
            break;
        }
        ++i;
    }

    return (saw_insert && saw_delete);
}
Beispiel #3
0
void
apath_limit_ref_length(
    const unsigned target_ref_length,
    path_t& apath)
{
    unsigned ref_length(0);

    const unsigned as(apath.size());
    for (unsigned i(0); i<as; ++i)
    {
        path_segment& ps(apath[i]);
        if (! is_segment_type_ref_length(ps.type)) continue;
        ref_length += ps.length;

        if (ref_length < target_ref_length) continue;

        if (ref_length > target_ref_length)
        {
            const unsigned extra(ref_length - target_ref_length);
            assert(ps.length > extra);
            ps.length -= extra;
        }
        apath.resize(i+1);
        break;
    }
}
Beispiel #4
0
// Return true when the path is non-empty and its first segment is a soft
// clip or a hard clip.
//
bool
is_clipped_front(const path_t& apath)
{
    const unsigned n_seg(apath.size());
    return ((n_seg != 0) &&
            ((apath[0].type == SOFT_CLIP) || (apath[0].type == HARD_CLIP)));
}
void
apath_to_bam_cigar(const path_t& apath,
                   uint32_t* bam_cigar) {

    const unsigned as(apath.size());
    for (unsigned i(0); i<as; ++i) {
        const path_segment& ps(apath[i]);
        assert(ps.type != NONE);
        bam_cigar[i] = (ps.length<<BAM_CIGAR_SHIFT | (static_cast<uint32_t>(ps.type)-1));
    }
}
Beispiel #6
0
	    // Follow `path` one element at a time, starting from this accessor.
	    // A string element is applied with operator[] as a key, an int
	    // element as an index. The walk stops as soon as the current
	    // accessor reports is_valid == false; the accessor reached so far
	    // is returned.
	    accessor get_path(path_t const& path) const {
		accessor cursor = *this;
		for (size_t pos = 0; pos < path.size(); ++pos) {
		    if (!cursor.is_valid) break;
		    if (const std::string* name = boost::get<std::string>(&path[pos])) {
			cursor = cursor[*name];
		    } else if (const int* index = boost::get<int>(&path[pos])) {
			cursor = cursor[*index];
		    }
		}
		return cursor;
	    }
Beispiel #7
0
// Return true if the path contains two directly adjacent segments that are
// both indel types (as judged by is_segment_type_indel).
//
bool
is_seq_swap(const path_t& apath)
{
    const unsigned n_seg(apath.size());

    // remember whether the segment just before the current one was an indel
    bool is_prev_indel(false);
    for (unsigned seg_index(0); seg_index<n_seg; ++seg_index)
    {
        const bool is_indel(is_segment_type_indel(apath[seg_index].type));
        if (is_indel && is_prev_indel) return true;
        is_prev_indel = is_indel;
    }
    return false;
}
Beispiel #8
0
// Append `length` bases of type `seg_type` to the end of the path.
//
// When the last existing segment already has this type it is lengthened,
// otherwise a new segment is added.
//
void
apath_append(
    path_t& apath,
    const align_t seg_type,
    const unsigned length)
{
    const bool is_extend_last((apath.size() != 0) && (apath.back().type == seg_type));
    if (! is_extend_last)
    {
        apath.emplace_back(seg_type,length);
        return;
    }
    apath.back().length += length;
}
Beispiel #9
0
unsigned
get_clip_len(const path_t& apath)
{
    const unsigned as(apath.size());
    if (as==0) return 0;
    if ((apath[0].type == SOFT_CLIP) || (apath[0].type == HARD_CLIP))
    {
        return apath[0].length;
    }
    if (as>1)
    {
        if ((apath[as-1].type == SOFT_CLIP) || (apath[as-1].type == HARD_CLIP))
        {
            return apath[as-1].length;
        }
    }
    return 0;
}
Beispiel #10
0
std::pair<unsigned,unsigned>
get_match_edge_segments(const path_t& apath)
{
    const unsigned as(apath.size());
    std::pair<unsigned,unsigned> res(as,as);
    bool is_first_match(false);
    for (unsigned i(0); i<as; ++i)
    {
        const path_segment& ps(apath[i]);
        if (is_segment_align_match(ps.type))
        {
            if (! is_first_match) res.first=i;
            is_first_match=true;
            res.second=i;
        }
    }
    return res;
}
void
edit_bam_cigar(const path_t& apath,
               bam1_t& br) {

    bam1_core_t& bc(br.core);

    const int old_n_cigar(bc.n_cigar);
    const int new_n_cigar(apath.size());
    const int delta(4*(new_n_cigar-old_n_cigar));

    if (0 != delta) {
        const int end(bc.l_qname+(4*old_n_cigar));
        change_bam_data_segment_len(end,delta,br);
        bc.n_cigar=new_n_cigar;
    }

    //update content of cigar array:
    apath_to_bam_cigar(apath,bam1_cigar(&br));
}
Beispiel #12
0
void
apath_limit_read_length(
    const unsigned target_read_start,
    const unsigned target_read_end,
    path_t& apath)
{
    bool isStartSet(false);

    unsigned read_length(0);
    const unsigned as(apath.size());
    unsigned startSegment(0);
    unsigned endSegment(as);
    for (unsigned i(0); i<as; ++i)
    {
        path_segment& ps(apath[i]);
        if (! is_segment_type_read_length(ps.type)) continue;
        read_length += ps.length;

        if ((! isStartSet) && (read_length > target_read_start))
        {
            {
                const unsigned extra(ps.length - (read_length - target_read_start));
                assert(ps.length > extra);
                ps.length -= extra;
            }
            startSegment=i;
            isStartSet=true;
        }

        if (read_length >= target_read_end)
        {
            if (read_length > target_read_end)
            {
                const unsigned extra(read_length - target_read_end);
                assert(ps.length > extra);
                ps.length -= extra;
            }
            endSegment=i+1;
            break;
        }
    }
    apath = path_t(apath.begin()+startSegment,apath.begin()+endSegment);
}
Beispiel #13
0
bool
is_edge_readref_len_segment(const path_t& apath)
{
    const unsigned as(apath.size());
    if (as==0) return false;

    const std::pair<unsigned,unsigned> ends(get_match_edge_segments(apath));

    // at this point we assume the alignment has been sanity checked for legal clipping,
    // where hard-clip is only on the outside, next soft-clipping, then anything else...
    //
    for (unsigned i(0); i<as; ++i)
    {
        const path_segment& ps(apath[i]);

        const bool is_edge_segment((i<ends.first) || (i>ends.second));
        const bool is_clip_type(ps.type==INSERT || ps.type==DELETE || ps.type==SKIP || ps.type==SOFT_CLIP);
        if (is_edge_segment && is_clip_type) return true;
    }
    return false;
}
Beispiel #14
0
std::pair<unsigned,unsigned>
get_nonclip_end_segments(const path_t& apath)
{
    const unsigned as(apath.size());
    std::pair<unsigned,unsigned> res(as,as);
    bool is_first_nonclip(false);
    for (unsigned i(0); i<as; ++i)
    {
        const path_segment& ps(apath[i]);
        if (! (ps.type == SOFT_CLIP ||
               ps.type == HARD_CLIP))
        {
            if (! is_first_nonclip)
            {
                res.first=i;
                is_first_nonclip=true;
            }
            res.second=i;
        }
    }
    return res;
}
Beispiel #15
0
// Check an alignment path for structural problems and return the first
// issue found, or ALIGN_ISSUE::NONE when the path passes every check.
//
// Checks, in the order they can trigger:
//   UNKNOWN_SEGMENT  - a segment has type NONE
//   REPEATED_SEGMENT - a segment has the same type as the one before it
//   EDGE_SKIP        - a SKIP occurs before the first, or after the last,
//                      align-match segment
//   CLIPPING         - a HARD_CLIP is not the first or last segment, or a
//                      SOFT_CLIP is neither at an edge nor directly inside
//                      an edge HARD_CLIP
//   FLOATING         - the path contains no align-match segment
//   LENGTH           - apath_read_length(apath) differs from seq_length
//
ALIGN_ISSUE::issue_t
get_apath_invalid_type(const path_t& apath,
                       const unsigned seq_length)
{
    // becomes true once the first align-match segment has been seen
    bool is_match(false);
    align_t last_type(NONE);
    const unsigned as(apath.size());
    for (unsigned i(0); i<as; ++i)
    {
        const path_segment& ps(apath[i]);

        if (ps.type==NONE) return ALIGN_ISSUE::UNKNOWN_SEGMENT;
        if ((i!=0) && ps.type==last_type) return ALIGN_ISSUE::REPEATED_SEGMENT;

        // a skip ahead of the first match segment is a leading-edge skip
        if (! is_match)
        {
            if (ps.type==SKIP) return ALIGN_ISSUE::EDGE_SKIP;
        }

        // hard clips are only accepted in the first or last position
        if (ps.type==HARD_CLIP)
        {
            if (! ((i==0) || ((i+1)==as))) return ALIGN_ISSUE::CLIPPING;
        }

        // a soft clip which is not in the first or last position is only
        // accepted when it sits directly next to an edge hard clip
        if (ps.type==SOFT_CLIP)
        {
            if (! ((i==0) || ((i+1)==as)))
            {
                if (i==1)
                {
                    if (as==3)
                    {
                        // middle of three segments: a hard clip on either side is sufficient
                        if ((apath[0].type != HARD_CLIP) && (apath[i+1].type != HARD_CLIP)) return ALIGN_ISSUE::CLIPPING;
                    }
                    else
                    {
                        // second segment: the first segment must be a hard clip
                        if (apath[0].type != HARD_CLIP) return ALIGN_ISSUE::CLIPPING;
                    }
                }
                else if ((i+2)==as)
                {
                    // second to last segment: the last segment must be a hard clip
                    if (apath[i+1].type != HARD_CLIP) return ALIGN_ISSUE::CLIPPING;
                }
                else
                {
                    return ALIGN_ISSUE::CLIPPING;
                }
            }
        }

        if ((! is_match) && (is_segment_align_match(ps.type))) is_match=true;

        last_type=ps.type;
    }

    if (! is_match) return ALIGN_ISSUE::FLOATING;

    // run in reverse to finish checking condition (2a):
    // walk back from the last segment until a match segment is reached; a
    // skip found on the way is a trailing-edge skip
    for (unsigned i(0); i<as; ++i)
    {
        const path_segment& ps(apath[as-(i+1)]);
        if (is_segment_align_match(ps.type)) break;
        //if(ps.type==DELETE) return ALIGN_ISSUE::EDGE_DELETE;
        if (ps.type==SKIP) return ALIGN_ISSUE::EDGE_SKIP;
    }

    if (seq_length != apath_read_length(apath)) return ALIGN_ISSUE::LENGTH;

    return ALIGN_ISSUE::NONE;
}
Beispiel #16
0
// Normalize an alignment path in place:
//
// 1. remove zero-length segments
// 2. remove pads
// 3. condense repeated segment types
// 4. reduce adjacent insertion/deletion tags to a single pair
// 5. replace NDN pattern with single SKIP segment
//
// Steps 1-4 are resolved first by folding lengths into the earliest
// compatible segment and zeroing the donor. Step 5 is then evaluated on the
// surviving (non-zero length) segments while the output path is rebuilt, so
// that a chain such as NDNDN collapses to one SKIP instead of leaving two
// adjacent SKIP segments, and emptied segments cannot hide or fake an NDN
// pattern.
//
// return true if path has been altered
//
bool
apath_cleaner(path_t& apath)
{
    bool is_cleaned(false);
    const unsigned as(apath.size());

    // index of the segment currently collecting insert / delete / other
    // lengths; the value 'as' means "no open segment of this kind"
    unsigned insertIndex(as);
    unsigned deleteIndex(as);
    unsigned otherIndex(as);
    for (unsigned i(0); i<as; ++i)
    {
        path_segment& ps(apath[i]);
        if       (ps.length == 0)
        {
            is_cleaned = true;
        }
        else if (ps.type == PAD)
        {
            ps.length = 0;
            is_cleaned = true;
        }
        else if (ps.type == INSERT)
        {
            if (insertIndex < as)
            {
                apath[insertIndex].length += ps.length;
                ps.length = 0;
                is_cleaned = true;
            }
            else
            {
                insertIndex = i;
            }
        }
        else if (ps.type == DELETE)
        {
            if (deleteIndex < as)
            {
                apath[deleteIndex].length += ps.length;
                ps.length = 0;
                is_cleaned = true;
            }
            else
            {
                deleteIndex = i;
            }
        }
        else
        {
            // a non-indel segment closes any open indel run, and an indel
            // run in between prevents merging with the previous non-indel
            // segment
            if ((insertIndex<as) || (deleteIndex<as))
            {
                insertIndex = as;
                deleteIndex = as;
                otherIndex = as;
            }
            if ((otherIndex < as) && (apath[otherIndex].type == ps.type))
            {
                apath[otherIndex].length += ps.length;
                ps.length = 0;
                is_cleaned = true;
            }
            else
            {
                otherIndex = i;
            }
        }
    }

    // rebuild the path from the non-empty segments, converting NDN to a
    // single N as soon as the closing N of the pattern is reached. Because
    // the merged N stays at the tail of the output, a following DN pair is
    // folded into the same segment.
    path_t apath2;
    for (const path_segment& ps : apath)
    {
        if (ps.length == 0) continue;

        const unsigned as2(apath2.size());
        if ((ps.type == SKIP) && (as2 >= 2) &&
            (apath2[as2-1].type == DELETE) &&
            (apath2[as2-2].type == SKIP))
        {
            apath2[as2-2].length += (apath2[as2-1].length + ps.length);
            apath2.pop_back();
            is_cleaned = true;
            continue;
        }
        apath2.push_back(ps);
    }

    if (is_cleaned)
    {
        apath = apath2;
    }
    return is_cleaned;
}