Exemplo n.º 1
0
void
apath_limit_read_length(
    const unsigned target_read_start,
    const unsigned target_read_end,
    path_t& apath)
{
    bool isStartSet(false);

    unsigned read_length(0);
    const unsigned as(apath.size());
    unsigned startSegment(0);
    unsigned endSegment(as);
    for (unsigned i(0); i<as; ++i)
    {
        path_segment& ps(apath[i]);
        if (! is_segment_type_read_length(ps.type)) continue;
        read_length += ps.length;

        if ((! isStartSet) && (read_length > target_read_start))
        {
            {
                const unsigned extra(ps.length - (read_length - target_read_start));
                assert(ps.length > extra);
                ps.length -= extra;
            }
            startSegment=i;
            isStartSet=true;
        }

        if (read_length >= target_read_end)
        {
            if (read_length > target_read_end)
            {
                const unsigned extra(read_length - target_read_end);
                assert(ps.length > extra);
                ps.length -= extra;
            }
            endSegment=i+1;
            break;
        }
    }
    apath = path_t(apath.begin()+startSegment,apath.begin()+endSegment);
}
Exemplo n.º 2
0
void
export_md_to_apath(const char* md,
                   const bool is_fwd_strand,
                   path_t& apath,
                   const bool is_edge_deletion_error)
{

    // to make best use of previous code, we parse the MD in the
    // alignment direction and then orient apath to the forward strand
    // as a second step if required
    //
    assert(NULL != md);

    apath.clear();
    export_md_to_apath_impl(md,apath);

    unsigned as(apath.size());

    if ( ((as>0) and (apath.front().type == DELETE)) or
         ((as>1) and (apath.back().type == DELETE)) )
    {
        std::ostringstream oss;
        if (is_edge_deletion_error)
        {
            oss << "ERROR: ";
        }
        else
        {
            oss << "WARNING: ";
        }
        oss << "alignment path: " << apath_to_cigar(apath) << " contains meaningless edge deletion.\n";
        if (is_edge_deletion_error)
        {
            throw blt_exception(oss.str().c_str());
        }
        else
        {
            log_os << oss.str();
            path_t apath2;
            for (unsigned i(0); i<as; ++i)
            {
                if (((i==0) or ((i+1)==as)) and
                    apath[i].type == DELETE) continue;
                apath2.push_back(apath[i]);
            }
            apath=apath2;
            as=apath.size();
        }
    }

    if ( (not is_fwd_strand) and (as>1) )
    {
        std::reverse(apath.begin(),apath.end());
    }
}
Exemplo n.º 3
0
/// Surround \p apath with hard- and soft-clip segments.
///
/// Clip segments are only added for non-zero lengths. The resulting
/// segment order is:
///   [HARD_CLIP hc_lead] [SOFT_CLIP sc_lead] <original path>
///   [SOFT_CLIP sc_trail] [HARD_CLIP hc_trail]
///
/// \param apath alignment path, modified in place
/// \param hc_lead length of the leading hard clip
/// \param hc_trail length of the trailing hard clip
/// \param sc_lead length of the leading soft clip
/// \param sc_trail length of the trailing soft clip
void
apath_clip_adder(path_t& apath,
                 const unsigned hc_lead,
                 const unsigned hc_trail,
                 const unsigned sc_lead,
                 const unsigned sc_trail)
{
    path_segment clip;

    // collect the leading clips first (hard clip outermost), then place
    // them in front of the existing segments
    path_t leadingClips;
    if (hc_lead>0)
    {
        clip.type = HARD_CLIP;
        clip.length = hc_lead;
        leadingClips.push_back(clip);
    }
    if (sc_lead>0)
    {
        clip.type = SOFT_CLIP;
        clip.length = sc_lead;
        leadingClips.push_back(clip);
    }
    apath.insert(apath.begin(),leadingClips.begin(),leadingClips.end());

    // trailing clips mirror the leading order (hard clip outermost)
    if (sc_trail>0)
    {
        clip.type = SOFT_CLIP;
        clip.length = sc_trail;
        apath.push_back(clip);
    }
    if (hc_trail>0)
    {
        clip.type = HARD_CLIP;
        clip.length = hc_trail;
        apath.push_back(clip);
    }
}
Exemplo n.º 4
0
/// Append the export-format MD representation of \p apath to \p md,
/// walking the path, reference and read in the forward direction.
///
/// Output per segment type:
///  - DELETE: '^', the deleted reference bases, '$'
///  - INSERT: '^', the insertion length as a decimal number, '$'
///  - match/mismatch (is_segment_align_match): run lengths of matching
///    bases as decimal numbers, with each mismatch written as the
///    reference base; positions outside [ref_begin, ref_end] are written
///    as 'N'
///
/// Any other segment type stops processing and replaces the whole content
/// of \p md with "UNSUPPORTED".
///
/// \param apath alignment path to convert (only read, never modified)
/// \param ref_begin lowest valid reference position
/// \param ref_bases reference position aligned to the start of the path
/// \param ref_end highest reference position treated as valid
///        NOTE(review): the bound test is `ref_bases > ref_end`, so
///        ref_end itself is dereferenced; confirm that callers pass a
///        dereferenceable pointer rather than a one-past-the-end pointer
/// \param read_bases read bases aligned to the start of the path
/// \param md output string; new content is appended to it
static
void
fwd_apath_to_export_md(path_t& apath,
                       const char* ref_begin,
                       const char* ref_bases,
                       const char* ref_end,
                       const char* read_bases,
                       std::string& md)
{

    // process the align path
    bool foundUnsupportedCigar = false;
    path_t::const_iterator pCIter;
    for (pCIter = apath.begin(); pCIter != apath.end(); ++pCIter)
    {

        if (pCIter->type == DELETE)
        {

            // handle deletion
            // NOTE(review): reference bases are read here without the
            // range test applied in the match branch below
            md.push_back('^');
            for (uint32_t i = 0; i < pCIter->length; ++i, ++ref_bases)
            {
                md.push_back(*ref_bases);
            }
            md.push_back('$');

        }
        else if (pCIter->type == INSERT)
        {

            // handle insertion: only the read position advances
            md.push_back('^');
            md += boost::lexical_cast<std::string>(pCIter->length);
            read_bases += pCIter->length;
            md.push_back('$');

        }
        else if (is_segment_align_match(pCIter->type))
        {

            // handle match/mismatch
            uint32_t numMatchingBases = 0;
            for (uint32_t i = 0; i < pCIter->length; ++i, ++ref_bases, ++read_bases)
            {

                // handle circular genome: positions outside the valid
                // reference range are reported as 'N'
                if ((ref_bases < ref_begin) || (ref_bases > ref_end))
                {
                    // write the number of preceding matching bases first,
                    // so that the count is emitted before the 'N' it
                    // precedes rather than after it
                    if (numMatchingBases != 0)
                    {
                        md += boost::lexical_cast<std::string>(numMatchingBases);
                        numMatchingBases = 0;
                    }

                    md.push_back('N');
                    continue;
                }

                if (*ref_bases != *read_bases)
                {

                    // write the number of preceding matching bases
                    if (numMatchingBases != 0)
                    {
                        md += boost::lexical_cast<std::string>(numMatchingBases);
                        numMatchingBases = 0;
                    }

                    // output the mismatched base
                    md.push_back(*ref_bases);

                }
                else ++numMatchingBases;
            }

            // write the number of trailing matching bases
            if (numMatchingBases != 0)
            {
                md += boost::lexical_cast<std::string>(numMatchingBases);
            }

        }
        else
        {

            // handle unsupported CIGAR operation
            foundUnsupportedCigar = true;
            break;
        }
    }

    // discard any partial output when the path could not be represented
    if (foundUnsupportedCigar) md = "UNSUPPORTED";
}