void apath_limit_read_length( const unsigned target_read_start, const unsigned target_read_end, path_t& apath) { bool isStartSet(false); unsigned read_length(0); const unsigned as(apath.size()); unsigned startSegment(0); unsigned endSegment(as); for (unsigned i(0); i<as; ++i) { path_segment& ps(apath[i]); if (! is_segment_type_read_length(ps.type)) continue; read_length += ps.length; if ((! isStartSet) && (read_length > target_read_start)) { { const unsigned extra(ps.length - (read_length - target_read_start)); assert(ps.length > extra); ps.length -= extra; } startSegment=i; isStartSet=true; } if (read_length >= target_read_end) { if (read_length > target_read_end) { const unsigned extra(read_length - target_read_end); assert(ps.length > extra); ps.length -= extra; } endSegment=i+1; break; } } apath = path_t(apath.begin()+startSegment,apath.begin()+endSegment); }
void export_md_to_apath(const char* md, const bool is_fwd_strand, path_t& apath, const bool is_edge_deletion_error) { // to make best use of previous code, we parse the MD in the // alignment direction and then orient apath to the forward strand // as a second step if required // assert(NULL != md); apath.clear(); export_md_to_apath_impl(md,apath); unsigned as(apath.size()); if ( ((as>0) and (apath.front().type == DELETE)) or ((as>1) and (apath.back().type == DELETE)) ) { std::ostringstream oss; if (is_edge_deletion_error) { oss << "ERROR: "; } else { oss << "WARNING: "; } oss << "alignment path: " << apath_to_cigar(apath) << " contains meaningless edge deletion.\n"; if (is_edge_deletion_error) { throw blt_exception(oss.str().c_str()); } else { log_os << oss.str(); path_t apath2; for (unsigned i(0); i<as; ++i) { if (((i==0) or ((i+1)==as)) and apath[i].type == DELETE) continue; apath2.push_back(apath[i]); } apath=apath2; as=apath.size(); } } if ( (not is_fwd_strand) and (as>1) ) { std::reverse(apath.begin(),apath.end()); } }
/// Surround \p apath with clipping segments.
///
/// Any clip with a non-zero length is added; zero-length clips are skipped.
/// The resulting segment order is:
///   hard clip (hc_lead), soft clip (sc_lead), <original path>,
///   soft clip (sc_trail), hard clip (hc_trail)
void
apath_clip_adder(
    path_t& apath,
    const unsigned hc_lead,
    const unsigned hc_trail,
    const unsigned sc_lead,
    const unsigned sc_trail)
{
    path_segment clip;

    // collect the leading clips first (hard clip outermost), then place
    // them in front of the existing segments
    path_t leadingClips;
    if (hc_lead != 0)
    {
        clip.type = HARD_CLIP;
        clip.length = hc_lead;
        leadingClips.push_back(clip);
    }
    if (sc_lead != 0)
    {
        clip.type = SOFT_CLIP;
        clip.length = sc_lead;
        leadingClips.push_back(clip);
    }
    apath.insert(apath.begin(), leadingClips.begin(), leadingClips.end());

    // trailing clips mirror the leading order: soft clip, then hard clip
    if (sc_trail != 0)
    {
        clip.type = SOFT_CLIP;
        clip.length = sc_trail;
        apath.push_back(clip);
    }
    if (hc_trail != 0)
    {
        clip.type = HARD_CLIP;
        clip.length = hc_trail;
        apath.push_back(clip);
    }
}
static void fwd_apath_to_export_md(path_t& apath, const char* ref_begin, const char* ref_bases, const char* ref_end, const char* read_bases, std::string& md) { // process the align path bool foundUnsupportedCigar = false; path_t::const_iterator pCIter; for (pCIter = apath.begin(); pCIter != apath.end(); ++pCIter) { if (pCIter->type == DELETE) { // handle deletion md.push_back('^'); for (uint32_t i = 0; i < pCIter->length; ++i, ++ref_bases) { md.push_back(*ref_bases); } md.push_back('$'); } else if (pCIter->type == INSERT) { // handle insertion md.push_back('^'); md += boost::lexical_cast<std::string>(pCIter->length); read_bases += pCIter->length; md.push_back('$'); } else if (is_segment_align_match(pCIter->type)) { // handle match/mismatch uint32_t numMatchingBases = 0; for (uint32_t i = 0; i < pCIter->length; ++i, ++ref_bases, ++read_bases) { // handle circular genome if ((ref_bases < ref_begin) || (ref_bases > ref_end)) { md.push_back('N'); continue; } if (*ref_bases != *read_bases) { // write the number of preceding matching bases if (numMatchingBases != 0) { md += boost::lexical_cast<std::string>(numMatchingBases); numMatchingBases = 0; } // output the mismatched base md.push_back(*ref_bases); } else ++numMatchingBases; } // write the number of trailing matching bases if (numMatchingBases != 0) { md += boost::lexical_cast<std::string>(numMatchingBases); } } else { // handle unsupported CIGAR operation foundUnsupportedCigar = true; break; } } if (foundUnsupportedCigar) md = "UNSUPPORTED"; }