void BAMUtils::padded_alignment() { Cigar cig = bam_record.get_cigar(); Sequence tdna = bam_record.get_seq(); int sdna_pos = 0; int tdna_pos = 0; pad_source.reserve(t_dna.length()); pad_target.reserve(t_dna.length()); pad_match.reserve(t_dna.length()); Sequence::iterator tdna_itr = tdna.get_iterator(); int tot = 0; //find out if the first cigar op could be soft clipped or not is_three_prime_soft_clipped = false; for (Cigar::iterator i = cig.get_iterator(); i.good(); i.next()) { //i.op(); i.len(); if (this->bam_record.mapped_reverse_strand()) { if (tot > ( cig.get_length( ) - 3) ){ if (i.op() == 'S') is_three_prime_soft_clipped = true; else is_three_prime_soft_clipped = false; } } else { if (tot < 2) { if (i.op() == 'S') is_three_prime_soft_clipped = true; else is_three_prime_soft_clipped = false; } } if (i.op() == 'I' ) { pad_source.append(i.len(), '-'); int count = 0; tdna_itr.set_position(tdna_pos); while (tdna_itr.good()) { if (count >= i.len()) { break; } else { pad_target += tdna_itr.get(); tdna_itr.next(); tdna_pos++; count++; } } pad_match.append(i.len(), '+'); } else if(i.op() == 'D' || i.op() == 'N') { pad_source.append( t_dna.substr(sdna_pos, i.len())); sdna_pos += i.len(); pad_target.append(i.len(), '-'); pad_match.append(i.len(), '-'); } else if(i.op() == 'P') { pad_source.append(i.len(), '*'); pad_target.append(i.len(), '*'); pad_match.append(i.len(), ' '); } else if (i.op() == 'S') { if (!truncate_soft_clipped) { pad_source.append(i.len(), '-'); pad_match.append(i.len(), '+'); pad_target.append(i.len(), '+'); } int count = 0; while (tdna_itr.good()) { if (count >= i.len()) { break; } tdna_pos++; tdna_itr.next(); count++; } } else if (i.op() == 'H') { //nothing for clipped bases }else { std::string ps, pt, pm; ps.reserve(i.len()); pm.reserve(i.len()); ps = t_dna.substr(sdna_pos,i.len()); //tdna is really qdna tdna_itr.set_position(tdna_pos); int count = 0; while (tdna_itr.good()) { if (count < i.len()) { pt += tdna_itr.get(); } else { break; } tdna_itr.next(); count++; } for (unsigned int z = 0; z < ps.length(); z++) { if (ps[z] == pt[z]) { pad_match += '|'; } else if (ps[z] != 'A' || ps[z] != 'C' || ps[z] != 'G' || ps[z] != 'T') { if (iupac_flag) { std::vector<char> nukes(IUPAC::get_base(ps[z])); bool replaced = false; unsigned int nuke_ptr = 0; for (unsigned int n = 0; n < nukes.size(); n++) { if (nukes[n] == pt[z]) { pad_match += '|'; replaced = true; nuke_ptr = n; break; } //nuke_ptr++; } if (!replaced) { pad_match += ' '; } else if (!keep_iupac) { //std::cerr << "nukes["<<nuke_ptr<<"]: " << nukes[nuke_ptr] << " nukes.size() " << nukes.size() << std::endl; ps[z] = nukes[nuke_ptr]; }//keep_iupac }//iupac_flag else { pad_match += ' '; } }//end else if checking ps[z] agianst nukes else { pad_match += ' '; } }//end for loop pad_source += ps; pad_target += pt; sdna_pos += i.len(); tdna_pos += i.len(); } tot++; } /* std::cerr << "pad_source: " << pad_source << std::endl; std::cerr << "pad_target: " << pad_target << std::endl; std::cerr << "pad_match : " << pad_match << std::endl; */ }
void BAMUtils::dna() { MD md = bam_record.get_md(); Cigar cig = bam_record.get_cigar(); Sequence qseq = bam_record.get_seq(); int position = 0; std::string seq; Sequence::iterator qseq_itr = qseq.get_iterator(); for (Cigar::iterator i = cig.get_iterator(); i.good(); i.next()) { if (i.op() == 'M') { int count = 0; while (qseq_itr.good()) { if (count >= i.len()) { break; } else { seq += qseq_itr.get(); qseq_itr.next(); count++; } } } else if ((i.op() == 'I') || (i.op() == 'S')) { int count = 0; while (qseq_itr.good()) { if (count >= i.len()) { break; } qseq_itr.next(); count++; } //bool is_error = false; if (i.op() == 'S') { soft_clipped_bases += i.len(); //is_error = true; } } position++; } t_dna.reserve(seq.length()); int start = 0; MD::iterator md_itr = md.get_iterator(); std::string num; coord_t md_len = 0; char cur; while (md_itr.good()) { cur = md_itr.get(); if (std::isdigit(cur)) { num+=cur; //md_itr.next(); } else { if (num.length() > 0) { md_len = convert(num); num.clear(); t_dna += seq.substr(start, md_len); start += md_len; } } if (cur == '^') { //get nuc md_itr.next(); char nuc = md_itr.get(); while (std::isalpha(nuc)) { t_dna += nuc; md_itr.next(); nuc = md_itr.get(); } num += nuc; //it's a number now will //lose this value if i don't do it here //cur = nuc; } else if (std::isalpha(cur)) { t_dna += cur; start++; } md_itr.next(); } //clean up residual num if there is any if (num.length() > 0) { md_len = convert(num); num.clear(); t_dna += seq.substr(start, md_len); start += md_len; } }