Exemplo n.º 1
0
/**
 * Walks the record's CIGAR and appends, operation by operation, to the three
 * member strings pad_source, pad_target and pad_match so that they form a
 * column-aligned view of the alignment.
 *
 * Inputs read: bam_record (CIGAR, sequence, strand), the member string t_dna
 * (presumably filled by dna() beforehand -- confirm against callers), and the
 * flags truncate_soft_clipped, iupac_flag and keep_iupac.
 *
 * Per CIGAR operation:
 *   I      source '-'            target read bases   match '+'
 *   D / N  source t_dna bases    target '-'          match '-'
 *   P      source '*'            target '*'          match ' '
 *   S      (only when !truncate_soft_clipped) source '-', target '+', match '+';
 *          the read position is advanced either way
 *   H      nothing
 *   other  source t_dna bases    target read bases   match '|' or ' '
 *
 * Also sets is_three_prime_soft_clipped from the operations inspected near one
 * end of the CIGAR (which end depends on the mapped strand).
 *
 * The strings are appended to, not cleared, by this method.
 */
void BAMUtils::padded_alignment() {
	Cigar cig = bam_record.get_cigar();
	// Note: the local `tdna` is the record's own sequence (the original author
	// remarks it is "really qdna"); the member `t_dna` is the other strand of
	// the comparison and is indexed by sdna_pos.
	Sequence tdna = bam_record.get_seq();

	int sdna_pos = 0;	// next unread offset into t_dna
	int tdna_pos = 0;	// next unread offset into the record sequence
	pad_source.reserve(t_dna.length());
	pad_target.reserve(t_dna.length());
	pad_match.reserve(t_dna.length());
	Sequence::iterator tdna_itr = tdna.get_iterator();
	int tot = 0;		// index of the CIGAR operation being processed
	is_three_prime_soft_clipped = false;

	for (Cigar::iterator i = cig.get_iterator(); i.good(); i.next()) {
		// Decide whether this operation sits in the window used to detect a
		// soft clip: the last operations for reverse-strand records, the first
		// two otherwise. The last operation inside the window wins.
		bool in_clip_window;
		if (this->bam_record.mapped_reverse_strand()) {
			in_clip_window = (tot > ( cig.get_length( ) - 3));
		} else {
			in_clip_window = (tot < 2);
		}
		if (in_clip_window) {
			is_three_prime_soft_clipped = (i.op() == 'S');
		}

		if (i.op() == 'I') {
			// Insertion: bases exist only in the read.
			pad_source.append(i.len(), '-');

			tdna_itr.set_position(tdna_pos);
			for (int count = 0; tdna_itr.good() && count < i.len(); ++count) {
				pad_target += tdna_itr.get();
				tdna_itr.next();
				tdna_pos++;
			}
			pad_match.append(i.len(), '+');
		}
		else if (i.op() == 'D' || i.op() == 'N') {
			// Deletion / skipped region: bases exist only in t_dna.
			pad_source.append(t_dna.substr(sdna_pos, i.len()));
			sdna_pos += i.len();
			pad_target.append(i.len(), '-');
			pad_match.append(i.len(), '-');
		}
		else if (i.op() == 'P') {
			pad_source.append(i.len(), '*');
			pad_target.append(i.len(), '*');
			pad_match.append(i.len(), ' ');
		}
		else if (i.op() == 'S') {
			if (!truncate_soft_clipped) {
				pad_source.append(i.len(), '-');
				pad_match.append(i.len(), '+');
				pad_target.append(i.len(), '+');
			}
			// Soft-clipped bases are present in the read, so skip over them
			// regardless of whether they were rendered.
			for (int count = 0; tdna_itr.good() && count < i.len(); ++count) {
				tdna_pos++;
				tdna_itr.next();
			}
		}
		else if (i.op() == 'H') {
			//nothing for clipped bases
		}
		else {
			// Aligned stretch: compare t_dna against the read base by base.
			std::string ps = t_dna.substr(sdna_pos, i.len());
			std::string pt;

			tdna_itr.set_position(tdna_pos);
			for (int count = 0; tdna_itr.good() && count < i.len(); ++count) {
				pt += tdna_itr.get();
				tdna_itr.next();
			}

			for (std::string::size_type z = 0; z < ps.length(); z++) {
				// pt can be shorter than ps if the read ran out early; never
				// index past its end.
				const char target_base = (z < pt.length()) ? pt[z] : '\0';
				const char source_base = ps[z];

				if (source_base == target_base) {
					pad_match += '|';
					continue;
				}

				// The original test chained `!=` with `||`, which is always
				// true; the intent is "source is not a plain A/C/G/T".
				const bool canonical = (source_base == 'A' || source_base == 'C'
						|| source_base == 'G' || source_base == 'T');
				if (canonical || !iupac_flag) {
					pad_match += ' ';
					continue;
				}

				// Ambiguity code: it matches if any base it expands to equals
				// the read base.
				std::vector<char> nukes(IUPAC::get_base(source_base));
				bool replaced = false;
				unsigned int nuke_ptr = 0;
				for (unsigned int n = 0; n < nukes.size(); n++) {
					if (nukes[n] == target_base) {
						pad_match += '|';
						replaced = true;
						nuke_ptr = n;
						break;
					}
				}
				if (!replaced) {
					pad_match += ' ';
				} else if (!keep_iupac) {
					// Substitute the concrete base for the ambiguity code.
					ps[z] = nukes[nuke_ptr];
				}
			}
			pad_source += ps;
			pad_target += pt;
			sdna_pos += i.len();
			tdna_pos += i.len();
		}
		tot++;
	}
}
Exemplo n.º 2
0
/**
 * Appends to the member string t_dna a sequence assembled from the record's
 * MD tag and the aligned portion of its read, and adds the length of every
 * soft-clip operation to soft_clipped_bases.
 *
 * Step 1 walks the CIGAR and collects into `seq` the read bases covered by
 * alignment operations (M, '=' and X); bases covered by I or S are skipped.
 * Step 2 walks the MD string: a number copies that many bases from `seq`,
 * a letter is appended as-is and consumes one `seq` position, and a '^' run
 * appends its letters without consuming `seq`.
 *
 * t_dna is appended to, not cleared, by this method.
 */
void BAMUtils::dna() {
	MD md = bam_record.get_md();
	Cigar cig = bam_record.get_cigar();
	Sequence qseq = bam_record.get_seq();

	// --- Step 1: read bases that sit in aligned columns -------------------
	std::string seq;
	Sequence::iterator qseq_itr = qseq.get_iterator();
	for (Cigar::iterator i = cig.get_iterator(); i.good(); i.next()) {
		// '=' and 'X' consume read and reference exactly like 'M'; handling
		// only 'M' would leave the read iterator behind and misalign `seq`.
		const bool aligned = (i.op() == 'M' || i.op() == '=' || i.op() == 'X');
		const bool read_only = (i.op() == 'I' || i.op() == 'S');
		if (!aligned && !read_only) {
			continue;	// D, N, H, P do not consume read bases
		}

		for (int count = 0; qseq_itr.good() && count < i.len(); ++count) {
			if (aligned) {
				seq += qseq_itr.get();
			}
			qseq_itr.next();
		}

		if (i.op() == 'S') {
			soft_clipped_bases += i.len();
		}
	}

	// --- Step 2: replay the MD string against `seq` -----------------------
	t_dna.reserve(seq.length());
	int start = 0;			// next unread offset into seq
	MD::iterator md_itr = md.get_iterator();
	std::string num;		// digits of the match-run length being read
	coord_t md_len = 0;

	while (md_itr.good()) {
		const char cur = md_itr.get();

		// <cctype> classifiers require a value representable as unsigned char.
		if (std::isdigit(static_cast<unsigned char>(cur))) {
			num += cur;
		} else if (num.length() > 0) {
			// A non-digit ends the pending number: copy that many bases.
			md_len = convert(num);
			num.clear();
			t_dna += seq.substr(start, md_len);
			start += md_len;
		}

		if (cur == '^') {
			// Deletion: the following letters are absent from the read, so
			// append them without advancing `start`.
			md_itr.next();
			while (md_itr.good()) {
				const char nuc = md_itr.get();
				if (!std::isalpha(static_cast<unsigned char>(nuc))) {
					// First character after the run belongs to the next
					// number; keep it, because the next() below skips it.
					num += nuc;
					break;
				}
				t_dna += nuc;
				md_itr.next();
			}
			if (!md_itr.good()) {
				break;	// MD ended inside the deletion; nothing left to read
			}
		} else if (std::isalpha(static_cast<unsigned char>(cur))) {
			// Mismatch: append the letter and skip the read base it replaces.
			t_dna += cur;
			start++;
		}
		md_itr.next();
	}

	//clean up residual num if there is any
	if (num.length() > 0) {
		md_len = convert(num);
		num.clear();
		t_dna += seq.substr(start, md_len);
		start += md_len;
	}
}