Пример #1
0
			/**
			 * Construct from a pair of alignments. Resets this object, runs
			 * fillFragPair(p,q,header,RE) and, if requested, stores clones of
			 * both alignments in the members p and q.
			 *
			 * NOTE(review): the pair data is written to the RE argument, not
			 * visibly to this object -- confirm that this is intended.
			 *
			 * @param p first alignment of the pair
			 * @param q second alignment of the pair
			 * @param header BAM header passed on to fillFragPair
			 * @param RE read ends object filled by fillFragPair
			 * @param copyAlignment if true, keep clones of p and q in this object
			 **/
			ReadEnds(
				::libmaus::bambam::BamAlignment const & p, 
				::libmaus::bambam::BamAlignment const & q, 
				::libmaus::bambam::BamHeader const & header,
				::libmaus::bambam::ReadEnds & RE,
				bool const copyAlignment = false
			)
			{
				reset();
				fillFragPair(p,q,header,RE);

				// nothing more to do unless the caller asked for copies
				if ( ! copyAlignment )
					return;

				this->p = p.sclone();
				this->q = q.sclone();
			}
Пример #2
0
			/**
			 * put rank aux tag in current alignment
			 **/
			void putRank()
			{
				uint64_t const lrank = rank++;
				if ( putrank )
				{
					alignment.putRank("ZR",lrank /*,bamheader */);
				}			
			}
Пример #3
0
/**
 * Strip a leading "<digits>_" prefix from the read name of an alignment.
 *
 * The name is replaced by the part following the first underscore if
 *  - the name contains an underscore,
 *  - every character before that underscore is a decimal digit, and
 *  - exactly one of the read1/read2 flags is set.
 * In all other cases the alignment is left untouched.
 *
 * @param algn alignment whose name is modified in place
 * @return always true
 **/
bool strip12(libmaus::bambam::BamAlignment & algn)
{
	char const * const name = algn.getName();

	// find the first underscore (or the terminating NUL if there is none)
	char const * sep = name;
	while ( *sep && *sep != '_' )
		++sep;

	// no underscore, nothing to strip
	if ( ! *sep )
		return true;

	// the prefix in front of the underscore needs to consist of digits only;
	// isdigit is only defined for values representable as unsigned char (or EOF),
	// so convert before calling it
	bool prefixok = true;
	for ( char const * c = name; prefixok && c != sep; ++c )
		prefixok = (isdigit(static_cast<unsigned char>(*c)) != 0);

	// exactly one of the two mate flags needs to be set
	int const read1 = algn.isRead1() ? 1 : 0;
	int const read2 = algn.isRead2() ? 1 : 0;

	if ( prefixok && (read1+read2 == 1) )
	{
		// copy the suffix before replacing the name
		// (presumably replaceName rewrites the buffer name points into)
		std::string const upname(sep+1);
		algn.replaceName(upname.begin(),upname.size());
	}

	return true;
}
Пример #4
0
			/**
			 * Read one alignment block from GZ into alignment.
			 *
			 * The block is preceded by a four byte little endian length field.
			 * The data buffer of alignment is enlarged if it is too small for
			 * the block.
			 *
			 * @param GZ input stream
			 * @param alignment object receiving block size and block data
			 * @param bamheader if not null, the alignment is validated against this header
			 * @param validate if true, run the validity check on the alignment read
			 * @return false if the fourth byte of the length field could not be read
			 *         (end of stream), true if an alignment was read
			 * @throws ::libmaus::exception::LibMausException if the block is
			 *         truncated or the validity check fails
			 **/
			static bool readAlignmentGz(
				stream_type & GZ,
				::libmaus::bambam::BamAlignment & alignment,
				::libmaus::bambam::BamHeader const * bamheader = 0,
				bool const validate = true
			)
			{
				// fetch the four bytes of the length field
				int64_t lenbyte[4];
				for ( unsigned int i = 0; i < 4; ++i )
					lenbyte[i] = GZ.get();

				// a negative value for the last byte signals end of file
				if ( lenbyte[3] < 0 )
					return false;

				// combine the bytes, least significant byte first
				int64_t blocklen = 0;
				for ( unsigned int i = 0; i < 4; ++i )
					blocklen |= (lenbyte[i] << (8*i));
				alignment.blocksize = blocklen;

				// make sure the buffer can hold the block, then read it
				if ( alignment.blocksize > alignment.D.size() )
					alignment.D = ::libmaus::bambam::BamAlignment::D_array_type(alignment.blocksize,false);
				GZ.read(reinterpret_cast<char *>(alignment.D.begin()),alignment.blocksize);

				bool const blockcomplete =
					static_cast<int64_t>(GZ.gcount()) == static_cast<int64_t>(alignment.blocksize);

				if ( ! blockcomplete )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "Invalid alignment (EOF in alignment block of length " << alignment.blocksize  << ")" << std::endl;
					se.finish();
					throw se;
				}

				// validation is optional
				if ( ! validate )
					return true;

				// use the header for validation if we were given one
				libmaus_bambam_alignment_validity const validity = bamheader ? alignment.valid(*bamheader) : alignment.valid();

				if ( validity != ::libmaus::bambam::libmaus_bambam_alignment_validity_ok )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "Invalid alignment: " << validity << std::endl;
					se.finish();
					throw se;
				}

				return true;
			}
			/**
			 * Filter predicate on the read group of an alignment.
			 *
			 * @param algn alignment to be checked
			 * @return true if header.getReadGroupId yields a non-negative id for
			 *         the read group of algn and pBV->get is true for that id
			 **/
			virtual bool operator()(libmaus::bambam::BamAlignment const & algn) const
			{
				int64_t const rgid = header.getReadGroupId(algn.getReadGroup());

				// a negative id is rejected without consulting pBV
				if ( rgid < 0 )
					return false;

				return pBV->get(rgid) ? true : false;
			}
Пример #6
0
/**
 * Check the CIGAR operations of an alignment against the reference text.
 *
 * Unmapped alignments are accepted without further checks. For mapped
 * alignments the function walks along the CIGAR operations and verifies that
 *  - the reference id is valid for the header and for the text array,
 *  - no operation leaves the reference or the query sequence,
 *  - '=' operations cover matching bases and 'X' operations cover mismatching
 *    bases (comparison is case insensitive).
 * The first problem found is reported on std::cerr and ends the check.
 *
 * @param alignment alignment to be checked
 * @param bamheader BAM header (used for the number of reference sequences)
 * @param text reference sequences indexed by reference id
 * @return true if the alignment is unmapped or no problem was found
 **/
bool checkCigarValid(
	::libmaus::bambam::BamAlignment const & alignment,
	::libmaus::bambam::BamHeader const & bamheader,
	::libmaus::autoarray::AutoArray < ::libmaus::autoarray::AutoArray<uint8_t>::unique_ptr_type > const & text
)
{
	if ( alignment.isUnmap() )
		return true;

	if ( ! alignment.isCigarLengthConsistent() )
	{
		std::cerr << "[E] inconsistent cigar " << alignment.getCigarString() << " for " << alignment.getName() << std::endl;
		return false;
	}

	int64_t const refid = alignment.getRefID();

	if ( refid < 0 || refid >= static_cast<int64_t>(bamheader.chromosomes.size()) )
	{
		std::cerr << "[E] reference id " << refid << " out of range for " << alignment.getName() << std::endl;
		return false;
	}

	// the text array is indexed by reference id below, so it needs to cover the id as well
	if ( refid >= static_cast<int64_t>(text.size()) )
	{
		std::cerr << "[E] no reference text available for reference id " << refid << " for " << alignment.getName() << std::endl;
		return false;
	}

	::libmaus::autoarray::AutoArray<uint8_t> const & ctext = *(text[refid]);
	int64_t const reflen = static_cast<int64_t>(ctext.size());
	int64_t const seqlen = static_cast<int64_t>(alignment.getLseq());
	std::string const read = alignment.getRead();

	int64_t refpos = alignment.getPos();
	int64_t seqpos = 0;
	bool alok = true;

	for ( uint64_t i = 0; alok && i < alignment.getNCigar(); ++i )
	{
		char const cop = alignment.getCigarFieldOpAsChar(i);
		int64_t const clen = alignment.getCigarFieldLength(i);

		switch ( cop )
		{
			// match/mismatch, consumes reference and query base by base
			case '=':
			case 'X':
			case 'M':
			{
				for ( int64_t j = 0; alok && j < clen; ++j, ++refpos, ++seqpos )
				{
					if ( refpos < 0 || refpos >= reflen )
					{
						std::cerr << "[E] " << cop << " operation outside of chromosome coordinate range " << " for " << alignment.getName() << std::endl;
						alok = false;
					}
					else if ( seqpos >= seqlen )
					{
						std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl;
						alok = false;
					}
					else
					{
						// toupper requires a value representable as unsigned char
						bool const same =
							toupper(ctext[refpos]) == toupper(static_cast<unsigned char>(read[seqpos]));

						if ( cop == '=' && !same )
						{
							std::cerr << "[E] " << cop << " operation but mismatch between reference and query." << std::endl;
							alok = false;
						}
						else if ( cop == 'X' && same )
						{
							std::cerr << "[E] " << cop << " operation but match between reference and query." << std::endl;
							alok = false;
						}
					}
				}
				break;
			}
			// insertion and soft clipping, consume query only
			case 'I':
			case 'S':
			{
				// seqpos is never negative, so checking the end of the range suffices
				if ( clen > 0 && seqpos + clen > seqlen )
				{
					std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl;
					alok = false;
				}
				else
				{
					seqpos += clen;
				}
				break;
			}
			// deletion and skipped region, consume reference only
			case 'D':
			case 'N':
			{
				if ( clen > 0 && (refpos < 0 || refpos + clen > reflen) )
				{
					std::cerr << "[E] " << cop << " operation outside of reference coordinate range " << " for " << alignment.getName() << std::endl;
					alok = false;
				}
				else
				{
					refpos += clen;
				}
				break;
			}
			// hard clipping and padding consume neither reference nor query
			// (see the CIGAR operation table of the SAM specification)
			case 'H':
			case 'P':
			{
				break;
			}
			// any other operation character is ignored
			default:
			{
				break;
			}
		}
	}

	return alok;
}
Пример #7
0
			/**
			 * Fill RE with the data of the alignment pair (p,q).
			 *
			 * Sets reference ids and coordinates (each shifted by one), the
			 * orientation of the pair, the ranks of both reads, the sum of
			 * both scores and the read group id (shifted by one). If the name
			 * of p contains exactly four colons, the numbers following the
			 * second, third and fourth colon are parsed into RE.tile (plus
			 * one), RE.x and RE.y respectively.
			 *
			 * NOTE(review): tile, x and y are accumulated onto the values RE
			 * holds on entry, so RE is presumably expected to be zeroed by
			 * the caller -- confirm.
			 *
			 * @param p first alignment of the pair
			 * @param q second alignment of the pair
			 * @param header BAM header used for looking up the read group id
			 * @param RE object to be filled
			 **/
			static void fillFragPair(
				::libmaus::bambam::BamAlignment const & p, 
				::libmaus::bambam::BamAlignment const & q, 
				::libmaus::bambam::BamHeader const & header,
				::libmaus::bambam::ReadEnds & RE
			)
			{
				// reference ids and coordinates are stored shifted by one
				RE.read1Sequence = p.getRefIDChecked() + 1;
				RE.read1Coordinate = p.getCoordinate() + 1;
				RE.read2Sequence = q.getRefIDChecked() + 1;
				RE.read2Coordinate = q.getCoordinate() + 1;

				// orientation of the pair, first letter for p, second for q
				bool const pforward = ! p.isReverse();
				bool const qforward = ! q.isReverse();

				if ( pforward && qforward )
				{
					RE.orientation = ::libmaus::bambam::ReadEnds::FF;
				}
				else if ( pforward )
				{
					RE.orientation = ::libmaus::bambam::ReadEnds::FR;
				}
				else if ( qforward )
				{
					RE.orientation = ::libmaus::bambam::ReadEnds::RF;
				}
				else
				{
					RE.orientation = ::libmaus::bambam::ReadEnds::RR;
				}

				RE.read1IndexInFile = p.getRank();
				RE.read2IndexInFile = q.getRank();

				RE.score = p.getScore() + q.getScore();

				// prefer the mate reference id stored in p if p has a mapped mate
				if ( p.isPaired() && (!p.isMateUnmap()) )
					RE.read2Sequence = p.getNextRefIDChecked() + 1;

				// name of p without the last byte counted by getLReadName()
				uint8_t const * const nameb = reinterpret_cast<uint8_t const *>(p.getName());
				uint8_t const * const namee = nameb + (p.getLReadName()-1);

				// count the colons and remember the positions following the first four
				uint8_t const * sem[4] = { 0,0,0,0 };
				unsigned int numcolons = 0;
				for ( uint8_t const * c = nameb; c != namee; ++c )
					if ( *c == ':' )
					{
						if ( numcolons < 4 )
							sem[numcolons] = c+1;
						++numcolons;
					}

				// parse tile, x, y if the name has exactly four colons;
				// D is indexed by character and ends each number when it yields
				// zero (presumably a digit table defined elsewhere)
				if ( numcolons == 4 )
				{
					for ( uint8_t const * t = sem[1]; D[*t]; ++t )
					{
						RE.tile *= 10;
						RE.tile += *t-'0';
					}
					RE.tile += 1;

					for ( uint8_t const * t = sem[2]; D[*t]; ++t )
					{
						RE.x *= 10;
						RE.x += *t-'0';
					}

					for ( uint8_t const * t = sem[3]; D[*t]; ++t )
					{
						RE.y *= 10;
						RE.y += *t-'0';
					}
				}

				// read group id, stored shifted by one
				int64_t const rgid = p.getReadGroupId(header);
				RE.readGroup = rgid + 1;
			}