Exemple #1
0
			/**
			 * Count the run of consecutive BamIndexBinChunk records, starting at the
			 * current stream position, which share the refid and bin of the first record read.
			 *
			 * Records are read as raw images of sizeof(BamIndexBinChunk) bytes. The first
			 * record whose refid or bin differs from the run is put back by seeking
			 * backwards over it, so the stream is left positioned in front of it.
			 * An incomplete record at the end of the stream is neither interpreted nor counted.
			 *
			 * @param stream input stream positioned in front of a record
			 * @return pair (bin,count) where bin is the bin of the run (-1 if no record
			 *         was read) and count is the number of records in the run
			 **/
			static std::pair<int64_t,uint64_t> countBinChunks(stream_type & stream)
			{
				int64_t bin = -1;
				int64_t refid = -1;
				uint64_t chunks = 0;

				::libmaus2::bambam::BamIndexBinChunk BC;

				while ( stream.peek() != stream_type::traits_type::eof() )
				{
					stream.read(reinterpret_cast<char *>(&BC),sizeof(::libmaus2::bambam::BamIndexBinChunk));

					// a short read leaves BC only partially filled, do not interpret or count it
					if ( static_cast<uint64_t>(stream.gcount()) != sizeof(::libmaus2::bambam::BamIndexBinChunk) )
						break;

					// the first record read defines the run
					if ( refid < 0 )
						refid = BC.refid;
					if ( bin < 0 )
						bin = BC.bin;

					if (
						refid != static_cast<int64_t>(BC.refid)
						||
						bin != static_cast<int64_t>(BC.bin)
					)
					{
						// record belongs to the next run, put it back
						stream.clear();
						stream.seekg(-static_cast<int64_t>(sizeof(::libmaus2::bambam::BamIndexBinChunk)),std::ios::cur);
						stream.clear();
						break;
					}

					chunks++;
				}

				// construct the declared return type directly so the -1 sentinel stays signed
				return std::pair<int64_t,uint64_t>(bin,chunks);
			}
Exemple #2
0
			/**
			 * Scan the run of consecutive BamIndexLinearChunk records at the current
			 * stream position which share the refid of the first record and determine
			 * the largest chunkid among them.
			 *
			 * The first record carrying a different refid is put back by seeking
			 * backwards over it.
			 *
			 * @param stream input stream positioned in front of a record
			 * @return pair (refid,maxchunkid); refid is -1 if no record was read,
			 *         maxchunkid is never smaller than 0
			 **/
			static std::pair<int64_t,int64_t> getLinearMaxChunk(stream_type & stream)
			{
				typedef ::libmaus2::bambam::BamIndexLinearChunk chunk_type;

				chunk_type rec;
				int64_t runrefid = -1;
				int64_t largest = 0;

				for ( ; stream.peek() != stream_type::traits_type::eof() ; )
				{
					stream.read(reinterpret_cast<char *>(&rec),sizeof(chunk_type));

					// the first record defines the reference id of the run
					if ( runrefid == -1 )
						runrefid = rec.refid;

					bool const samerun = (static_cast<int64_t>(rec.refid) == runrefid);

					if ( ! samerun )
					{
						// record belongs to another reference, rewind over it and stop
						stream.clear();
						stream.seekg(-static_cast<int64_t>(sizeof(chunk_type)),std::ios::cur);
						break;
					}

					if ( largest < rec.chunkid )
						largest = rec.chunkid;
				}

				return std::pair<int64_t,int64_t>(runrefid,largest);
			}
Exemple #3
0
			/**
			 * Count the consecutive BamIndexLinearChunk records at the current stream
			 * position which share the refid of the first record read.
			 *
			 * The first record carrying a different refid is put back by seeking
			 * backwards over it.
			 *
			 * @param stream input stream positioned in front of a record
			 * @return pair (refid,count); refid is -1 if no record was read
			 **/
			static std::pair<int64_t,uint64_t> countLinearChunks(stream_type & stream)
			{
				typedef ::libmaus2::bambam::BamIndexLinearChunk chunk_type;

				chunk_type rec;
				int64_t runrefid = -1;
				uint64_t numrecords = 0;

				for ( ; stream.peek() != stream_type::traits_type::eof() ; ++numrecords )
				{
					stream.read(reinterpret_cast<char *>(&rec),sizeof(chunk_type));

					// the first record defines the reference id of the run
					if ( runrefid == -1 )
						runrefid = rec.refid;

					bool const samerun = (static_cast<int64_t>(rec.refid) == runrefid);

					if ( ! samerun )
					{
						// record belongs to another reference, rewind over it and stop (not counted)
						stream.clear();
						stream.seekg(-static_cast<int64_t>(sizeof(chunk_type)),std::ios::cur);
						break;
					}
				}

				return std::pair<int64_t,uint64_t>(runrefid,numrecords);
			}
Exemple #4
0
			/**
			 * Look at the next BamIndexLinearChunk record without consuming it and
			 * check whether it matches the given reference id and position window.
			 *
			 * The record is read as a raw image and the stream is then moved back by
			 * exactly the number of bytes which were read.
			 *
			 * @param stream input stream positioned in front of a record
			 * @param refid reference id the record is compared with
			 * @param pos position the record is compared with
			 * @param posshift number of low bits dropped from both positions before comparing
			 * @return true iff a complete record is available, its refid equals refid
			 *         and (record pos >> posshift) equals (pos >> posshift)
			 **/
			static bool peekLinearChunk(stream_type & stream, uint64_t const refid, uint64_t const pos, unsigned int const posshift)
			{
				if ( stream.peek() == stream_type::traits_type::eof() )
					return false;

				::libmaus2::bambam::BamIndexLinearChunk LC;

				stream.read(reinterpret_cast<char *>(&LC),sizeof(::libmaus2::bambam::BamIndexLinearChunk));
				// remember how much was actually read before touching the stream state again
				int64_t const got = static_cast<int64_t>(stream.gcount());

				// undo the read; rewinding by the bytes obtained keeps the position correct after a short read
				stream.clear();
				stream.seekg(-got,std::ios::cur);

				// an incomplete record leaves LC partially uninitialised, so it cannot match
				if ( got != static_cast<int64_t>(sizeof(::libmaus2::bambam::BamIndexLinearChunk)) )
					return false;

				return (LC.refid == refid) && ((LC.pos >> posshift)==(pos>>posshift));
			}
Exemple #5
0
			/**
			 * Look at the next BamIndexLinearChunk record without consuming it and
			 * check whether it matches the given reference id and chunk id.
			 *
			 * The record is read as a raw image and the stream is then moved back by
			 * exactly the number of bytes which were read.
			 *
			 * @param stream input stream positioned in front of a record
			 * @param refid reference id the record is compared with
			 * @param chunkid chunk id the record is compared with
			 * @return true iff a complete record is available and both its refid and
			 *         its chunkid equal the given values
			 **/
			static bool peekLinearChunk(stream_type & stream, uint64_t const refid, int64_t const chunkid)
			{
				if ( stream.peek() == stream_type::traits_type::eof() )
					return false;

				::libmaus2::bambam::BamIndexLinearChunk LC;

				stream.read(reinterpret_cast<char *>(&LC),sizeof(::libmaus2::bambam::BamIndexLinearChunk));
				// remember how much was actually read before touching the stream state again
				int64_t const got = static_cast<int64_t>(stream.gcount());

				// undo the read; rewinding by the bytes obtained keeps the position correct after a short read
				stream.clear();
				stream.seekg(-got,std::ios::cur);

				// an incomplete record leaves LC partially uninitialised, so it cannot match
				if ( got != static_cast<int64_t>(sizeof(::libmaus2::bambam::BamIndexLinearChunk)) )
					return false;

				return LC.refid == refid && LC.chunkid == chunkid;
			}
			/**
			 * Read one alignment block from GZ into alignment.
			 *
			 * The block is preceded by a four byte length which is assembled least
			 * significant byte first. The data buffer alignment.D is enlarged if it
			 * is too small for the block.
			 *
			 * @param GZ input stream
			 * @param alignment object receiving block size and block data
			 * @param bamheader optional header passed to the validity check (may be 0)
			 * @param validate if true then the alignment is checked after reading
			 * @return false if the stream ended while reading the length field, true otherwise
			 * @throws ::libmaus::exception::LibMausException if fewer bytes than announced
			 *         could be read or if validation is requested and fails
			 **/
			static bool readAlignmentGz(
				stream_type & GZ,
				::libmaus::bambam::BamAlignment & alignment,
				::libmaus::bambam::BamHeader const * bamheader = 0,
				bool const validate = true
			)
			{
				// fetch the four bytes of the length field one at a time
				int64_t lenbyte[4];
				for ( unsigned int i = 0; i < 4; ++i )
					lenbyte[i] = GZ.get();

				// a negative value for the last byte signals end of file
				if ( lenbyte[3] < 0 )
					return false;

				// combine the bytes, lowest order byte first
				alignment.blocksize =
					(lenbyte[0] <<  0) |
					(lenbyte[1] <<  8) |
					(lenbyte[2] << 16) |
					(lenbyte[3] << 24);

				// make sure the buffer can hold the block, then fill it
				if ( alignment.blocksize > alignment.D.size() )
					alignment.D = ::libmaus::bambam::BamAlignment::D_array_type(alignment.blocksize,false);
				GZ.read(reinterpret_cast<char *>(alignment.D.begin()),alignment.blocksize);

				int64_t const bytesread = static_cast<int64_t>(GZ.gcount());
				int64_t const bytesexpected = static_cast<int64_t>(alignment.blocksize);

				if ( bytesread != bytesexpected )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "Invalid alignment (EOF in alignment block of length " << alignment.blocksize  << ")" << std::endl;
					se.finish();
					throw se;
				}

				// nothing left to do if the caller does not want the block checked
				if ( ! validate )
					return true;

				libmaus_bambam_alignment_validity const status = bamheader ? alignment.valid(*bamheader) : alignment.valid();

				if ( status != ::libmaus::bambam::libmaus_bambam_alignment_validity_ok )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "Invalid alignment: " << status << std::endl;
					se.finish();
					throw se;
				}

				return true;
			}
Exemple #7
0
			/**
			 * Look at the next BamIndexBinChunk record without consuming it.
			 *
			 * The record is read as a raw image and the stream is then moved back
			 * over it.
			 *
			 * @param stream input stream positioned in front of a record
			 * @return the refid field of the next record (note: not its bin field,
			 *         despite the name of the function) or -1 if the stream is at its end
			 **/
			static int64_t peekBin(stream_type & stream)
			{
				typedef ::libmaus2::bambam::BamIndexBinChunk chunk_type;

				bool const atend = (stream.peek() == stream_type::traits_type::eof());

				if ( atend )
					return -1;

				chunk_type rec;
				stream.read(reinterpret_cast<char *>(&rec),sizeof(chunk_type));

				// a complete record is expected here
				assert ( stream.gcount() == sizeof(chunk_type) );

				// move back to where the record starts
				stream.clear();
				stream.seekg(-static_cast<int64_t>(sizeof(chunk_type)),std::ios::cur);

				return rec.refid;
			}
Exemple #8
0
			/**
			 * Load an index from stream into the member array refs.
			 *
			 * Layout as consumed by this function (integers are obtained via getLEInteger):
			 *  - four magic bytes 'B','A','I','\1'
			 *  - 4 byte number of references
			 *  - per reference:
			 *     - 4 byte number of bins, per bin a 4 byte bin number, a 4 byte
			 *       number of chunks and per chunk two 8 byte values (first,second)
			 *     - 4 byte number of linear intervals followed by one 8 byte value each
			 *
			 * The bins of each reference are stored sorted by ascending bin number
			 * (bins with equal number keep their order in the file).
			 *
			 * @param stream input stream positioned at the magic
			 * @throws libmaus2::exception::LibMausException if the magic cannot be read or is wrong
			 **/
			void init(stream_type & stream)
			{
				typedef libmaus2::bambam::BamIndexBin bin_type;

				// read the magic and check it
				char magic[4];
				stream.read(&magic[0],sizeof(magic));

				bool const badmagic =
					(! stream)
					|| (stream.gcount() != 4)
					|| (magic[0] != 'B')
					|| (magic[1] != 'A')
					|| (magic[2] != 'I')
					|| (magic[3] != '\1');

				if ( badmagic )
				{
					libmaus2::exception::LibMausException ex;
					ex.getStream() << "Failed to read BAI magic BAI\\1." << std::endl;
					ex.finish();
					throw ex;
				}

				// one entry per reference sequence
				uint32_t const numref = getLEInteger<stream_type,uint32_t,4>(stream);
				refs = libmaus2::autoarray::AutoArray<libmaus2::bambam::BamIndexRef>(numref);

				for ( uint64_t r = 0; r < numref; ++r )
				{
					// binning part
					uint32_t const numbins = getLEInteger<stream_type,uint32_t,4>(stream);

					#if 0
					std::cerr << "chr " << r << " distbins " << numbins << std::endl;
					#endif

					if ( numbins )
					{
						refs[r].bin = libmaus2::autoarray::AutoArray<bin_type>(numbins,false);

						// (bin number, index in file order) pairs used for sorting
						libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > order(numbins,false);
						// bins in the order they appear in the file
						libmaus2::autoarray::AutoArray<bin_type> unsorted(numbins,false);

						for ( uint64_t b = 0; b < numbins; ++b )
						{
							// the two header fields must be read in exactly this order
							uint32_t const binid = getLEInteger<stream_type,uint32_t,4>(stream);
							uint32_t const numchunks = getLEInteger<stream_type,uint32_t,4>(stream);

							bin_type & curbin = unsorted[b];
							curbin.bin = binid;
							curbin.chunks = libmaus2::autoarray::AutoArray<bin_type::Chunk>(numchunks,false);

							// each chunk consists of two 8 byte values, first then second
							for ( uint64_t c = 0; c < numchunks; ++c )
							{
								curbin.chunks[c].first = getLEInteger<stream_type,uint64_t,8>(stream);
								curbin.chunks[c].second = getLEInteger<stream_type,uint64_t,8>(stream);
							}

							order[b] = std::pair<uint64_t,uint64_t>(binid,b);
						}

						// ascending bin number, file order breaks ties
						std::sort(order.begin(),order.end());

						// place the bins in sorted order
						for ( uint64_t b = 0; b < numbins; ++b )
							refs[r].bin[b] = unsorted[order[b].second];
					}

					// linear part
					uint32_t const numintervals = getLEInteger<stream_type,uint32_t,4>(stream);

					if ( numintervals )
					{
						refs[r].lin.intervals = libmaus2::autoarray::AutoArray<uint64_t>(numintervals,false);

						for ( uint64_t v = 0; v < numintervals; ++v )
							refs[r].lin.intervals[v] = getLEInteger<stream_type,uint64_t,8>(stream);
					}
				}
			}