예제 #1
0
/**
 * Make one pass over the input alignments and record which reference
 * sequences and which read groups of the header are actually referenced.
 *
 * @param arginfo    arguments handed to the input decoder factory
 * @param usedrefseq receives a bit vector with one bit per reference sequence
 *                   of the input header; a bit is set if a mapped read, or the
 *                   mapped mate of a paired read, refers to that sequence
 * @param usedrg     receives a bit vector with one bit per read group of the
 *                   input header; a bit is set if the read group of some
 *                   alignment resolves to that id
 * @param uheader    receives a clone of the input header
 *
 * Both bit vectors have their index set up (setupIndex) before being handed
 * over.
 */
void getUsedRefSeqs(
	libmaus2::util::ArgInfo const & arginfo,
	libmaus2::bitio::IndexedBitVector::unique_ptr_type & usedrefseq,
	libmaus2::bitio::IndexedBitVector::unique_ptr_type & usedrg,
	libmaus2::bambam::BamHeader::unique_ptr_type & uheader
)
{
	// open the input
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type inputwrapper(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo,false /* put rank */)
	);
	::libmaus2::bambam::BamAlignmentDecoder & decoder = inputwrapper->getDecoder();
	::libmaus2::bambam::BamHeader const & inheader = decoder.getHeader();
	::libmaus2::bambam::BamAlignment const & rec = decoder.getAlignment();

	// one bit per reference sequence / read group in the header
	uint64_t const refcount = inheader.getNumRef();
	libmaus2::bitio::IndexedBitVector::unique_ptr_type refbits(
		new libmaus2::bitio::IndexedBitVector(refcount)
	);
	libmaus2::bitio::IndexedBitVector::unique_ptr_type rgbits(
		new libmaus2::bitio::IndexedBitVector(inheader.getNumReadGroups())
	);

	while ( decoder.readAlignment() )
	{
		// reference of the read itself; paired and unpaired reads are treated alike
		if ( rec.isMapped() )
		{
			assert ( rec.getRefID() >= 0 );
			assert ( rec.getRefID() < static_cast<int64_t>(refbits->size()) );
			refbits->set(rec.getRefID(),true);
		}

		// reference of the mate, only meaningful for paired reads
		if ( rec.isPaired() && rec.isMateMapped() )
		{
			assert ( rec.getNextRefID() >= 0 );
			assert ( rec.getNextRefID() < static_cast<int64_t>(refbits->size()) );
			refbits->set(rec.getNextRefID(),true);
		}

		// read group; a negative id is skipped
		int64_t const rg = inheader.getReadGroupId(rec.getReadGroup());
		if ( rg >= 0 )
		{
			assert ( rg < static_cast<int64_t>(inheader.getNumReadGroups()) );
			rgbits->set(rg,true);
		}
	}

	refbits->setupIndex();
	rgbits->setupIndex();

	// hand the results to the caller
	usedrefseq = UNIQUE_PTR_MOVE(refbits);
	usedrg = UNIQUE_PTR_MOVE(rgbits);

	libmaus2::bambam::BamHeader::unique_ptr_type headerclone(inheader.uclone());
	uheader = UNIQUE_PTR_MOVE(headerclone);
}
예제 #2
0
/**
 * Compute checksums over all alignments of the input and print them on
 * standard output.
 *
 * Arguments read from arginfo:
 *  - verbose: if non-zero, progress is printed on standard error every
 *             1024*1024 alignments and the total run time at the end
 *  - hash:    name of the hash handed to the checksums factory
 *
 * @param arginfo program arguments
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException if standard input is a terminal
 */
int bamseqchksum(::libmaus2::util::ArgInfo const & arginfo)
{
	// never read alignment data interactively
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "Refusing to read data from terminal, please redirect standard input to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}

	libmaus2::timing::RealTimeClock clock;
	clock.start();
	// elapsed time at the previous progress report
	double lastreport = 0;

	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	std::string const hash = arginfo.getValue<std::string>("hash",getDefaultHash());

	// open the input
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type inputwrapper(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo)
	);
	::libmaus2::bambam::BamAlignmentDecoder & decoder = inputwrapper->getDecoder();
	::libmaus2::bambam::BamHeader const & inheader = decoder.getHeader();
	::libmaus2::bambam::BamAlignment & rec = decoder.getAlignment();

	libmaus2::bambam::ChecksumsInterface::unique_ptr_type checksumsptr(
		libmaus2::bambam::ChecksumsFactory::construct(hash,inheader)
	);
	libmaus2::bambam::ChecksumsInterface & checksums = *checksumsptr;

	// alignment counter; only maintained in verbose mode
	uint64_t numalgn = 0;
	uint64_t const reportmask = 1024*1024-1;

	while ( decoder.readAlignment() )
	{
		checksums.update(rec);

		if ( ! verbose )
			continue;

		numalgn += 1;

		if ( (numalgn & reportmask) != 0 )
			continue;

		double const now = clock.getElapsedSeconds();
		checksums.printVerbose(std::cerr,numalgn,rec,now-lastreport);
		lastreport = now;
	}

	checksums.printChecksums(std::cout);

	if ( verbose )
	{
		std::cerr << "[V] run time " << clock.getElapsedSeconds() << " (" << clock.formatTime(clock.getElapsedSeconds()) << ")" << std::endl;
	}

	return EXIT_SUCCESS;
}
예제 #3
0
int bamheap2(libmaus::util::ArgInfo const & arginfo)
{
	bool const verbose = arginfo.getValue("verbose",getDefaultVerbose());
	std::string const reference = arginfo.getUnparsedValue("reference",std::string());
	std::string const outputprefix = arginfo.getUnparsedValue("outputprefix",std::string());
	
	libmaus::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper(
		libmaus::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo));
	::libmaus::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder());
	::libmaus::bambam::BamAlignmentDecoder & dec = *ppdec;
	::libmaus::bambam::BamHeader const & header = dec.getHeader();	
	::libmaus::bambam::BamAlignment const & algn = dec.getAlignment();
	
	double const damult = arginfo.getValue<double>("amult",1);
	double const dcmult = arginfo.getValue<double>("cmult",1);
	double const dgmult = arginfo.getValue<double>("gmult",1);
	double const dtmult = arginfo.getValue<double>("tmult",1);
	double const dpadmult = arginfo.getValue<double>("padmult",1);
	
	double maxmult = 0;
	maxmult = std::max(damult,maxmult);
	maxmult = std::max(dcmult,maxmult);
	maxmult = std::max(dgmult,maxmult);
	maxmult = std::max(dtmult,maxmult);
	maxmult = std::max(dpadmult,maxmult);
	
	uint64_t const amult = std::floor((damult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const cmult = std::floor((dcmult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const gmult = std::floor((dgmult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const tmult = std::floor((dtmult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const padmult = std::floor((dpadmult / maxmult) * (1ull<<16) + 0.5);
	
	libmaus::fastx::FastAIndex::unique_ptr_type Pindex;
	libmaus::aio::CheckedInputStream::unique_ptr_type PCIS;
	if ( reference.size() )
	{
		libmaus::fastx::FastAIndex::unique_ptr_type Tindex(
			libmaus::fastx::FastAIndex::load(reference+".fai")
		);
		Pindex = UNIQUE_PTR_MOVE(Tindex);
		
		libmaus::aio::CheckedInputStream::unique_ptr_type TCIS(new libmaus::aio::CheckedInputStream(reference));
		PCIS = UNIQUE_PTR_MOVE(TCIS);
	}

	libmaus::autoarray::AutoArray<libmaus::bambam::cigar_operation> cigop;
	libmaus::autoarray::AutoArray<char> bases;
	
	int64_t prevrefid = -1;
	std::string refidname = "*";
	
	std::map< uint64_t, HeapEntry > M;
	uint64_t alcnt = 0;
	std::vector< std::pair<char,uint8_t> > pendinginserts;
	int64_t loadedRefId = -1;
	int64_t streamRefId = -1;
	libmaus::autoarray::AutoArray<char> refseqbases;
	ConsensusAccuracy * consacc = 0;
	std::map<uint64_t,ConsensusAccuracy> Mconsacc;
	typedef libmaus::util::shared_ptr<std::ostringstream>::type stream_ptr_type;
	stream_ptr_type Pstream;
	ConsensusAux Caux;
	
	Caux.M['a'] = Caux.M['A'] = amult;
	Caux.M['c'] = Caux.M['C'] = cmult;
	Caux.M['g'] = Caux.M['G'] = gmult;
	Caux.M['t'] = Caux.M['T'] = tmult;
	Caux.M[padsym] = padmult;
	
	while ( dec.readAlignment() )
	{
		if ( algn.isMapped() && (!algn.isQCFail()) )
		{
			assert ( ! pendinginserts.size() );
		
			uint32_t const numcigop = algn.getCigarOperations(cigop);
			uint64_t readpos = 0;
			uint64_t refpos = algn.getPos();
			uint64_t const seqlen = algn.decodeRead(bases);
			uint8_t const * qual = libmaus::bambam::BamAlignmentDecoderBase::getQual(algn.D.begin());
			
			// handle finished columns
			if ( algn.getRefID() != prevrefid )
			{
				while ( M.size() )
				{
					HeapEntry & H = M.begin()->second;
					
					if ( outputprefix.size() && (streamRefId != prevrefid) )
					{
						if ( Pstream )
						{
							std::ostringstream fnostr;
							fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
							libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
							PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
							PFOS << Pstream->str() << '\n';
							
							Pstream.reset();
						}
						
						stream_ptr_type Tstream(new std::ostringstream);
						Pstream = Tstream;
						streamRefId = prevrefid;
					}
					
					if ( Pindex && (loadedRefId != prevrefid) )
					{
						refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname));
						loadedRefId = prevrefid;
						
						if ( Mconsacc.find(loadedRefId) == Mconsacc.end() )
							Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size());
						
						consacc = &(Mconsacc[loadedRefId]);
					}
					
					H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get());
					
					M.erase(M.begin());
				}
			
				prevrefid = algn.getRefID();
				refidname = header.getRefIDName(prevrefid);
			}
			else
			{
				while ( M.size() && M.begin()->first < refpos )
				{
					HeapEntry & H = M.begin()->second;

					if ( outputprefix.size() && (streamRefId != prevrefid) )
					{
						if ( Pstream )
						{
							std::ostringstream fnostr;
							fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
							libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
							PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
							PFOS << Pstream->str() << '\n';

							Pstream.reset();
						}
						
						stream_ptr_type Tstream(new std::ostringstream);
						Pstream = Tstream;
						streamRefId = prevrefid;
					}

					if ( Pindex && (loadedRefId != prevrefid) )
					{
						refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname));
						loadedRefId = prevrefid;

						if ( Mconsacc.find(loadedRefId) == Mconsacc.end() )
							Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size());

						consacc = &(Mconsacc[loadedRefId]);
					}
					
					H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get());
					
					M.erase(M.begin());				
				}
			}
			
			for ( uint64_t ci = 0; ci < numcigop; ++ci )
			{
				uint64_t const ciglen = cigop[ci].second;
				
				switch ( cigop[ci].first )
				{
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CMATCH:
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CEQUAL:
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CDIFF:
					{
						if ( pendinginserts.size() )
						{
							M[refpos].I.push_back(pendinginserts);
							pendinginserts.resize(0);
						}
					
						for ( uint64_t i = 0; i < ciglen; ++i )
						{
							M[refpos].V.push_back(std::make_pair(bases[readpos],qual[readpos]));
							readpos++;
							refpos++;
						}
						break;
					}
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CINS:
					{
						for ( uint64_t i = 0; i < ciglen; ++i, ++readpos )
							pendinginserts.push_back(std::make_pair(bases[readpos],qual[readpos]));
						break;
					}
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CDEL:
						// handle pending inserts
						if ( pendinginserts.size() )
						{
							M[refpos].I.push_back(pendinginserts);
							pendinginserts.resize(0);
						}
						
						// deleting bases from the reference
						for ( uint64_t i = 0; i < ciglen; ++i, ++refpos )
							M[refpos].V.push_back(std::make_pair(padsym,0));
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CREF_SKIP:
						// handle pending inserts
						if ( pendinginserts.size() )
						{
							M[refpos].I.push_back(pendinginserts);
							pendinginserts.resize(0);
						}

						// skip bases on reference
						for ( uint64_t i = 0; i < ciglen; ++i )
						{
							refpos++;
						}
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CSOFT_CLIP:
						// skip bases on read
						for ( uint64_t i = 0; i < ciglen; ++i )
						{
							readpos++;
						}
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CHARD_CLIP:
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CPAD:
					{
						for ( uint64_t i = 0; i < ciglen; ++i, ++readpos )
							pendinginserts.push_back(std::make_pair(padsym,0));
						break;
					}
				}
			}

			if ( pendinginserts.size() )
			{
				M[refpos].I.push_back(pendinginserts);
				M[refpos].iadd++;
				pendinginserts.resize(0);
			}

			assert ( readpos == seqlen );
		}
		
		if ( verbose && ((++alcnt % (1024*1024)) == 0) )
			std::cerr << "[V] " << alcnt << std::endl;
	}

	while ( M.size() )
	{
		HeapEntry & H = M.begin()->second;

		if ( outputprefix.size() && (streamRefId != prevrefid) )
		{
			if ( Pstream )
			{
				std::ostringstream fnostr;
				fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
				libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
				PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
				PFOS << Pstream->str() << '\n';

				Pstream.reset();
			}
			
			stream_ptr_type Tstream(new std::ostringstream);
			Pstream = Tstream;
			streamRefId = prevrefid;
		}

		if ( Pindex && (loadedRefId != prevrefid) )
		{
			refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname));
			loadedRefId = prevrefid;

			if ( Mconsacc.find(loadedRefId) == Mconsacc.end() )
				Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size());

			consacc = &(Mconsacc[loadedRefId]);
		}
			
		H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get());
		
		M.erase(M.begin());
	}
	
	if ( Pstream )
	{
		std::ostringstream fnostr;
		fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
		libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
		PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
		PFOS << Pstream->str() << '\n';

		Pstream.reset();
	}
	
	ConsensusAccuracy constotal;
	for ( std::map<uint64_t,ConsensusAccuracy>::const_iterator ita = Mconsacc.begin(); ita != Mconsacc.end(); ++ita )
	{
		std::cerr << header.getRefIDName(ita->first) << "\t" << ita->second << std::endl;

		std::map<uint64_t,uint64_t> const M = ita->second.depthhistogram.get();
		uint64_t total = 0;
		uint64_t preavg = 0;
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			total += aita->second;
			preavg += aita->first * aita->second;
		}

		uint64_t acc = 0;		
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << header.getRefIDName(ita->first) << "," << aita->first << ",+]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		acc = 0;
		for ( std::map<uint64_t,uint64_t>::const_reverse_iterator aita = M.rbegin(); aita != M.rend(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << header.getRefIDName(ita->first) << "," << aita->first << ",-]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		
		std::cerr << "H[" << header.getRefIDName(ita->first) << ",avg]\t" << 
			static_cast<double>(preavg)/total << std::endl;
		
		constotal += ita->second;
	}
	if ( Mconsacc.size() )
	{
		std::cerr << "all\t" << constotal << std::endl;

		std::map<uint64_t,uint64_t> const M = constotal.depthhistogram.get();
		uint64_t total = 0;
		uint64_t preavg = 0;
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			total += aita->second;
			preavg += aita->first * aita->second;
		}

		uint64_t acc = 0;		
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << "all" << "," << aita->first << ",+]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		acc = 0;
		for ( std::map<uint64_t,uint64_t>::const_reverse_iterator aita = M.rbegin(); aita != M.rend(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << "all" << "," << aita->first << ",-]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		
		std::cerr << "H[all,avg]\t" << static_cast<double>(preavg) / total << std::endl;
		
	}

	return EXIT_SUCCESS;
}
예제 #4
0
/**
 * Count bases in the mapped reads of the input and print three totals on
 * standard error: all bases, clipped bases (soft and hard) and aligned bases
 * (match, insertion, equal, diff).
 *
 * Hard clipped bases are added to the total on top of the stored sequence
 * length. If regular expression support is compiled in
 * (LIBMAUS2_HAVE_REGEX_H) and the "name" argument is non-empty, only reads
 * whose name matches that expression are counted.
 *
 * @param arginfo program arguments
 */
void bamalignfrac(::libmaus2::util::ArgInfo const & arginfo)
{
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type inputwrapper(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo)
	);
	::libmaus2::bambam::BamAlignmentDecoder & decoder = inputwrapper->getDecoder();
	::libmaus2::bambam::BamAlignment const & rec = decoder.getAlignment();
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigar;

	uint64_t numaligned = 0;
	uint64_t numclipped = 0;
	uint64_t numtotal = 0;

	#if defined(LIBMAUS2_HAVE_REGEX_H)
	// optional read name filter
	std::string const namepattern = arginfo.getUnparsedValue("name","");
	libmaus2::util::unique_ptr<libmaus2::regex::PosixRegex>::type namefilter;
	if ( namepattern.size() )
	{
		libmaus2::util::unique_ptr<libmaus2::regex::PosixRegex>::type tnamefilter(
			new libmaus2::regex::PosixRegex(namepattern)
		);
		namefilter = UNIQUE_PTR_MOVE(tnamefilter);
	}
	#endif

	while ( decoder.readAlignment() )
	{
		// unmapped reads are not counted
		if ( ! rec.isMapped() )
			continue;

		#if defined(LIBMAUS2_HAVE_REGEX_H)
		// skip reads rejected by the name filter
		if ( namefilter && (namefilter->findFirstMatch(rec.getName()) == -1) )
			continue;
		#endif

		uint32_t const numops = rec.getCigarOperations(cigar);

		numtotal += rec.getLseq();

		for ( uint64_t opid = 0; opid < numops; ++opid )
		{
			switch ( cigar[opid].first )
			{
				// operations placing read bases in the alignment
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CMATCH:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CINS:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CEQUAL:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDIFF:
					numaligned += cigar[opid].second;
					break;
				// hard clipped bases also count towards the total
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CHARD_CLIP:
					numtotal += cigar[opid].second;
					numclipped += cigar[opid].second;
					break;
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CSOFT_CLIP:
					numclipped += cigar[opid].second;
					break;
				// no read bases involved
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDEL:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CREF_SKIP:
					break;
			}
		}
	}

	std::cerr << "total bases in mapped reads\t" << numtotal << std::endl;
	std::cerr << "clipped (hard and soft) bases in mapped reads\t" << numclipped << std::endl;
	std::cerr << "aligned bases in mapped reads\t" << numaligned << std::endl;
}
예제 #5
0
/**
 * Copy the input to the output while dropping all @SQ and @RG header lines
 * which no alignment refers to. Reference ids of the alignments are renumbered
 * to match the reduced header.
 *
 * The input is read twice (once by getUsedRefSeqs, once for copying), so it
 * has to be given by name through the I argument.
 *
 * Further arguments read from arginfo:
 *  - tmpfile:                 prefix for the temporary index file
 *  - md5 / md5filename:       if enabled and a file name is given, an md5 digest of the output is stored
 *  - index / indexfilename:   if enabled and a file name is given, a BAM index of the output is stored
 *
 * @param arginfo program arguments
 * @return 0
 * @throws libmaus2::exception::LibMausException if no input file name is given or it is "-"
 */
uint64_t bamheaderfilter(libmaus2::util::ArgInfo const & arginfo)
{
	std::string const infn = arginfo.getUnparsedValue("I","");

	if ( infn.empty() || infn == "-" )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "No input filename given, please set the I key appropriately." << std::endl;
		ex.finish();
		throw ex;
	}

	// first pass: find out which reference sequences and read groups are in use
	libmaus2::bitio::IndexedBitVector::unique_ptr_type refkeep;
	libmaus2::bitio::IndexedBitVector::unique_ptr_type rgkeep;
	libmaus2::bambam::BamHeader::unique_ptr_type inheader;

	getUsedRefSeqs(arginfo,refkeep,rgkeep,inheader);

	/*
	 * start index/md5 callbacks
	 */
	std::string const indextmpfn =
		arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName()) + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(indextmpfn);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > callbacks;

	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type md5callback;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havename =
			arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

		if ( ! havename )
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
		else
		{
			md5filename = arginfo.getUnparsedValue("md5filename","");

			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type tmd5callback(
				new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5
			);
			md5callback = UNIQUE_PTR_MOVE(tmd5callback);
			callbacks.push_back(md5callback.get());
		}
	}

	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type indexcallback;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const havename =
			arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

		if ( ! havename )
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}
		else
		{
			indexfilename = arginfo.getUnparsedValue("indexfilename","");

			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type tindexcallback(
				new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(indextmpfn)
			);
			indexcallback = UNIQUE_PTR_MOVE(tindexcallback);
			callbacks.push_back(indexcallback.get());
		}
	}
	/*
	 * end md5/index callbacks
	 */

	// rebuild the header text: the i'th SQ/RG line is kept iff bit i is set
	std::vector<libmaus2::bambam::HeaderLine> const lines =
		libmaus2::bambam::HeaderLine::extractLines(std::string(inheader->text));

	std::ostringstream filtered;
	uint64_t sqseen = 0;
	uint64_t rgseen = 0;
	for ( uint64_t i = 0; i < lines.size(); ++i )
	{
		libmaus2::bambam::HeaderLine const & line = lines[i];
		bool keep = true;

		if ( line.type == "SQ" )
		{
			keep = refkeep->get(sqseen);
			sqseen += 1;
		}
		else if ( line.type == "RG" )
		{
			keep = rgkeep->get(rgseen);
			rgseen += 1;
		}

		if ( keep )
			filtered << line.line << std::endl;
	}
	std::string const headertext = filtered.str();

	// add PG line to header
	std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bamheaderfilter", // ID
		"bamheaderfilter", // PN
		arginfo.commandline, // CL
		::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);
	// construct new header and open the output
	::libmaus2::bambam::BamHeader uphead(upheadtext);
	libmaus2::bambam::BamBlockWriterBase::unique_ptr_type writer(
		libmaus2::bambam::BamBlockWriterBaseFactory::construct(uphead, arginfo, &callbacks)
	);

	// second pass over the input
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type inputwrapper(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo,false /* put rank */)
	);
	::libmaus2::bambam::BamAlignmentDecoder & decoder = inputwrapper->getDecoder();
	::libmaus2::bambam::BamAlignment & rec = decoder.getAlignment();

	while ( decoder.readAlignment() )
	{
		// new id of a kept reference = number of kept references up to and including it, minus one
		if ( rec.isMapped() )
		{
			assert ( rec.getRefID() >= 0 );
			assert ( rec.getRefID() < static_cast<int64_t>(refkeep->size()) );
			assert ( refkeep->get(rec.getRefID()) );
			assert ( refkeep->rank1(rec.getRefID())-1 < uphead.getNumRef() );
			rec.putRefId(refkeep->rank1(rec.getRefID())-1);
		}
		if ( rec.isPaired() && rec.isMateMapped() )
		{
			assert ( rec.getNextRefID() >= 0 );
			assert ( rec.getNextRefID() < static_cast<int64_t>(refkeep->size()) );
			assert ( refkeep->get(rec.getNextRefID()) );
			assert ( refkeep->rank1(rec.getNextRefID())-1 < uphead.getNumRef() );
			rec.putNextRefId(refkeep->rank1(rec.getNextRefID())-1);
		}

		// erase unmapped refid and pos
		if ( rec.isUnmap() )
		{
			rec.putRefId(-1);
			rec.putPos(-1);
		}
		if ( rec.isMateUnmap() )
		{
			rec.putNextRefId(-1);
			rec.putNextPos(-1);
		}

		writer->writeAlignment(rec);
	}

	// close the output before the callbacks store their results
	writer.reset();

	if ( md5callback )
		md5callback->saveDigestAsFile(md5filename);

	if ( indexcallback )
		indexcallback->flush(std::string(indexfilename));

	return 0;
}
예제 #6
0
/**
 * Read all alignments of the input to check that it can be decoded. If
 * decoding throws, the name of the last alignment read successfully and the
 * number of alignments read so far are printed on standard error and the
 * exception is passed on.
 *
 * If passthrough is set, every alignment is also written to an output stream
 * (with a bamvalidate @PG line added to the header), optionally together with
 * an md5 digest and a BAM index.
 * NOTE(review): passthrough is not declared in the visible code, presumably
 * it is a template parameter of this function - confirm.
 *
 * Arguments read from arginfo:
 *  - verbose:       print the number of checked alignments and the rate at the end
 *  - basequalhist:  collect and print a histogram of the base quality values
 *  - tmpfile, md5, md5filename, index, indexfilename: only used if passthrough is set
 *
 * @param arginfo program arguments
 * @return EXIT_SUCCESS
 */
int bamvalidateTemplate(::libmaus2::util::ArgInfo const & arginfo)
{
	libmaus2::timing::RealTimeClock clock;
	clock.start();

	bool const verbose = arginfo.getValue("verbose",getDefaultVerbose());
	bool const basequalhist = arginfo.getValue("basequalhist",getDefaultBaseQualHist());

	// open the input
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type inputwrapper(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo,false /* put rank */)
	);
	::libmaus2::bambam::BamAlignmentDecoder & decoder = inputwrapper->getDecoder();
	::libmaus2::bambam::BamHeader const & inheader = decoder.getHeader();
	::libmaus2::bambam::BamAlignment const & rec = decoder.getAlignment();

	// add PG line to header
	std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine(
		inheader.text,
		"bamvalidate", // ID
		"bamvalidate", // PN
		arginfo.commandline, // CL
		::libmaus2::bambam::ProgramHeaderLineSet(inheader.text).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);
	// construct new header
	::libmaus2::bambam::BamHeader uphead(upheadtext);

	/*
	 * start index/md5 callbacks and alignment writer
	 */
	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > callbacks;
	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type md5callback;
	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type indexcallback;
	libmaus2::bambam::BamBlockWriterBase::unique_ptr_type writer;

	if ( passthrough )
	{
		std::string const indextmpfn =
			arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName()) + "_index";
		::libmaus2::util::TempFileRemovalContainer::addTempFile(indextmpfn);

		if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
		{
			bool const havename =
				arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

			if ( ! havename )
			{
				std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
			}
			else
			{
				md5filename = arginfo.getUnparsedValue("md5filename","");

				::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type tmd5callback(
					new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5
				);
				md5callback = UNIQUE_PTR_MOVE(tmd5callback);
				callbacks.push_back(md5callback.get());
			}
		}
		if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
		{
			bool const havename =
				arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

			if ( ! havename )
			{
				std::cerr << "[V] no filename for index given, not creating index" << std::endl;
			}
			else
			{
				indexfilename = arginfo.getUnparsedValue("indexfilename","");

				libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type tindexcallback(
					new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(indextmpfn)
				);
				indexcallback = UNIQUE_PTR_MOVE(tindexcallback);
				callbacks.push_back(indexcallback.get());
			}
		}

		// the writer gets a null pointer if no callback was registered
		std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * const callbacksptr =
			callbacks.size() ? &callbacks : 0;

		libmaus2::bambam::BamBlockWriterBase::unique_ptr_type twriter(
			libmaus2::bambam::BamBlockWriterBaseFactory::construct(uphead, arginfo, callbacksptr)
		);
		writer = UNIQUE_PTR_MOVE(twriter);
	}

	// name of the last alignment decoded successfully
	libmaus2::autoarray::AutoArray<char> lastvalidname(256); // max valid read name is 255 bytes
	uint64_t numok = 0;

	::libmaus2::autoarray::AutoArray<char> qual;
	// one counter per possible quality byte value
	libmaus2::autoarray::AutoArray<uint64_t> qualhist(static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())+1);
	std::fill(qualhist.begin(),qualhist.end(),0ull);

	try
	{
		while ( decoder.readAlignment() )
		{
			if ( passthrough )
				writer->writeAlignment(rec);

			if ( basequalhist )
			{
				uint8_t const * const qbegin = libmaus2::bambam::BamAlignmentDecoderBase::getQual(rec.D.begin());
				uint64_t const qlen = rec.getLseq();

				for ( uint8_t const * q = qbegin; q != qbegin + qlen; ++q )
					qualhist[*q]++;
			}

			char const * const name = rec.getName();
			uint64_t const namelen = rec.getLReadName();
			std::copy(name,name+namelen+1,lastvalidname.begin());

			numok += 1;
		}
	}
	catch(std::exception const &)
	{
		// tell the user where decoding stopped, then pass the error on
		std::cerr << "[E] name of last valid alignment was " << lastvalidname.begin() << std::endl;
		std::cerr << "[E] read " << numok << " valid alignments" << std::endl;
		throw;
	}

	// close the output before the callbacks store their results
	writer.reset();

	if ( md5callback )
		md5callback->saveDigestAsFile(md5filename);

	if ( indexcallback )
		indexcallback->flush(std::string(indexfilename));

	if ( verbose )
		std::cerr << "[V] checked " << numok << " alignments in " << clock.formatTime(clock.getElapsedSeconds())
			<< " (" << numok / clock.getElapsedSeconds() << " al/s)" << std::endl;

	if ( basequalhist )
	{
		uint64_t const histsum = std::accumulate(qualhist.begin(),qualhist.end(),0ull);
		uint64_t const charlimit = static_cast<uint64_t>(std::numeric_limits<char>::max());

		uint64_t cumulative = 0;
		uint64_t minq = std::numeric_limits<uint64_t>::max();
		uint64_t maxq = 0;

		// one line per quality value seen: value, symbol (value+33, if printable), count, fraction, cumulative fraction
		for ( uint64_t q = 0; q < qualhist.size(); ++q )
		{
			if ( ! qualhist[q] )
				continue;

			minq = std::min(minq,q);
			maxq = std::max(maxq,q);

			cumulative += qualhist[q];

			std::cerr << "[H]\t" << q << "\t";

			if ( static_cast<uint64_t>(q+33) < charlimit && isprint(q+33) )
				std::cerr << static_cast<char>(q+33);

			std::cerr << "\t"
				<< qualhist[q] << "\t"
				<< (qualhist[q] / static_cast<double>(histsum)) << "\t"
				<< (cumulative / static_cast<double>(histsum))
				<< std::endl;
		}

		// smallest and largest quality value seen, only if there was any base
		if ( histsum )
		{
			char const * const labels[2] = { "min", "max" };
			uint64_t const extremes[2] = { minq, maxq };

			for ( unsigned int k = 0; k < 2; ++k )
			{
				std::cerr << "[H]\t" << labels[k] << "\t" << extremes[k] << "\t";
				if ( static_cast<uint64_t>(extremes[k]+33) < charlimit && isprint(extremes[k]+33) )
					std::cerr << static_cast<char>(extremes[k]+33);
				std::cerr << std::endl;
			}
		}
	}

	return EXIT_SUCCESS;
}
예제 #7
0
/**
 * Recompute the CIGAR string of every mapped alignment against a FastA
 * reference and pass all alignments (mapped or not) on to the output writer.
 *
 * Keys read from arginfo inside this function: verbose, tmpfile, md5,
 * md5filename, index, indexfilename and reference. The reference key is
 * mandatory and the file "<reference>.fai" is loaded as its index.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException if standard output is a
 *         terminal, if the reference key is missing, if the reference file
 *         does not exist, or if a mapped alignment has a reference id
 *         smaller than the one currently loaded
 **/
int bamrecalculatecigar(libmaus2::util::ArgInfo const & arginfo)
{
	// never dump the output on an interactive terminal
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}

	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	// set up input decoder (second factory argument: put rank)
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decoderholder(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo,false)
	);
	libmaus2::bambam::BamAlignmentDecoder & decoder = decoderholder->getDecoder();
	// record object which is refilled by each successful readAlignment() call
	libmaus2::bambam::BamAlignment & record = decoder.getAlignment();
	libmaus2::bambam::BamHeader const & inheader = decoder.getHeader();
	::libmaus2::bambam::BamHeader::unique_ptr_type outheader(updateHeader(arginfo,inheader));

	// temporary file handed to the index callback, registered for removal
	std::string const tmpprefix = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const indextmpname = tmpprefix + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(indextmpname);

	// optional md5 and index callbacks attached to the compressed output
	std::string md5filename;
	std::string indexfilename;
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > callbacks;

	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havemd5name =
			arginfo.hasArg("md5filename") && (arginfo.getUnparsedValue("md5filename","") != "");

		if ( ! havemd5name )
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		else
			md5filename = arginfo.getUnparsedValue("md5filename","");

		if ( ! md5filename.empty() )
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type newmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(newmd5cb);
			callbacks.push_back(Pmd5cb.get());
		}
	}

	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const haveindexname =
			arginfo.hasArg("indexfilename") && (arginfo.getUnparsedValue("indexfilename","") != "");

		if ( ! haveindexname )
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		else
			indexfilename = arginfo.getUnparsedValue("indexfilename","");

		if ( ! indexfilename.empty() )
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type newindexcb(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(indextmpname));
			Pindex = UNIQUE_PTR_MOVE(newindexcb);
			callbacks.push_back(Pindex.get());
		}
	}

	// the writer factory receives a null pointer when no callback is active
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * callbackptr = callbacks.size() ? &callbacks : 0;

	libmaus2::bambam::BamBlockWriterBase::unique_ptr_type bamout(
		libmaus2::bambam::BamBlockWriterBaseFactory::construct(*outheader, arginfo, callbackptr)
	);

	// buffers reused across all alignments
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigarops;
	libmaus2::autoarray::AutoArray<char> readbuffer;
	libmaus2::bambam::BamAlignment::D_array_type scratch;

	// the reference is mandatory and has to exist
	if ( ! arginfo.hasArg("reference") )
	{
		libmaus2::exception::LibMausException ex;
		ex.getStream() << "reference key is missing." << std::endl;
		ex.finish();
		throw ex;
	}

	std::string const reference = arginfo.getUnparsedValue("reference","");

	if ( ! libmaus2::util::GetFileSize::fileExists(reference) )
	{
		libmaus2::exception::LibMausException ex;
		ex.getStream() << "file " << reference << " does not exist." << std::endl;
		ex.finish();
		throw ex;
	}

	libmaus2::fastx::FastAIndex::unique_ptr_type faindex(libmaus2::fastx::FastAIndex::load(reference + ".fai"));
	libmaus2::aio::InputStreamInstance fastain(reference);

	uint64_t numprocessed = 0;
	uint64_t const progressmask = (1ull<<20)-1;
	// sequence of the reference currently held in memory, -1 means none yet
	libmaus2::autoarray::AutoArray<char> refseq;
	int64_t loadedrefid = -1;

	while ( decoder.readAlignment() )
	{
		if ( record.isMapped() )
		{
			assert ( record.getRefID() >= 0 );

			if ( record.getRefID() < loadedrefid )
			{
				// reference ids are expected never to decrease
				libmaus2::exception::LibMausException ex;
				ex.getStream() << "bamrecalculatecigar: file is not sorted by coordinate" << std::endl;
				ex.finish();
				throw ex;
			}
			else if ( record.getRefID() > loadedrefid )
			{
				// moved on to a later reference sequence, load it
				refseq = faindex->readSequence(fastain,record.getRefID());
				loadedrefid = record.getRefID();
			}

			uint64_t const numcigarops = libmaus2::bambam::BamAlignmentDecoderBase::recalculateCigar(
				record.D.begin(),
				refseq.begin() + record.getPos(),
				cigarops,
				readbuffer
			);
			record.replaceCigarString(cigarops,numcigarops,scratch);
		}

		bamout->writeAlignment(record);

		// progress report every 2^20 alignments
		++numprocessed;
		if ( verbose && ((numprocessed & progressmask) == 0) )
			std::cerr << "[V] " << numprocessed << std::endl;
	}

	if ( verbose )
		std::cerr << "[V] " << numprocessed << std::endl;

	// destroy the writer before the callbacks are asked for their results
	bamout.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);
	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return EXIT_SUCCESS;
}
예제 #8
0
/**
 * Copy alignments from the input decoder to the output writer, passing them
 * in batches through handleQueue() together with an aux filter that has the
 * MC tag set.
 *
 * Keys read from arginfo inside this function: verbose, tmpfile, numthreads,
 * md5 and index. The md5 and index file names are obtained from
 * BamBlockWriterBaseFactory.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 **/
int bamfiltermc(libmaus2::util::ArgInfo const & arginfo)
{
	bool const verbose = arginfo.getValue("verbose",getDefaultVerbose());

	// input decoder, header and the record refilled by readAlignment()
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decoderholder(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo));
	::libmaus2::bambam::BamAlignmentDecoder & decoder = decoderholder->getDecoder();
	::libmaus2::bambam::BamHeader const & inheader = decoder.getHeader();
	::libmaus2::bambam::BamAlignment & record = decoder.getAlignment();

	std::string const tmpprefix = arginfo.getUnparsedValue("tmpfile",arginfo.getDefaultTmpFileName());
	uint64_t const numthreads = arginfo.getValueUnsignedNumeric<uint64_t>("numthreads",getDefaultNumThreads());

	// temporary file handed to the index callback, registered for removal
	std::string const indextmpname = tmpprefix + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(indextmpname);

	// optional md5 and index callbacks attached to the compressed output
	std::string md5filename;
	std::string indexfilename;
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > callbacks;

	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		md5filename = libmaus2::bambam::BamBlockWriterBaseFactory::getMD5FileName(arginfo);

		if ( md5filename.empty() )
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
		else
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type newmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(newmd5cb);
			callbacks.push_back(Pmd5cb.get());
		}
	}

	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		indexfilename = libmaus2::bambam::BamBlockWriterBaseFactory::getIndexFileName(arginfo);

		if ( indexfilename.empty() )
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}
		else
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type newindexcb(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(indextmpname));
			Pindex = UNIQUE_PTR_MOVE(newindexcb);
			callbacks.push_back(Pindex.get());
		}
	}

	// the writer factory receives a null pointer when no callback is active
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * callbackptr = callbacks.size() ? &callbacks : 0;

	// output header and writer
	::libmaus2::bambam::BamHeader::unique_ptr_type outheader(
		libmaus2::bambam::BamHeaderUpdate::updateHeader(arginfo,inheader,"bamfiltermc",std::string(PACKAGE_VERSION))
	);
	libmaus2::bambam::BamBlockWriterBase::unique_ptr_type bamout(
		libmaus2::bambam::BamBlockWriterBaseFactory::construct(*outheader,arginfo,callbackptr)
	);
	libmaus2::bambam::BamBlockWriterBase & writer = *bamout;

	// pool of reusable alignment objects and the queue of pending alignments
	uint64_t const freelistsize = 16*1024;
	libmaus2::util::FreeList < libmaus2::bambam::BamAlignment, BamAlignmentFreeListDefaultAllocator, BamAlignmentFreeListDefaultTypeInfo > freelist(freelistsize);
	libmaus2::util::SimpleQueue < libmaus2::bambam::BamAlignment::shared_ptr_type > pending;

	libmaus2::bambam::BamAuxFilterVector mcfilter;
	mcfilter.set('M','C');

	// only advanced when verbose is set, used for progress messages
	uint64_t numseen = 0;

	while ( decoder.readAlignment() )
	{
		// pool exhausted: hand the queued alignments to handleQueue() first
		if ( freelist.empty() )
			handleQueue(pending,freelist,writer,mcfilter,numthreads);
		assert ( ! freelist.empty() );

		// move the decoded record into a pooled object and queue it
		libmaus2::bambam::BamAlignment::shared_ptr_type slot = freelist.get();
		slot->swap(record);
		pending.push_back(slot);

		if ( verbose )
		{
			++numseen;
			if ( (numseen % (1024*1024)) == 0 )
				std::cerr << "[V] " << numseen << std::endl;
		}
	}

	// process whatever is still queued
	handleQueue(pending,freelist,writer,mcfilter,numthreads);

	// destroy the writer before the callbacks are asked for their results
	bamout.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
예제 #9
0
파일: bamsort.cpp 프로젝트: dozy/biobambam
int bamsort(::libmaus::util::ArgInfo const & arginfo)
{
	::libmaus::util::TempFileRemovalContainer::setup();
	
	bool const inputisstdin = (!arginfo.hasArg("I")) || (arginfo.getUnparsedValue("I","-") == "-");
	bool const outputisstdout = (!arginfo.hasArg("O")) || (arginfo.getUnparsedValue("O","-") == "-");

	if ( isatty(STDIN_FILENO) && inputisstdin && (arginfo.getValue<std::string>("inputformat","bam") != "sam") )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	if ( isatty(STDOUT_FILENO) && outputisstdout && (arginfo.getValue<std::string>("outputformat","bam") != "sam") )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	bool const disablevalidation = arginfo.getValue<int>("disablevalidation",getDefaultDisableValidation());

	std::string const inputformat = arginfo.getUnparsedValue("inputformat",getDefaultInputFormat());
	int const level = arginfo.getValue<int>("level",getDefaultLevel());
	switch ( level )
	{
		case Z_NO_COMPRESSION:
		case Z_BEST_SPEED:
		case Z_BEST_COMPRESSION:
		case Z_DEFAULT_COMPRESSION:
			break;
		default:
		{
			::libmaus::exception::LibMausException se;
			se.getStream()
				<< "Unknown compression level, please use"
				<< " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
				<< " level=" << Z_BEST_SPEED << " (fast) or"
				<< " level=" << Z_BEST_COMPRESSION << " (best) or"
				<< " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
			se.finish();
			throw se;
		}
			break;
	}

	// prefix for tmp files
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfilenameout = tmpfilenamebase + "_bamsort";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilenameout);
	uint64_t blockmem = arginfo.getValue<uint64_t>("blockmb",getDefaultBlockSize())*1024*1024;
	std::string const sortorder = arginfo.getValue<std::string>("SO","coordinate");
	bool const fixmates = arginfo.getValue<int>("fixmates",getDefaultFixMates());
	uint64_t sortthreads = arginfo.getValue<uint64_t>("sortthreads",getDefaultSortThreads());

	// input decoder wrapper
	libmaus::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper(
		libmaus::bambam::BamMultiAlignmentDecoderFactory::construct(
			arginfo,false // put rank
		)
	);
	::libmaus::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder());
	::libmaus::bambam::BamAlignmentDecoder & dec = *ppdec;
	if ( disablevalidation )
		dec.disableValidation();
	::libmaus::bambam::BamHeader const & header = dec.getHeader();

	std::string const headertext(header.text);

	// add PG line to header
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bamsort", // ID
		"bamsort", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN			
	);
	// construct new header
	::libmaus::bambam::BamHeader uphead(upheadtext);

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") &&  arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") &&  arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */
	if ( sortorder != "queryname" )
		uphead.changeSortOrder("coordinate");
	else
		uphead.changeSortOrder("queryname");

	libmaus::bambam::BamBlockWriterBase::unique_ptr_type Pout ( libmaus::bambam::BamBlockWriterBaseFactory::construct(uphead, arginfo, Pcbs) );

	if ( fixmates )
	{
		if ( sortorder != "queryname" )
		{
			::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentPosComparator > 
				BEC(blockmem,tmpfilenameout,sortthreads);

			if ( verbose )
				std::cerr << "[V] Reading alignments from source." << std::endl;
			uint64_t incnt = 0;

			// current alignment
			libmaus::bambam::BamAlignment & curalgn = dec.getAlignment();
			// previous alignment
			libmaus::bambam::BamAlignment prevalgn;
			// previous alignment valid
			bool prevalgnvalid = false;
			// MQ field filter
			libmaus::bambam::BamAuxFilterVector MQfilter;
			MQfilter.set("MQ");
			
			while ( dec.readAlignment() )
			{
				if ( curalgn.isSecondary() || curalgn.isSupplementary() )
				{
					BEC.putAlignment(curalgn);
				}
				else if ( prevalgnvalid )
				{
					// different name
					if ( strcmp(curalgn.getName(),prevalgn.getName()) )
					{
						BEC.putAlignment(prevalgn);
						curalgn.swap(prevalgn);
					}
					// same name
					else
					{
						libmaus::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter);
						BEC.putAlignment(prevalgn);
						BEC.putAlignment(curalgn);
						prevalgnvalid = false;
					}
				}
				else
				{
					prevalgn.swap(curalgn);
					prevalgnvalid = true;
				}
				
				if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) )
					std::cerr << "[V] " << incnt << std::endl;
			}
			
			if ( prevalgnvalid )
			{
				BEC.putAlignment(prevalgn);
				prevalgnvalid = false;
			}

			if ( verbose )
				std::cerr << "[V] read " << incnt << " alignments" << std::endl;

			// BEC.createOutput(std::cout, uphead, level, verbose, Pcbs);
			BEC.createOutput(*Pout, verbose);
		}
		else
		{
			::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentNameComparator > 
				BEC(blockmem,tmpfilenameout,sortthreads);
			
			if ( verbose )
				std::cerr << "[V] Reading alignments from source." << std::endl;
			uint64_t incnt = 0;
			
			// current alignment
			libmaus::bambam::BamAlignment & curalgn = dec.getAlignment();
			// previous alignment
			libmaus::bambam::BamAlignment prevalgn;
			// previous alignment valid
			bool prevalgnvalid = false;
			// MQ field filter
			libmaus::bambam::BamAuxFilterVector MQfilter;
			MQfilter.set("MQ");
			
			while ( dec.readAlignment() )
			{
				if ( curalgn.isSecondary() || curalgn.isSupplementary() )
				{
					BEC.putAlignment(curalgn);
				}
				else if ( prevalgnvalid )
				{
					// different name
					if ( strcmp(curalgn.getName(),prevalgn.getName()) )
					{
						BEC.putAlignment(prevalgn);
						curalgn.swap(prevalgn);
					}
					// same name
					else
					{
						libmaus::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter);
						BEC.putAlignment(prevalgn);
						BEC.putAlignment(curalgn);
						prevalgnvalid = false;
					}
				}
				else
				{
					prevalgn.swap(curalgn);
					prevalgnvalid = true;
				}
				
				if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) )
					std::cerr << "[V] " << incnt << std::endl;
			}
			
			if ( prevalgnvalid )
			{
				BEC.putAlignment(prevalgn);
				prevalgnvalid = false;
			}
			
			if ( verbose )
				std::cerr << "[V] read " << incnt << " alignments" << std::endl;

			// BEC.createOutput(std::cout, uphead, level, verbose, Pcbs);
			BEC.createOutput(*Pout, verbose);
		}
	}
	else
	{
		if ( sortorder != "queryname" )
		{
			::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentPosComparator > BEC(blockmem,tmpfilenameout,sortthreads);

			if ( verbose )
				std::cerr << "[V] Reading alignments from source." << std::endl;
			uint64_t incnt = 0;
			
			while ( dec.readAlignment() )
			{
				BEC.putAlignment(dec.getAlignment());
				incnt++;
				if ( verbose && (incnt % (1024*1024) == 0) )
					std::cerr << "[V] " << incnt/(1024*1024) << "M" << std::endl;
			}

			if ( verbose )
				std::cerr << "[V] read " << incnt << " alignments" << std::endl;

			// BEC.createOutput(std::cout, uphead, level, verbose, Pcbs);
			BEC.createOutput(*Pout, verbose);
		}
		else
		{
			::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentNameComparator > BEC(blockmem,tmpfilenameout,sortthreads);
			
			if ( verbose )
				std::cerr << "[V] Reading alignments from source." << std::endl;
			uint64_t incnt = 0;
			
			while ( dec.readAlignment() )
			{
				BEC.putAlignment(dec.getAlignment());
				incnt++;
				if ( verbose && (incnt % (1024*1024) == 0) )
					std::cerr << "[V] " << incnt/(1024*1024) << "M" << std::endl;
			}
			
			if ( verbose )
				std::cerr << "[V] read " << incnt << " alignments" << std::endl;

			// BEC.createOutput(std::cout, uphead, level, verbose, Pcbs);
			BEC.createOutput(*Pout, verbose);
		}
	}

	// flush encoder so callbacks see all output data
	Pout.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}