Пример #1
0
int bamsplit(libmaus2::util::ArgInfo const & arginfo)
{
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	uint64_t const n = arginfo.getValue<int>("n",getDefaultN());
	std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

	libmaus2::bambam::BamDecoder bamdec(std::cin);
	libmaus2::bambam::BamAlignment const & algn = bamdec.getAlignment();
	libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus2::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

	libmaus2::aio::OutputStreamInstance::unique_ptr_type COS;
	libmaus2::bambam::BamWriter::unique_ptr_type writer;

	uint64_t c = 0;
	uint64_t f = 0;
	while ( bamdec.readAlignment() )
	{
		if ( c++ % n == 0 )
		{
			writer.reset();
			if ( COS )
				COS->flush();
			COS.reset();

			std::ostringstream fnostr;
			fnostr << prefix << "_" << std::setw(6) << std::setfill('0') << f++ << std::setw(0) << ".bam";
			std::string const fn = fnostr.str();

			libmaus2::aio::OutputStreamInstance::unique_ptr_type tCOS(new libmaus2::aio::OutputStreamInstance(fn));
			COS = UNIQUE_PTR_MOVE(tCOS);

			libmaus2::bambam::BamWriter::unique_ptr_type twriter(new libmaus2::bambam::BamWriter(*COS,*uphead,level));
			writer = UNIQUE_PTR_MOVE(twriter);

			if ( verbose )
				std::cerr << "[V] opened file " << fn << std::endl;
		}

		algn.serialise(writer->getStream());
	}

	writer.reset();
	if ( COS )
		COS->flush();
	COS.reset();

	return EXIT_SUCCESS;
}
Пример #2
0
int main()
{
	try
	{
		::libmaus2::bambam::BamDecoder bamdec(std::cin);
		// ::libmaus2::lz::BufferedGzipStream bgs(std::cin);
		::libmaus2::autoarray::AutoArray<char> C(32*1024*1024,false);
		// double const errfreq = 1e-6;
		// srand(time(0));
		::libmaus2::random::Random::setup();
		::libmaus2::bambam::BamWriter writer(std::cout,bamdec.getHeader());

		uint64_t red = 0;
		uint64_t total = 0;
		while ( (bamdec.getStream().read(C.begin(),C.size())) && (red=bamdec.getStream().gcount()) )
		{
			uint64_t off = 0;

			while ( off < red )
			{
				uint64_t const skip =
					std::min(red-off, ::libmaus2::random::Random::rand64() % (64*1024) );
				 // ::libmaus2::random::Random::rand64() % (red-off+1);

				C [ off + skip ] = ::libmaus2::random::Random::rand8();
				off += skip;

				// std::cerr << "changed offset " << total + off << std::endl;
			}


			writer.getStream().write(C.begin(),red);
			total += red;
		}
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
	}
}
Пример #3
0
int main(int argc, char * argv[])
{
	try
	{
		libmaus::util::ArgInfo const arginfo(argc,argv);

		std::vector<std::string> const & inputfilenames = arginfo.restargs;
		libmaus::bambam::BamMergeCoordinate bamdec(inputfilenames /* ,true */);
		libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
		libmaus::bambam::BamHeader const & header = bamdec.getHeader();
		::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));
		libmaus::bambam::BamWriter writer(std::cout,*uphead);
		libmaus::bambam::BamWriter::stream_type & bamoutstr = writer.getStream();
		while ( bamdec.readAlignment() )
			algn.serialise(bamoutstr);
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
		return EXIT_FAILURE;
	}
}
Пример #4
0
int main(int argc, char * argv[])
{
	uint64_t cnt = 0;

	try
	{
		::libmaus::util::ArgInfo const arginfo(argc,argv);
		
		#if 0
		::libmaus::bambam::BamFormatAuxiliary auxiliary;
		std::string const tmpfilename = arginfo.getDefaultTmpFileName();
		::libmaus::util::TempFileRemovalContainer::setup();
		::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilename);
		::libmaus::bambam::CollatingBamDecoder bamdec(std::cin,tmpfilename);
		::libmaus::bambam::CollatingBamDecoder::alignment_ptr_type algn;
		::libmaus::autoarray::AutoArray<char> B;
		uint64_t c = 0;
		
		while ( (algn=bamdec.get()) )
		{
			uint64_t const fqlen = algn->getFastQLength();
			if ( fqlen > B.size() )
				B = ::libmaus::autoarray::AutoArray<char>(fqlen);
			char * pe = algn->putFastQ(B.begin());
			
			// std::cout.write(B.begin(),pe-B.begin());
			
			assert ( pe == B.begin() + fqlen );
			
			std::string const reffq = algn->formatFastq(auxiliary);
			// std::cerr << "expecting " << fqlen << " got " << reffq.size() << std::endl;
			assert ( reffq.size() == fqlen );
			for ( uint64_t i = 0; i < fqlen; ++i )
				assert ( reffq[i] == B[i] );
		
			if ( ++c % (1024*1024) == 0 )
			{
				std::cerr << "[V] " << c/(1024*1024) << std::endl;
			}
		}
		#else
		::libmaus::bambam::BamDecoder reader(std::cin);
		::libmaus::bambam::BamHeader & header = reader.bamheader;
		::libmaus::bambam::BamFormatAuxiliary aux;
		
		// std::cout << header.text;
		while ( reader.readAlignment() )
		{
			// std::cout << reader.alignment.formatAlignment(header,aux) << std::endl;

			if ( ++cnt % (1024*1024) == 0 )
				std::cerr << "[V] " << cnt/(1024*1024) << std::endl;
		}		
		#endif
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
		std::cerr << "cnt=" << cnt << std::endl;	
	}
}
Пример #5
0
int bamsplitmod(libmaus::util::ArgInfo const & arginfo)
{
    if ( isatty(STDIN_FILENO) )
    {
        ::libmaus::exception::LibMausException se;
        se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
        se.finish();
        throw se;
    }

    int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
    int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
    uint64_t const div = arginfo.getValue<int>("div",getDefaultDiv());
    std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

    if ( ! div )
    {
        ::libmaus::exception::LibMausException se;
        se.getStream() << "div cannot be 0." << std::endl;
        se.finish();
        throw se;
    }

    libmaus::bambam::BamDecoder bamdec(std::cin);
    libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
    libmaus::bambam::BamHeader const & header = bamdec.getHeader();
    ::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

    libmaus::autoarray::AutoArray<libmaus::aio::CheckedOutputStream::unique_ptr_type> COS(div);
    libmaus::autoarray::AutoArray<libmaus::bambam::BamWriter::unique_ptr_type> writers(div);
    std::vector < std::string > filenames;
    for ( uint64_t i = 0; i < div; ++i )
    {
        std::ostringstream ostr;
        ostr << prefix << "_" << std::setw(6) << std::setfill('0') << i << std::setw(0) << ".bam";

        libmaus::aio::CheckedOutputStream::unique_ptr_type tCOS(new libmaus::aio::CheckedOutputStream(ostr.str()));
        COS[i] = UNIQUE_PTR_MOVE(tCOS);
        libmaus::bambam::BamWriter::unique_ptr_type twriter(new libmaus::bambam::BamWriter(*COS[i],*uphead,level));
        writers[i] = UNIQUE_PTR_MOVE(twriter);
    }

    uint64_t c = 0;
    if ( verbose )
    {
        while ( bamdec.readAlignment() )
        {
            algn.serialise ( writers [ (c++) % div ] -> getStream() );

            if ( ((c) & ((1ull<<20)-1)) == 0 )
                std::cerr << "[V] " << c << std::endl;
        }
        std::cerr << "[V] " << c << std::endl;
    }
    else
    {
        while ( bamdec.readAlignment() )
            algn.serialise ( writers [ (c++) % div ] -> getStream() );
    }

    for ( uint64_t i = 0; i < div; ++i )
    {
        writers[i].reset();
        COS[i]->flush();
        COS[i].reset();
    }

    return EXIT_SUCCESS;
}
Пример #6
0
int bam12auxmerge(::libmaus2::util::ArgInfo const & arginfo)
{
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	std::string const prefilename = arginfo.getRestArg<std::string>(0);
	libmaus2::bambam::BamDecoder bampredec(prefilename);

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	int const ranksplit = arginfo.getValue<int>("ranksplit",getDefaultRankSplit());
	int const rankstrip = arginfo.getValue<int>("rankstrip",getDefaultRankSplit());
	int const clipreinsert = arginfo.getValue<int>("clipreinsert",getDefaultClipReinsert());
	int const zztoname = arginfo.getValue<int>("zztoname",getDefaultZZToName());
	int const sanity = arginfo.getValue<int>("sanity",getDefaultSanity());
	uint64_t const mod = arginfo.getValue<int>("mod",getDefaultMod());
	uint64_t const bmod = libmaus2::math::nextTwoPow(mod);
	uint64_t const bmask = bmod-1;

	libmaus2::autoarray::AutoArray<char> Aread;

	::libmaus2::bambam::BamDecoder bamdec(std::cin,false);
	::libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus2::bambam::BamHeader const & preheader = bampredec.getHeader();

	std::string const headertext(header.text);
	std::string const preheadertext(libmaus2::bambam::HeaderLine::removeSequenceLines(preheader.text));

	libmaus2::bambam::ProgramHeaderLineSet headerlines(headertext);
	libmaus2::bambam::ProgramHeaderLineSet preheaderlines(preheadertext);

	std::vector<libmaus2::bambam::HeaderLine> allheaderlines = libmaus2::bambam::HeaderLine::extractLines(headertext);

	std::string const lastid = preheaderlines.getLastIdInChain();

	std::stack < std::pair<uint64_t,std::string> > pgtodo;
	for ( uint64_t i = 0; i < headerlines.roots.size(); ++i )
		pgtodo.push(std::pair<uint64_t,std::string>(headerlines.roots[i],lastid));

	std::string upheadtext = preheadertext;
	while ( pgtodo.size() )
	{
		uint64_t const hid = pgtodo.top().first;
		std::string const PP = pgtodo.top().second;
		pgtodo.pop();
		libmaus2::bambam::HeaderLine const & line = headerlines.lines[hid];

		// ID, PP, PN, CL, VN
		std::string       ID = (line.M.find("ID") != line.M.end()) ? line.M.find("ID")->second : "";
		std::string const PN = (line.M.find("PN") != line.M.end()) ? line.M.find("PN")->second : "";
		std::string const CL = (line.M.find("CL") != line.M.end()) ? line.M.find("CL")->second : "";
		std::string const VN = (line.M.find("VN") != line.M.end()) ? line.M.find("VN")->second : "";

		upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLineRef(
			upheadtext,
			ID,
			PN,
			CL,
			PP,
			VN
		);

		if ( headerlines.edges.find(hid) != headerlines.edges.end() )
		{
			std::vector<uint64_t> const & children = headerlines.edges.find(hid)->second;

			for ( uint64_t j = 0; j < children.size(); ++j )
				pgtodo.push(std::pair<uint64_t,std::string>(children[j],ID));
		}
	}

	/* copy SQ lines */
	std::ostringstream sqconcstr;
	sqconcstr << upheadtext;
	for ( uint64_t i = 0; i < allheaderlines.size(); ++i )
		if ( allheaderlines[i].type == "SQ" )
			sqconcstr << allheaderlines[i].line << "\n";
	upheadtext = sqconcstr.str();

	::libmaus2::bambam::BamHeader uphead(upheadtext);
	uphead.changeSortOrder("unknown");

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") &&  arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") &&  arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	::libmaus2::bambam::BamWriter::unique_ptr_type writer(new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs));

	::libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment();
	::libmaus2::bambam::BamAlignment & prealgn = bampredec.getAlignment();
	int64_t curid = -1;

	libmaus2::autoarray::AutoArray< std::pair<uint8_t,uint8_t> > auxpre;
	libmaus2::autoarray::AutoArray< std::pair<uint8_t,uint8_t> > auxnew;

	libmaus2::bambam::BamAuxFilterVector auxfilter;

	// helpers for clipReinsert
	libmaus2::autoarray::AutoArray < std::pair<uint8_t,uint8_t> > auxtags;
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop;
	std::stack < libmaus2::bambam::cigar_operation > hardstack;
	libmaus2::bambam::BamAlignment::D_array_type Tcigar;
	libmaus2::bambam::BamAuxFilterVector bafv;
	libmaus2::bambam::BamAuxFilterVector auxfilterout;
	auxfilterout.set('q','s');
	auxfilterout.set('q','q');

	// helpers for zztoname
 	libmaus2::bambam::BamAuxFilterVector zzbafv;
 	zzbafv.set('z','z');

    	// tag filters for secondary/supplementary reads
	libmaus2::bambam::BamAuxFilterVector auxfiltersec;

	auxfiltersec.set('q','s');
	auxfiltersec.set('q','q');
	auxfiltersec.set('a','s');
	auxfiltersec.set('a','h');
	auxfiltersec.set('a','a');
	auxfiltersec.set('a','f');
	auxfiltersec.set('a','r');
	auxfiltersec.set('a','3');

	// loop over aligned BAM file
	while ( bamdec.readAlignment() )
	{
		if ( ranksplit )
			split12(algn);

		// extract rank
		char const * name = algn.getName();
		char const * u1 = name;
		bool ok = true;
		uint64_t rank = 0;
		while ( *u1 && *u1 != '_' )
		{
			rank *= 10;
			rank += (*u1-'0');
			ok = ok && isdigit(*u1);
			++u1;
		}

		// unable to find rank?	write out as is and continue
		if ( ! ok )
		{
			algn.serialise(writer->getStream());
			continue;
		}

		// loop over unaligned BAM file
		while ( curid != static_cast<int64_t>(rank) )
		{
			bool const a_ok = bampredec.readAlignment();

			if ( ! a_ok )
			{
				libmaus2::exception::LibMausException se;
				se.getStream() << "Found unexpected EOF on file " << prefilename << std::endl;
				se.finish();
				throw se;
			}
			assert ( a_ok );
			++curid;

			if ( verbose && (! (curid & bmask)) )
				std::cerr << "[V] " << (curid / bmod) << std::endl;
		}

		if ( verbose > 1 )
			std::cerr << "Merging:\n" << algn.formatAlignment(header) << "\n" << prealgn.formatAlignment(preheader) << std::endl;

		uint64_t pretagnum = prealgn.enumerateAuxTags(auxpre);
		uint64_t newtagnum = algn.enumerateAuxTags(auxnew);

		// do some sanity checking
		if ( sanity )
		{
		    	// first do a name check
			char const * prename = prealgn.getName();
			u1++; // put on the first letter of readname

			if ( verbose > 1 )
    	    	    	    	std::cerr << "Sanity: comparing " << name << " and " << prename << std::endl;

			if ( !is_suffix(prename, u1) ) // names do not match
			{
			    	libmaus2::exception::LibMausException se;
			    	se.getStream() << "Sanity check failed on read names, found " << name << " and " << prename << std::endl;
				se.finish();
				throw se;
			}

			// now the names match so try the flags

			if ( !(algn.isPaired() == prealgn.isPaired() &&
			     algn.isRead1() == prealgn.isRead1() &&
			     algn.isRead2() == prealgn.isRead2()) )
			{
			    	libmaus2::exception::LibMausException se;
				se.getStream() << "Sanity check failed on flags, " << std::endl
				    	       << "Aligned " << name << " paired " << algn.isPaired() << " first " << algn.isRead1() << " last " << algn.isRead2() << std::endl
			    	    	       << "Unaligned " << prename << " paired " << prealgn.isPaired() << " first " << prealgn.isRead1() << " last " << prealgn.isRead2() << std::endl;
				se.finish();
				throw se;
			}

			if ( verbose > 1 )
			    std::cerr << "Sanity check on flags: " << std::endl
				    	       << "Aligned " << name << " paired " << algn.isPaired() << " first " << algn.isRead1() << " last " << algn.isRead2() << std::endl
			    	    	       << "Unaligned " << prename << " paired " << prealgn.isPaired() << " first " << prealgn.isRead1() << " last " << prealgn.isRead2() << std::endl;


		}

		std::sort(auxpre.begin(),auxpre.begin()+pretagnum);
		std::sort(auxnew.begin(),auxnew.begin()+newtagnum);

		if ( verbose > 1 )
			std::cerr << "pretagnum=" << pretagnum << " newtagnum=" << newtagnum << std::endl;

		std::pair<uint8_t,uint8_t> * prec = auxpre.begin();
		std::pair<uint8_t,uint8_t> * pree = prec + pretagnum;
		std::pair<uint8_t,uint8_t> * preo = prec;

		std::pair<uint8_t,uint8_t> * newc = auxnew.begin();
		std::pair<uint8_t,uint8_t> * newe = newc + newtagnum;
		std::pair<uint8_t,uint8_t> * newo = newc;

		while ( prec != pree && newc != newe )
		{
			// pre which is not in new
			if ( *prec < *newc )
			{
				*(preo++) = *(prec++);
			}
			// tag in both, drop pre
			else if ( *prec == *newc )
			{
				*(newo++) = *(newc++);
				prec++;
			}
			// new not in pre
			else
			{
				*(newo++) = *(newc++);
			}
		}

		while ( prec != pree )
			*(preo++) = *(prec++);
		while ( newc != newe )
			*(newo++) = *(newc++);

		pretagnum = preo-auxpre.begin();
		newtagnum = newo-auxnew.begin();

		for ( uint64_t i = 0; i < pretagnum; ++i )
			auxfilter.set(auxpre[i].first,auxpre[i].second);

		algn.copyAuxTags(prealgn, auxfilter);

		for ( uint64_t i = 0; i < pretagnum; ++i )
			auxfilter.clear(auxpre[i].first,auxpre[i].second);

		if ( verbose > 1 )
		{
			std::cerr << "pretagnum=" << pretagnum << " newtagnum=" << newtagnum << std::endl;
			std::cerr << "result: " << algn.formatAlignment(header) << std::endl;
		}


		if ( algn.isSecondary() || algn.isSupplementary() )
		{
		    	// adding adapter clip data to secondary/supplementary reads
			// can lead to incorrect clip reinserts so remove these tags

			algn.filterOutAux(auxfiltersec);
		}


		// copy QC fail flag from original file to aligner output
		if ( prealgn.isQCFail() )
			algn.putFlags( algn.getFlags() | libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FQCFAIL );

		if ( rankstrip )
			strip12(algn);

		if ( clipreinsert )
			clipReinsert(algn,auxtags,bafv,cigop,Tcigar,hardstack,auxfilterout);

		if ( zztoname )
			zzToRank(algn,zzbafv);

		algn.serialise(writer->getStream());
	}

	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
Пример #7
0
int bamcat(libmaus::util::ArgInfo const & arginfo)
{
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = arginfo.getValue<int>("level",getDefaultLevel());
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	
	switch ( level )
	{
		case Z_NO_COMPRESSION:
		case Z_BEST_SPEED:
		case Z_BEST_COMPRESSION:
		case Z_DEFAULT_COMPRESSION:
			break;
		default:
		{
			::libmaus::exception::LibMausException se;
			se.getStream()
				<< "Unknown compression level, please use"
				<< " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
				<< " level=" << Z_BEST_SPEED << " (fast) or"
				<< " level=" << Z_BEST_COMPRESSION << " (best) or"
				<< " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
			se.finish();
			throw se;
		}
			break;
	}

	std::vector<std::string> inputfilenames = arginfo.getPairValues("I");
	
	for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
		inputfilenames.push_back(arginfo.restargs[i]);
	
	libmaus::bambam::BamCat bamdec(inputfilenames /* ,true */);
	libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
	libmaus::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") &&  arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") &&  arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */


	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,*uphead,level,Pcbs));
	libmaus::bambam::BamWriter::stream_type & bamoutstr = writer->getStream();
	if ( verbose )
	{
		uint64_t c = 0;
		while ( bamdec.readAlignment() )
		{
			algn.serialise(bamoutstr);
			
			if ( ((++c) & ((1ull<<20)-1)) == 0 )
				std::cerr << "[V] " << c << std::endl;
		}
		
		std::cerr << "[V] " << c << std::endl;
	}
	else
		while ( bamdec.readAlignment() )
			algn.serialise(bamoutstr);

	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
Пример #8
0
int main(int argc, char * argv[])
{
	try
	{
		libmaus2::util::ArgInfo const arginfo(argc,argv);
		libmaus2::timing::RealTimeClock rtc;
		uint64_t const runs = 10;
		std::pair <libmaus2::bambam::BamAlignment const *, libmaus2::bambam::BamAlignment const *> P;
		
		for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i )
		{
			std::string const fn = arginfo.restargs[i];
			double srate = 0, drate = 0;
	
			for ( uint64_t j = 0; j < runs; ++j )		
			{
				rtc.start();
				libmaus2::bambam::BamDecoder bamdec(fn);
				uint64_t cnt = 0;
				while ( bamdec.readAlignment() )
					++cnt;
				double const lela = rtc.getElapsedSeconds();
				
				std::cerr << "[S] " << "cnt=" << cnt << " ela=" << lela 
					<< " rate=" << cnt/lela << std::endl;
					
				srate += cnt/lela;
			}

			for ( uint64_t j = 0; j < runs; ++j )		
			{
				rtc.start();
				libmaus2::aio::InputStreamInstance CIS(fn);
				libmaus2::bambam::BamCircularHashCollatingBamDecoder bamdec(CIS,"tmpfile");
				uint64_t cnt = 0;
				while ( bamdec.tryPair(P) )
				{
					if ( P.first )
						++cnt;
					if ( P.second )
						++cnt;
				}
				libmaus2::aio::FileRemoval::removeFile("tmpfile");
				double const lela = rtc.getElapsedSeconds();
				
				std::cerr << "[D] " << "cnt=" << cnt << " ela=" << lela 
					<< " rate=" << cnt/lela << std::endl;
					
				drate += cnt/lela;
			}
			
			srate /= runs;
			drate /= runs;
			std::cerr << "[Q] " << srate/drate << std::endl;
		}
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
		return EXIT_FAILURE;
	}
}
Пример #9
0
int bamchecksort(libmaus2::util::ArgInfo const & arginfo)
{
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	libmaus2::bambam::BamDecoder bamdec(std::cin);
	libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment();
	libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
	std::string const sortorder = libmaus2::bambam::BamHeader::getSortOrderStatic(header.text);
	libmaus2::bambam::BamAlignment prevalgn;
	
	if ( bamdec.readAlignment() )
	{
		prevalgn.swap(algn);
		
		if ( sortorder == "coordinate" )
		{
			uint64_t c = 0;
		
			while ( bamdec.readAlignment() )
			{
				bool const ok =
					(static_cast<uint32_t>(    algn.getRefID()) >
					 static_cast<uint32_t>(prevalgn.getRefID())
					)
					||
					(
						(static_cast<uint32_t>(    algn.getRefID()) ==
						 static_cast<uint32_t>(prevalgn.getRefID())
						)
						&&
						(static_cast<uint32_t>(    algn.getPos()) >=
						 static_cast<uint32_t>(prevalgn.getPos())
						)
					);
					
				if ( ! ok )
				{
					libmaus2::exception::LibMausException se;
					se.getStream() << "Broken order:";
					se.getStream() << prevalgn.formatAlignment(header) << std::endl;
					se.getStream() <<     algn.formatAlignment(header) << std::endl;
					se.finish();
					throw se;
				}
				
				prevalgn.swap(algn);

				if ( verbose && ( ((++c) & ((1ull<<20)-1)) == 0 ) )
					std::cerr << "[V] " << c << std::endl;
			}
			
			if ( verbose )
				std::cerr << "[V] " << c << std::endl;
			
			std::cerr << "Alignments sorted by coordinate." << std::endl;
		}
		else if ( sortorder == "queryname" )
		{
			uint64_t c = 0;
			
			while ( bamdec.readAlignment() )
			{
				// bool const ok = libmaus2::bambam::BamAlignmentNameComparator::compareInt(prevalgn,algn) <= 0;
				bool const ok = 
					!libmaus2::bambam::BamAlignmentNameComparator::compare(algn,prevalgn);

				if ( ! ok )
				{
					libmaus2::exception::LibMausException se;
					se.getStream() << "Broken order:";
					se.getStream() << prevalgn.formatAlignment(header) << std::endl;
					se.getStream() <<     algn.formatAlignment(header) << std::endl;
					se.getStream() << libmaus2::bambam::BamAlignmentNameComparator::compareInt(prevalgn,algn) << std::endl;
					se.finish();
					throw se;
				}

				prevalgn.swap(algn);

				if ( verbose && ( ((++c) & ((1ull<<20)-1)) == 0 ) )
					std::cerr << "[V] " << c << std::endl;
			}

			if ( verbose )
				std::cerr << "[V] " << c << std::endl;

			std::cerr << "Alignments sorted by query name." << std::endl;
		}
		else
		{
			std::cerr << "[V] not checking order for \"" << sortorder << "\"" << std::endl;
		}
	}


	return EXIT_SUCCESS;
}
Пример #10
0
void testInplaceReverseComplement()
{
    ::libmaus2::bambam::BamHeader header;
    header.addChromosome("chr1",2000);

    std::ostringstream ostr;
    {
        ::libmaus2::bambam::BamWriter bamwriter(ostr,header);

        bamwriter.encodeAlignment("name",0,1000,30,
                                  ::libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FUNMAP, "", 0, 1500, -1, "ACGTATGCA", "HGHGHGHGH"
                                 );
        bamwriter.commit();
    }

    std::istringstream istr(ostr.str());
    libmaus2::bambam::BamDecoder bamdec(istr);

    while ( bamdec.readAlignment() )
    {
        libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment();

        for ( unsigned int k = 0; k <= 10; ++k )
        {
            for ( uint64_t i = 0; i < (1ull << (2*k)); ++i )
            {
                std::string s(k,'A');
                uint64_t t = i;
                for ( uint64_t j = 0; j < k; ++j, t>>=2 )
                {
                    switch ( t & 0x3 )
                    {
                    case 0:
                        s[j] = 'A';
                        break;
                    case 1:
                        s[j] = 'C';
                        break;
                    case 2:
                        s[j] = 'G';
                        break;
                    case 3:
                        s[j] = 'T';
                        break;
                    }
                }

                std::reverse(s.begin(),s.end());

                algn.replaceSequence(s,std::string(k,'H'));

                libmaus2::bambam::BamAlignment::unique_ptr_type ualgn(algn.uclone());

                // std::cout << algn.formatAlignment(header) << std::endl;

                ualgn->reverseComplementInplace();

                // std::cout << ualgn->formatAlignment(header) << std::endl;

                assert ( algn.getReadRC() == ualgn->getRead() );
            }

            std::cerr << "k=" << k << std::endl;
        }
    }
}