/**
 * Pump data from std::cin through a libmaus2::lz::BgzfInflateDeflateParallel
 * object to std::cout, optionally attaching MD5 and BAM index block output
 * callbacks.
 *
 * Keys read from arginfo: level, verbose, numthreads, tmpfile, md5,
 * md5filename, index, indexfilename.
 *
 * @param arginfo parsed command line arguments
 * @return always 0
 **/
uint64_t bamrecompress(libmaus2::util::ArgInfo const & arginfo)
{
	int const complevel = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
	int const beverbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	// never run with fewer than one thread
	int const threads = std::max(1,arginfo.getValue<int>("numthreads",getDefaultNumThreads()));

	// temporary file handed to the index callback; registered for removal up front
	std::string const tmpprefix = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const indextmpfile = tmpprefix + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(indextmpfile);

	std::string md5filename;
	std::string indexfilename;

	// non-owning list of callbacks; ownership stays with Pmd5cb and Pindex below
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > callbacks;

	// optional MD5 callback (requires md5 switch and a non-empty md5filename)
	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") )
			md5filename = arginfo.getUnparsedValue("md5filename","");

		if ( md5filename.empty() )
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
		else
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			callbacks.push_back(Pmd5cb.get());
		}
	}

	// optional BAM index callback (requires index switch and a non-empty indexfilename)
	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");

		if ( indexfilename.empty() )
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}
		else
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(indextmpfile));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			callbacks.push_back(Pindex.get());
		}
	}

	libmaus2::lz::BgzfInflateDeflateParallel::unique_ptr_type BIDP(
		new libmaus2::lz::BgzfInflateDeflateParallel(std::cin,std::cout,complevel,threads,4*threads)
	);

	for (
		std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * >::const_iterator it = callbacks.begin();
		it != callbacks.end();
		++it
	)
		BIDP->registerBlockOutputCallback(*it);

	libmaus2::autoarray::AutoArray<char> buffer(64*1024,false);
	// bytes passed through in total
	uint64_t total = 0;
	// value of total at the previous progress report (max() forces a report on the first round)
	uint64_t lastreported = std::numeric_limits<uint64_t>::max();
	// bytes passed through since the previous progress report
	uint64_t sincelast = 0;
	// a progress report is due whenever total crosses a multiple of this many bytes
	uint64_t const reportinterval = 64*1024*1024;
	libmaus2::timing::RealTimeClock totalclock; totalclock.start();
	libmaus2::timing::RealTimeClock intervalclock; intervalclock.start();

	for ( ; ; )
	{
		int const got = BIDP->read(buffer.begin(),buffer.size());

		// a zero return value ends the copy loop
		if ( ! got )
			break;

		BIDP->write(buffer.begin(),got);

		sincelast += got;
		total += got;

		// still inside the interval of the last report, nothing to print
		if ( total/reportinterval == lastreported/reportinterval )
			continue;

		if ( beverbose )
		{
			bool const onterminal = isatty(STDERR_FILENO);

			// on a terminal, blank the previous progress line before rewriting it
			if ( onterminal )
				std::cerr << "\r" << std::string(60,' ') << "\r";

			std::cerr
				<< totalclock.formatTime(totalclock.getElapsedSeconds())
				<< " " << total/(1024*1024) << "MB, "
				<< (sincelast/intervalclock.getElapsedSeconds())/(1024.0*1024.0) << "MB/s";

			if ( onterminal )
				std::cerr << std::flush;
			else
				std::cerr << std::endl;
		}

		intervalclock.start();
		lastreported = total;
		sincelast = 0;
	}

	// final summary using the overall rate
	if ( beverbose )
	{
		if ( isatty(STDERR_FILENO) )
			std::cerr << "\r" << std::string(60,' ') << "\r";

		std::cerr
			<< totalclock.formatTime(totalclock.getElapsedSeconds())
			<< " " << total/(1024*1024) << "MB, "
			<< (total/totalclock.getElapsedSeconds())/(1024.0*1024.0) << "MB/s";

		std::cerr << std::endl;
	}

	// destroy the recompressor before the callbacks' results are written out
	BIDP.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);

	// NOTE(review): the temporary std::string is kept as is; presumably it is
	// deliberate (e.g. for overload selection in flush) - confirm before simplifying
	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return 0;
}
// Example #2
// 0
/**
 * Test driver for several libmaus2::lz compression classes.
 *
 * Active steps, in order:
 *  - write 17 lines through a LineSplittingGzipOutputStream ("gzsplit") and
 *    construct/destroy a second one ("nogzsplit") without writing anything
 *  - run testGzip(), testlz4(), testBgzfRandom() and testBgzfMono()
 *  - write "Hello, world.\n" twice (each followed by a flush) plus an EOF
 *    block to std::cout via BgzfDeflate
 *
 * All other experiments in this function are compiled out with #if 0.
 *
 * @param argc number of command line arguments (only used by disabled code)
 * @param argv command line arguments (only used by disabled code)
 * @return 0
 **/
int main(int argc, char *argv[])
{
	// argc/argv are only referenced inside the disabled block at the end of
	// this function; mark them as used to avoid unused parameter warnings
	(void)argc;
	(void)argv;

	{
		libmaus2::lz::LineSplittingGzipOutputStream LSG("gzsplit",4,17);

		for ( uint64_t i = 0; i < 17; ++i )
			LSG << "line_" << i << "\n";
	}

	{
		// stream which is opened and closed without any data written
		libmaus2::lz::LineSplittingGzipOutputStream LSG("nogzsplit",4,17);
	}

	testGzip();
	testlz4();

	#if 0
	maskBamDuplicateFlag(std::cin,std::cout);
	return 0;
	#endif

	#if 0
	{
		libmaus2::lz::BgzfInflateDeflateParallel BIDP(std::cin,std::cout,Z_DEFAULT_COMPRESSION,32,128);
		libmaus2::autoarray::AutoArray<char> B(64*1024,false);
		int r;
		uint64_t t = 0;
		uint64_t last = std::numeric_limits<uint64_t>::max();
		uint64_t lcnt = 0;
		uint64_t const mod = 64*1024*1024;
		libmaus2::timing::RealTimeClock rtc; rtc.start();
		libmaus2::timing::RealTimeClock lrtc; lrtc.start();

		while ( (r = BIDP.read(B.begin(),B.size())) )
		{
			BIDP.write(B.begin(),r);

			lcnt += r;
			t += r;

			if ( t/mod != last/mod )
			{
				if ( isatty(STDERR_FILENO) )
					std::cerr
						<< "\r" << std::string(60,' ') << "\r";

				std::cerr
						<< rtc.formatTime(rtc.getElapsedSeconds()) << " " << t/(1024*1024) << "MB, " << (lcnt/lrtc.getElapsedSeconds())/(1024.0*1024.0) << "MB/s";

				if ( isatty(STDERR_FILENO) )
					std::cerr << std::flush;
				else
					std::cerr << std::endl;

				lrtc.start();
				last = t;
				lcnt = 0;
			}
		}

		if ( isatty(STDERR_FILENO) )
			std::cerr
				<< "\r" << std::string(60,' ') << "\r";

		std::cerr
				<< rtc.formatTime(rtc.getElapsedSeconds()) << " " << t/(1024*1024) << "MB, " << (t/rtc.getElapsedSeconds())/(1024.0*1024.0) << "MB/s";

		std::cerr << std::endl;


		return 0;
	}
	#endif

	#if 0
	{
		::libmaus2::lz::BgzfDeflateParallel BDP(std::cout,32,128,Z_DEFAULT_COMPRESSION);

		while ( std::cin )
		{
			libmaus2::autoarray::AutoArray<char> B(16384);
			std::cin.read(B.begin(),B.size());
			int64_t const r = std::cin.gcount();

			BDP.write(B.begin(),r);
		}

		BDP.flush();
		std::cout.flush();
	}

	return 0;
	#endif

	#if 0
	{
		try
		{
			libmaus2::lz::BgzfInflateParallel BIP(std::cin /* ,4,16 */);
			uint64_t c = 0;
			uint64_t b = 0;
			uint64_t d = 0;
			libmaus2::timing::RealTimeClock rtc; rtc.start();
			libmaus2::autoarray::AutoArray<uint8_t> adata(64*1024,false);

			while ( (d=BIP.read(reinterpret_cast<char *>(adata.begin()),adata.size())) != 0 )
			{
				b += d;
				if ( ++c % (16*1024) == 0 )
				{
					std::cerr << c << "\t" << b/(1024.0*1024.0*1024.0) << "\t" << static_cast<double>(b)/(1024.0*1024.0*rtc.getElapsedSeconds()) << " MB/s" << std::endl;
				}
			}

			std::cerr << c << "\t" << b/(1024.0*1024.0*1024.0) << "\t" << static_cast<double>(b)/(1024.0*1024.0*rtc.getElapsedSeconds()) << " MB/s" << std::endl;
			std::cerr << "decoded " << b << " bytes in " << rtc.getElapsedSeconds() << " seconds." << std::endl;
		}
		catch(std::exception const & ex)
		{
			std::cerr << ex.what() << std::endl;
			return EXIT_FAILURE;
		}
	}

	return 0;
	#endif

	std::cerr << "Testing random data on bgzf...";
	testBgzfRandom();
	std::cerr << "done." << std::endl;

	std::cerr << "Testing mono...";
	testBgzfMono();
	std::cerr << "done." << std::endl;

	// write the same short message twice, flushing after each write, then
	// append the EOF block
	::libmaus2::lz::BgzfDeflate<std::ostream> bdefl(std::cout);
	char const * str = "Hello, world.\n";
	bdefl.write(str,strlen(str));
	bdefl.flush();
	bdefl.write(str,strlen(str));
	bdefl.flush();
	bdefl.addEOFBlock();

	/*
	 * Disabled: the code below used to follow an unconditional "return 0;"
	 * and was therefore never executed. It is kept under #if 0 like the
	 * other disabled experiments above. Note that it contains a further
	 * unconditional return before the Deflate/BlockDeflate checks, so
	 * enabling this block alone does not run those checks.
	 */
	#if 0
	::libmaus2::lz::BgzfInflateStream SW(std::cin);

	::libmaus2::autoarray::AutoArray<char> BB(200,false);
	while ( SW.read(BB.begin(),BB.size()) )
	{

	}

	if ( argc < 2 )
		return EXIT_FAILURE;


	return 0;

	#if 0
	::libmaus2::lz::GzipHeader GZH(argv[1]);
	return 0;
	#endif

	std::ostringstream ostr;
	::libmaus2::autoarray::AutoArray<uint8_t> message = ::libmaus2::util::GetFileSize::readFile(argv[1]);

	std::cerr << "Deflating message of length " << message.size() << "...";
	::libmaus2::lz::Deflate DEFL(ostr);
	DEFL.write ( reinterpret_cast<char const *>(message.begin()), message.size() );
	DEFL.flush();
	std::cerr << "done." << std::endl;

	std::cerr << "Checking output...";
	std::istringstream istr(ostr.str());
	::libmaus2::lz::Inflate INFL(istr);
	int c;
	uint64_t i = 0;
	while ( (c=INFL.get()) >= 0 )
	{
		assert ( c == message[i] );
		i++;
	}
	std::cerr << "done." << std::endl;

	// std::cerr << "Message size " << message.size() << std::endl;

	std::string testfilename = "test";
	::libmaus2::lz::BlockDeflate BD(testfilename);
	BD.write ( message.begin(), message.size() );
	BD.flush();

	uint64_t const decpos = message.size() / 3;
	::libmaus2::lz::BlockInflate BI(testfilename,decpos);
	::libmaus2::autoarray::AutoArray<uint8_t> dmessage (message.size(),false);
	uint64_t const red = BI.read(dmessage.begin()+decpos,dmessage.size());
	assert ( red == dmessage.size()-decpos );

	std::cerr << "(";
	for ( uint64_t i = decpos; i < message.size(); ++i )
		assert ( message[i] == dmessage[i] );
	std::cerr << ")\n";

	std::string shortmes1("123456789");
	std::string shortmes2("AA");
	std::string shortmes3("BB");
	std::string shortmes4("CC");

	std::string textfile1("test1");
	std::string textfile2("test2");
	std::string textfile3("test3");
	std::string textfile4("test4");

	::libmaus2::lz::BlockDeflate BD1(textfile1);
	BD1.write ( reinterpret_cast<uint8_t const *>(shortmes1.c_str()), shortmes1.size() );
	BD1.flush();

	::libmaus2::lz::BlockDeflate BD2(textfile2);
	BD2.write ( reinterpret_cast<uint8_t const *>(shortmes2.c_str()), shortmes2.size() );
	BD2.flush();

	::libmaus2::lz::BlockDeflate BD3(textfile3);
	BD3.write ( reinterpret_cast<uint8_t const *>(shortmes3.c_str()), shortmes3.size() );
	BD3.flush();

	::libmaus2::lz::BlockDeflate BD4(textfile4);
	BD4.write ( reinterpret_cast<uint8_t const *>(shortmes4.c_str()), shortmes4.size() );
	BD4.flush();

	std::vector < std::string > filenames;
	filenames.push_back(textfile1);
	filenames.push_back(textfile2);
	filenames.push_back(textfile3);
	filenames.push_back(textfile4);

	for ( uint64_t j = 0; j <= 15; ++j )
	{
		::libmaus2::lz::ConcatBlockInflate CBI(filenames,j);

		for ( uint64_t i = 0; i < j; ++i )
			std::cerr << ' ';
		for ( uint64_t i = 0; i < CBI.n-j; ++i )
			std::cerr << (char)CBI.get();
		std::cerr << std::endl;
	}
	#endif

	return 0;
}