Пример #1
0
void testcompact()
{
	std::string const fn("tmpfile");
	#if 0
	std::string const fn2("tmpfile2");
	std::string const fnm("tmpfile.merged");
	#endif

	::libmaus::util::TempFileRemovalContainer::setup();
	::libmaus::util::TempFileRemovalContainer::addTempFile(fn);
	
	uint64_t n = 1024*1024;
	unsigned int const b = 3;
	::libmaus::bitio::CompactArray CA(n,b);
	::libmaus::bitio::CompactArrayWriter CAW(fn,n,b);
	srand(time(0));
	for ( uint64_t i = 0; i < n; ++i )
	{
		CA.set(i,rand() & ((1ull<<b)-1));
		CAW.put(CA.get(i));
	}
	CAW.flush();
	#if 0
	::libmaus::aio::CheckedOutputStream COS(fn);
	CA.serialize(COS);
	COS.flush();
	COS.close();
	#endif
	
	::libmaus::aio::CheckedInputStream CIS(fn);
	std::cerr << "compact file size is " << ::libmaus::util::GetFileSize::getFileSize(CIS) << std::endl;
	assert ( static_cast< ::std::streampos > (CIS.tellg()) == static_cast< ::std::streampos >(0) );
	assert ( CIS.get() >= 0 );
	
	::libmaus::bitio::CompactDecoderWrapper W(fn,4096);
	
	W.seekg(0,std::ios::end);
	int64_t const fs = W.tellg();
	W.seekg(0,std::ios::beg);
	W.clear();
	
	assert ( fs == static_cast<int64_t>(n) );
	
	std::cerr << "n=" << n << " fs=" << fs << std::endl;
	
	for ( uint64_t i = 0; i < n; ++i )
	{
		assert ( W.tellg() == static_cast< ::std::streampos >(i) );
		int const v = W.get();
		assert ( v == static_cast<int>(CA[i]) );
		// std::cerr << static_cast<int>(W.get()) << " " << CA[i] << std::endl;
	}
	
	for ( uint64_t i = 0; i < n; i += (rand() % 256) )
	{
		W.clear();
		W.seekg(i);
		
		std::cerr << "seek to " << W.tellg() << std::endl;
		
		for ( uint64_t j = i; j < n; ++j )
		{
			assert ( W.tellg() == static_cast< ::std::streampos >(j) );
			int const v = W.get();
			assert ( v == static_cast<int>(CA[j]) );
		}
		
		uint64_t ii = n-i;
		W.clear();
		W.seekg(ii);

		for ( uint64_t j = ii; j < n; ++j )
		{
			assert ( W.tellg() == static_cast< ::std::streampos >(j) );
			int const v = W.get();
			assert ( v == static_cast<int>(CA[j]) );
		}
	}
}
Пример #2
0
int main(int argc, char * argv[])
{
	try
	{
		::libmaus::util::ArgInfo const arginfo(argc,argv);
		std::string const input = arginfo.getRestArg<std::string>(0);
		std::string const output = arginfo.getRestArg<std::string>(1);
		unsigned int const verbose = arginfo.getValue<unsigned int>("verbose",1);
		unsigned int const addterm = arginfo.getValue<unsigned int>("addterm",0) ? 1 : 0;

		::libmaus::autoarray::AutoArray<uint64_t> const chist = computeCharHist(input);
		uint64_t maxsym = 0;
		for ( uint64_t i = 0; i < chist.size(); ++i )
			if ( chist[i] )
				maxsym = i;
		if ( addterm )
			maxsym += 1;
		unsigned int const b = maxsym ? (64-::libmaus::bitio::Clz::clz(maxsym)) : 0;

		uint64_t const n = std::accumulate(chist.begin(),chist.end(),0ull);
		if ( verbose )
			std::cerr << "[V] n=" << n << " maxsym=" << maxsym << " b=" << b << std::endl;				

		uint64_t const blocksize = 8*1024;
		uint64_t const numblocks = (n+blocksize-1)/blocksize;
		::libmaus::autoarray::AutoArray<uint8_t> B(blocksize);
		::libmaus::aio::CheckedInputStream CIS(input);
		::libmaus::bitio::CompactArrayWriter CAW(output,n+addterm,b);
		int64_t lastperc = -1;
		
		if ( verbose )
			std::cerr << "[V] ";
			
		for ( uint64_t b = 0; b < numblocks; ++b )
		{
			uint64_t const low = std::min(b*blocksize,n);
			uint64_t const high = std::min(low+blocksize,n);
			uint64_t const range = high-low;
			
			CIS.read ( reinterpret_cast<char *>(B.begin()), range );
			assert ( CIS.gcount() == static_cast<int64_t>(range) );
			
			if ( addterm )
				for ( uint64_t i = 0; i < range; ++i )
					B[i] += 1;
			
			CAW.write(B.begin(),range);
			
			int64_t const newperc = (high * 100) / n;
			if ( verbose && newperc != lastperc )
			{
				lastperc = newperc;
				std::cerr << "(" << newperc << ")";
			}
		}
		if ( addterm )
			CAW.put(0);
		if ( verbose )
			std::cerr << std::endl;
		
		CAW.flush();
		
		#if 0
		::libmaus::bitio::CompactDecoderWrapper CDW(output);
		for ( uint64_t i = 0; i < n+addterm; ++i )
			std::cerr << CDW.get();
		std::cerr << std::endl;
		#endif
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
	}
}