void testcompact() { std::string const fn("tmpfile"); #if 0 std::string const fn2("tmpfile2"); std::string const fnm("tmpfile.merged"); #endif ::libmaus::util::TempFileRemovalContainer::setup(); ::libmaus::util::TempFileRemovalContainer::addTempFile(fn); uint64_t n = 1024*1024; unsigned int const b = 3; ::libmaus::bitio::CompactArray CA(n,b); ::libmaus::bitio::CompactArrayWriter CAW(fn,n,b); srand(time(0)); for ( uint64_t i = 0; i < n; ++i ) { CA.set(i,rand() & ((1ull<<b)-1)); CAW.put(CA.get(i)); } CAW.flush(); #if 0 ::libmaus::aio::CheckedOutputStream COS(fn); CA.serialize(COS); COS.flush(); COS.close(); #endif ::libmaus::aio::CheckedInputStream CIS(fn); std::cerr << "compact file size is " << ::libmaus::util::GetFileSize::getFileSize(CIS) << std::endl; assert ( static_cast< ::std::streampos > (CIS.tellg()) == static_cast< ::std::streampos >(0) ); assert ( CIS.get() >= 0 ); ::libmaus::bitio::CompactDecoderWrapper W(fn,4096); W.seekg(0,std::ios::end); int64_t const fs = W.tellg(); W.seekg(0,std::ios::beg); W.clear(); assert ( fs == static_cast<int64_t>(n) ); std::cerr << "n=" << n << " fs=" << fs << std::endl; for ( uint64_t i = 0; i < n; ++i ) { assert ( W.tellg() == static_cast< ::std::streampos >(i) ); int const v = W.get(); assert ( v == static_cast<int>(CA[i]) ); // std::cerr << static_cast<int>(W.get()) << " " << CA[i] << std::endl; } for ( uint64_t i = 0; i < n; i += (rand() % 256) ) { W.clear(); W.seekg(i); std::cerr << "seek to " << W.tellg() << std::endl; for ( uint64_t j = i; j < n; ++j ) { assert ( W.tellg() == static_cast< ::std::streampos >(j) ); int const v = W.get(); assert ( v == static_cast<int>(CA[j]) ); } uint64_t ii = n-i; W.clear(); W.seekg(ii); for ( uint64_t j = ii; j < n; ++j ) { assert ( W.tellg() == static_cast< ::std::streampos >(j) ); int const v = W.get(); assert ( v == static_cast<int>(CA[j]) ); } } }
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); std::string const input = arginfo.getRestArg<std::string>(0); std::string const output = arginfo.getRestArg<std::string>(1); unsigned int const verbose = arginfo.getValue<unsigned int>("verbose",1); unsigned int const addterm = arginfo.getValue<unsigned int>("addterm",0) ? 1 : 0; ::libmaus::autoarray::AutoArray<uint64_t> const chist = computeCharHist(input); uint64_t maxsym = 0; for ( uint64_t i = 0; i < chist.size(); ++i ) if ( chist[i] ) maxsym = i; if ( addterm ) maxsym += 1; unsigned int const b = maxsym ? (64-::libmaus::bitio::Clz::clz(maxsym)) : 0; uint64_t const n = std::accumulate(chist.begin(),chist.end(),0ull); if ( verbose ) std::cerr << "[V] n=" << n << " maxsym=" << maxsym << " b=" << b << std::endl; uint64_t const blocksize = 8*1024; uint64_t const numblocks = (n+blocksize-1)/blocksize; ::libmaus::autoarray::AutoArray<uint8_t> B(blocksize); ::libmaus::aio::CheckedInputStream CIS(input); ::libmaus::bitio::CompactArrayWriter CAW(output,n+addterm,b); int64_t lastperc = -1; if ( verbose ) std::cerr << "[V] "; for ( uint64_t b = 0; b < numblocks; ++b ) { uint64_t const low = std::min(b*blocksize,n); uint64_t const high = std::min(low+blocksize,n); uint64_t const range = high-low; CIS.read ( reinterpret_cast<char *>(B.begin()), range ); assert ( CIS.gcount() == static_cast<int64_t>(range) ); if ( addterm ) for ( uint64_t i = 0; i < range; ++i ) B[i] += 1; CAW.write(B.begin(),range); int64_t const newperc = (high * 100) / n; if ( verbose && newperc != lastperc ) { lastperc = newperc; std::cerr << "(" << newperc << ")"; } } if ( addterm ) CAW.put(0); if ( verbose ) std::cerr << std::endl; CAW.flush(); #if 0 ::libmaus::bitio::CompactDecoderWrapper CDW(output); for ( uint64_t i = 0; i < n+addterm; ++i ) std::cerr << CDW.get(); std::cerr << std::endl; #endif } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; } }