/* * read position of index, which is stored in the last 8 bytes of the file * byte order of the number is big endian */ uint64_t libmaus::huffman::IndexLoaderBase::getIndexPos(std::string const & filename) { ::libmaus::aio::CheckedInputStream indexistr(filename); // read position of index (last 8 bytes of file) // and convert byte order if necessary indexistr.seekg(-8,std::ios::end); uint64_t v; indexistr.read( reinterpret_cast<char *>(&v) , 8 ); #if defined(LIBMAUS_BYTE_ORDER_LITTLE_ENDIAN) #if defined(_WIN32) uint64_t const indexpos = _byteswap_uint64(v); #elif defined(__FreeBSD__) uint64_t const indexpos = bswap64(v); #elif defined(__linux__) uint64_t const indexpos = bswap_64(v); #else uint64_t const indexpos = ::libmaus::util::ReverseByteOrder::reverseByteOrder<uint64_t>(v); #endif #else uint64_t const indexpos = v; #endif // std::cerr << "Index at position " << indexpos << " file length " << ::libmaus::util::GetFileSize::getFileSize(filename) << std::endl; return indexpos; }
IndexDecoderData(std::string const & rfilename) : filename(rfilename), numentries(0), posbits(0), kbits(0), kacc(0), vbits(0), vacc(0), indexvectorpos(0) // , posadpt(this), kadpt(this), vadpt(this) { uint64_t const indexpos = getIndexPos(filename); std::ifstream indexistr(filename.c_str(),std::ios::binary); if ( ! indexistr.is_open() ) { ::libmaus2::exception::LibMausException se; se.getStream() << "IndexDecoderData::IndexDecoderData(): Failed to open file " << filename << std::endl; se.finish(); throw se; } // seek to index position indexistr.clear(); indexistr.seekg(indexpos,std::ios::beg); if ( static_cast<int64_t>(indexistr.tellg()) != static_cast<int64_t>(indexpos) ) { ::libmaus2::exception::LibMausException se; se.getStream() << "Failed to seek to position " << indexpos << " of index in file " << filename << " of size " << ::libmaus2::util::GetFileSize::getFileSize(filename) << std::endl; se.finish(); throw se; } ::libmaus2::bitio::StreamBitInputStream SBIS(indexistr); // read size of index numentries = ::libmaus2::bitio::readElias2(SBIS); // pos bits posbits = ::libmaus2::bitio::readElias2(SBIS); // k bits kbits = ::libmaus2::bitio::readElias2(SBIS); // k acc kacc = ::libmaus2::bitio::readElias2(SBIS); // v bits vbits = ::libmaus2::bitio::readElias2(SBIS); // v acc vacc = ::libmaus2::bitio::readElias2(SBIS); // align SBIS.flush(); assert ( SBIS.getBitsRead() % 8 == 0 ); indexvectorpos = indexpos + SBIS.getBitsRead() / 8; }
/*
 * Open filename as a binary std::ifstream and return it wrapped in an
 * ifstream_ptr_type.
 *
 * Throws LibMausException if the file cannot be opened.
 */
ifstream_ptr_type openFile() const
{
	ifstream_ptr_type stream(new ::std::ifstream(filename.c_str(),std::ios::binary));

	// success path first
	if ( stream->is_open() )
		return stream;

	::libmaus2::exception::LibMausException ex;
	ex.getStream() << "IndexDecoderData::openFile(): Failed to open file " << filename << std::endl;
	ex.finish();
	throw ex;
}
/*
 * Load the index of one file into an IndexEntryContainer.
 *
 * Reads the index header and then numentries+1 entries of
 * (pos, kcnt, vcnt); the entries are stored as read.
 */
static IndexEntryContainer::unique_ptr_type loadAccIndex(std::string const & filename)
{
	uint64_t const headerpos = getIndexPos(filename);

	::libmaus::aio::CheckedInputStream in(filename);

	// move to the start of the index
	in.seekg(headerpos,std::ios::beg);

	::libmaus::bitio::StreamBitInputStream bitin(in);

	// header: size of index, pos bits, k bits, k acc, v bits, v acc
	uint64_t const numentries = ::libmaus::bitio::readElias2(bitin);
	unsigned int const posbits = ::libmaus::bitio::readElias2(bitin);
	unsigned int const kbits = ::libmaus::bitio::readElias2(bitin);
	// k acc is read to advance the stream, the value is not used here
	::libmaus::bitio::readElias2(bitin);
	unsigned int const vbits = ::libmaus::bitio::readElias2(bitin);
	// v acc is read to advance the stream, the value is not used here
	::libmaus::bitio::readElias2(bitin);

	// align
	bitin.flush();
	bitin.getBitsRead();

	// std::cerr << "numentries " << numentries << std::endl;

	// number of stored entries is one more than numentries
	uint64_t const total = numentries+1;

	libmaus::autoarray::AutoArray< IndexEntry > entries(total,false);

	for ( uint64_t idx = 0; idx < total; ++idx )
	{
		uint64_t const entrypos = bitin.read(posbits);
		uint64_t const entryk = bitin.read(kbits);
		uint64_t const entryv = bitin.read(vbits);
		entries[idx] = IndexEntry(entrypos,entryk,entryv);
	}

	IndexEntryContainer::unique_ptr_type container(new IndexEntryContainer(entries));

	return UNIQUE_PTR_MOVE(container);
}
/* * load index for one file */ static libmaus::autoarray::AutoArray< IndexEntry > loadIndex(std::string const & filename) { uint64_t const indexpos = getIndexPos(filename); std::ifstream indexistr(filename.c_str(),std::ios::binary); if ( ! indexistr.is_open() ) { ::libmaus::exception::LibMausException se; se.getStream() << "RLDecoder::loadIndex(): Failed to open file " << filename << std::endl; se.finish(); throw se; } // seek to index position indexistr.seekg(indexpos,std::ios::beg); if ( static_cast<int64_t>(indexistr.tellg()) != static_cast<int64_t>(indexpos) ) { ::libmaus::exception::LibMausException se; se.getStream() << "Failed to seek to index position " << indexpos << " in file " << filename << " of size " << ::libmaus::util::GetFileSize::getFileSize(filename) << std::endl; se.finish(); throw se; } ::libmaus::bitio::StreamBitInputStream SBIS(indexistr); // read size of index uint64_t const numentries = ::libmaus::bitio::readElias2(SBIS); // pos bits unsigned int const posbits = ::libmaus::bitio::readElias2(SBIS); // k bits unsigned int const kbits = ::libmaus::bitio::readElias2(SBIS); // k acc /* uint64_t const symacc = */ ::libmaus::bitio::readElias2(SBIS); // v bits unsigned int const vbits = ::libmaus::bitio::readElias2(SBIS); // v acc /* uint64_t const symacc = */ ::libmaus::bitio::readElias2(SBIS); // align SBIS.flush(); SBIS.getBitsRead(); // std::cerr << "numentries " << numentries << std::endl; // read index libmaus::autoarray::AutoArray< IndexEntry > index(numentries,false); // #define INDEXLOADERDEBUG #if defined(INDEXLOADERDEBUG) libmaus::autoarray::AutoArray< IndexEntry > tindex(numentries+1); #endif for ( uint64_t i = 0; i < numentries; ++i ) { uint64_t const pos = SBIS.read(posbits); uint64_t const kcnt = SBIS.read(kbits); uint64_t const vcnt = SBIS.read(vbits); index[i] = IndexEntry(pos,kcnt,vcnt); #if defined(INDEXLOADERDEBUG) tindex[i] = index[i]; #endif } if ( numentries ) { assert ( index[0].kcnt == 0 ); assert ( index[0].vcnt == 0 ); 
for ( uint64_t i = 1; i < numentries; ++i ) { index[i-1].kcnt = index[i].kcnt; index[i-1].vcnt = index[i].vcnt; } /* uint64_t const pos = */ SBIS.read(posbits); index[numentries-1].kcnt = SBIS.read(kbits); index[numentries-1].vcnt = SBIS.read(vbits); #if defined(INDEXLOADERDEBUG) tindex[numentries].kcnt = index[numentries-1].kcnt; tindex[numentries].vcnt = index[numentries-1].vcnt; #endif } for ( uint64_t i = numentries-1; i >= 1; --i ) { index[i].kcnt -= index[i-1].kcnt; index[i].vcnt -= index[i-1].vcnt; } #if defined(INDEXLOADERDEBUG) for ( uint64_t i = 0; i < numentries; ++i ) { assert ( index[i].kcnt == tindex[i+1].kcnt-tindex[i].kcnt ); assert ( index[i].vcnt == tindex[i+1].vcnt-tindex[i].vcnt ); } #endif #if defined(INDEXLOADERDEBUG) IndexDecoderData IDD(filename); assert ( IDD.numentries == numentries ); assert ( IDD.posbits == posbits ); assert ( IDD.kbits == kbits ); assert ( IDD.vbits == vbits ); // assert ( IDD.symacc == symacc ); std::cerr << "((**++CHECKING " << numentries << "..."; uint64_t tkacc = 0; uint64_t tvacc = 0; for ( uint64_t i = 0; i < numentries; ++i ) { IndexEntry const P = IDD.readEntry(i); assert ( P.pos == index[i].pos ); assert ( P.kcnt == tkacc ); assert ( P.vcnt == tvacc ); tkacc += index[i].kcnt; tvacc += index[i].vcnt; } assert ( tkacc == IDD.readEntry(numentries).kcnt ); assert ( tvacc == IDD.readEntry(numentries).vcnt ); std::cerr << "**++))" << std::endl; #endif // std::cerr << "loaded index of size " << numentries << std::endl; return index; }
/*
 * Create a new input_stream_type on filename and return it wrapped in an
 * input_stream_pointer_type.
 */
input_stream_pointer_type openFile() const
{
	return input_stream_pointer_type(new input_stream_type(filename));
}