/**
 * constructor by file name
 *
 * Opens filename through InputOutputStreamFactoryContainer::constructUnique with
 * the open mode in|out|trunc|binary and binds stream to the resulting object.
 * ic starts at 0, flushed at false, and writeCache is constructed with the
 * argument 1024. wa and wc are set to writeCache.begin(), we to writeCache.end().
 *
 * NOTE(review): wc(wa) reads the member wa and stream(*Pstream) reads Pstream, so
 * these members presumably are declared in this order in the class -- confirm.
 *
 * @param filename name of the file to open
 **/
ExternalMemoryIndexGenerator(std::string const & filename) : Pstream(libmaus2::aio::InputOutputStreamFactoryContainer::constructUnique(filename,std::ios::in|std::ios::out|std::ios::trunc|std::ios::binary)), stream(*Pstream), ic(0), flushed(false), writeCache(1024), wa(writeCache.begin()), wc(wa), we(writeCache.end()) { }
/**
 * constructor from a symbol sequence
 *
 * Builds one bit vector based rank dictionary per symbol value in
 * [0,max(rmaxval,max_i BWT[i])]. The dictionary for symbol s has bit j set
 * iff BWT[j] == s. D[s] is filled with rank1(n-1) of dictionary s and D is
 * then turned into prefix sums via D.prefixSums().
 * Nothing is constructed if rn is zero.
 *
 * @param BWT sequence of symbols, accessed as BWT[0],...,BWT[rn-1]
 * @param rn length of the sequence
 * @param rmaxval lower bound for the largest symbol value considered
 **/
MultiRankCacheLF ( iterator BWT, uint64_t const rn, uint64_t const rmaxval = 0)
: n(rn)
{
	// empty sequence: leave dictionaries and D untouched
	if ( ! n )
		return;

	// largest symbol value, at least rmaxval
	uint64_t largestsym = rmaxval;
	for ( uint64_t pos = 0; pos < n; ++pos )
		largestsym = std::max ( largestsym, static_cast<uint64_t>(BWT[pos]) );

	rank_dictionaries = ::libmaus2::autoarray::AutoArray < rank_ptr_type >(largestsym+1);
	uint64_t const numsyms = rank_dictionaries.size();

	for ( uint64_t sym = 0; sym < numsyms; ++sym )
	{
		// dictionary over n+1 bits
		rank_ptr_type tdict(new rank_type(n+1));
		rank_dictionaries[sym] = UNIQUE_PTR_MOVE(tdict);

		writer_type writer = rank_dictionaries[sym]->getWriteContext();

		// indicator bits for symbol sym
		for ( uint64_t pos = 0; pos < n; ++pos )
			writer.writeBit(BWT[pos] == sym);

		// write additional bit to make rankm1 defined for n
		writer.writeBit(0);
		writer.flush();
	}

	// per symbol rank1(n-1), followed by prefix sum computation
	D = ::libmaus2::autoarray::AutoArray < uint64_t >(numsyms+1);
	for ( uint64_t sym = 0; sym < numsyms; ++sym )
		D [ sym ] = rank_dictionaries[sym]->rank1(n-1);
	D.prefixSums();
}
/**
 * get symbol at position i by scanning the rank dictionaries
 *
 * @param i position
 * @return smallest j such that bit i is set in rank dictionary j,
 *         or rank_dictionaries.size() if no dictionary has bit i set
 **/
uint64_t operator[](uint64_t const i) const
{
	uint64_t const numdicts = rank_dictionaries.size();
	uint64_t sym = 0;

	// advance until a dictionary with bit i set is found
	while ( sym < numdicts && !(*(rank_dictionaries[sym]))[i] )
		++sym;

	return sym;
}
::libmaus2::util::Histogram::unique_ptr_type libmaus2::util::Utf8String::getHistogram(::libmaus2::autoarray::AutoArray<uint8_t> const & A) { #if defined(_OPENMP) uint64_t const numthreads = omp_get_max_threads(); #else uint64_t const numthreads = 1; #endif ::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads); uint64_t const numparts = partstarts.size()-1; ::libmaus2::util::Histogram::unique_ptr_type hist(new ::libmaus2::util::Histogram); ::libmaus2::parallel::OMPLock lock; #if defined(_OPENMP) #pragma omp parallel for #endif for ( int64_t t = 0; t < static_cast<int64_t>(numparts); ++t ) { ::libmaus2::util::Histogram::unique_ptr_type lhist(new ::libmaus2::util::Histogram); uint64_t codelen = 0; uint64_t const tcodelen = partstarts[t+1]-partstarts[t]; ::libmaus2::util::GetObject<uint8_t const *> G(A.begin()+partstarts[t]); while ( codelen != tcodelen ) (*lhist)(::libmaus2::util::UTF8::decodeUTF8(G,codelen)); lock.lock(); hist->merge(*lhist); lock.unlock(); } return UNIQUE_PTR_MOVE(hist); }
/**
 * serialise the array D: first the number of entries, then each entry in order
 *
 * @param stream output stream
 **/
void serialise(stream_type & stream) const
{
	typedef ::libmaus2::util::NumberSerialisation serialiser_type;

	// length prefix
	serialiser_type::serialiseNumber(stream,D.size());

	// entries
	uint64_t idx = 0;
	while ( idx < D.size() )
	{
		serialiser_type::serialiseNumber(stream,D[idx]);
		++idx;
	}
}
::std::streampos seekpos(::std::streampos sp, ::std::ios_base::openmode which = ::std::ios_base::in | ::std::ios_base::out) { if ( which & ::std::ios_base::in ) { int64_t const cur = symsread-(egptr()-gptr()); int64_t const curlow = cur - static_cast<int64_t>(gptr()-eback()); int64_t const curhigh = cur + static_cast<int64_t>(egptr()-gptr()); // call relative seek, if target is in range if ( sp >= curlow && sp <= curhigh ) return seekoff(static_cast<int64_t>(sp) - cur, ::std::ios_base::cur, which); // target is out of range, we really need to seek uint64_t tsymsread = (sp / buffersize)*buffersize; symsread = tsymsread; stream.clear(); stream.seekg( (symsread * b) / 8 ); setg(buffer.end(),buffer.end(),buffer.end()); underflow(); setg(eback(),gptr() + (static_cast<int64_t>(sp)-static_cast<int64_t>(tsymsread)), egptr()); return sp; } return -1; }
/**
 * constructor
 *
 * Stores the socket pointer and tag as given and constructs the buffer B from
 * bufsize. pa and pc are set to B.get(), pe to pa+B.getN().
 *
 * NOTE(review): only the pointer rdst is stored; ownership and required lifetime
 * of the socket are not visible here -- confirm with the class declaration.
 *
 * @param rdst destination socket
 * @param rtag tag stored in the member tag
 * @param bufsize size passed to the constructor of the buffer B
 **/
SocketOutputBufferTemplate( ::libmaus2::network::SocketBase * rdst, int const rtag, uint64_t const bufsize) : dst(rdst), tag(rtag), B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()) { }
/**
 * compute, for each index entry, a pair of consecutive prefix sums over the
 * vcnt fields
 *
 * The vcnt values of all entries index[i][j] are collected in order (i major,
 * j minor) into an array with one extra trailing element, prefixSums() is
 * called on it, and entry k of the result is (preaccu[k],preaccu[k+1]).
 *
 * @return array with one pair per index entry
 **/
::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeSymAccu() const
{
	// total number of index entries
	uint64_t numint = 0;
	for ( uint64_t blockid = 0; blockid < index.size(); ++blockid )
		numint += index[blockid].size();

	// collect vcnt values
	::libmaus2::autoarray::AutoArray<uint64_t> preaccu(numint+1);
	uint64_t filled = 0;
	for ( uint64_t blockid = 0; blockid < index.size(); ++blockid )
		for ( uint64_t entry = 0; entry < index[blockid].size(); ++entry )
		{
			preaccu[filled] = index[blockid][entry].vcnt;
			++filled;
		}

	preaccu.prefixSums();

	// pair up neighbouring values
	::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > symaccu(numint);
	for ( uint64_t k = 0; k+1 < preaccu.size(); ++k )
		symaccu[k] = std::pair<uint64_t,uint64_t>(preaccu[k],preaccu[k+1]);

	#if 0
	std::cerr << "presymaccu:" << std::endl;
	for ( uint64_t k = 0; k < preaccu.size(); ++k )
		std::cerr << preaccu[k] << std::endl;
	std::cerr << "symaccu:" << std::endl;
	for ( uint64_t k = 0; k < symaccu.size(); ++k )
		std::cerr << "[" << k << "]=[" << symaccu[k].first << "," << symaccu[k].second << ")" << std::endl;
	#endif

	return symaccu;
}
/**
 * constructor by output stream
 *
 * Constructs the buffer B from bufsize, sets pa and pc to B.get() and pe to
 * pa+B.getN(), initialises W from out and sets datawrittentofile to 0.
 *
 * NOTE(review): W presumably is a reference to out, in which case out has to
 * outlive this object -- confirm with the member declaration.
 *
 * @param out output stream
 * @param bufsize output buffer size (passed to the constructor of B)
 **/
SynchronousGenericOutput(std::ostream & out, uint64_t const bufsize) : B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()), W(out), datawrittentofile(0) { }
/**
 * refill the get area from the wrapped stream
 *
 * If unread characters remain, the current one is returned without reading.
 * Otherwise up to pushbackspace of the most recently read characters are moved
 * to just before the read position so they stay available for putback, the
 * rest of the buffer is filled from the stream and streamreadpos is advanced
 * by the number of characters obtained.
 *
 * @return next character, or traits_type::eof() if no character could be read
 **/
int_type underflow()
{
	// unread data still available
	if ( gptr() < egptr() )
		return static_cast<int_type>(*uptr());

	assert ( gptr() == egptr() );

	// new data is read to this position, the space in front is the putback area
	char * const readstart = buffer.begin() + pushbackspace;

	// number of characters kept for putback: limited by what was
	// previously read and by the space we have to copy into
	uint64_t const prevread = static_cast<uint64_t>(gptr()-eback());
	uint64_t const putbackroom = static_cast<uint64_t>(readstart-buffer.begin());
	uint64_t const keep = std::min(prevread,putbackroom);

	::std::memmove(readstart-keep,gptr()-keep,keep);

	// read new data
	stream.read(readstart, buffer.end()-readstart);
	size_t const got = stream.gcount();
	streamreadpos += got;

	setg(readstart-keep, readstart, readstart+got);

	return got ? static_cast<int_type>(*uptr()) : traits_type::eof();
}
bool decodeBlock() { while ( FBO.file < index.Vfn.size() && FBO.block >= index.blocksPerFile[FBO.file] ) { FBO.file++; FBO.block = 0; FBO.blockoffset = 0; // check this if we change the file format FBO.offset = 0; openFile(); } if ( FBO.file == index.Vfn.size() ) { PSGI.reset(); PISI.reset(); return false; } libmaus2::gamma::GammaDecoder< libmaus2::aio::SynchronousGenericInput<uint64_t> > GD(*PSGI); uint64_t const bs = GD.decode() + 1; B.ensureSize(bs); for ( uint64_t i = 0; i < bs; ++i ) B[i] = GD.decode(); pa = B.begin(); pc = B.begin(); pe = B.begin() + bs; FBO.block += 1; return true; }
void init(bool const repos) { // set empty buffer setgchecked(buffer.end(), buffer.end(), buffer.end()); // seek if ( repos ) fd->lseek(symsread,SEEK_SET); }
/**
 * serialise object to an output stream
 *
 * Writes, in this order: FI (via FastInterval::serialise), designators,
 * shortpointers, longpointers and text. The order of the calls determines the
 * layout of the serialised data and must match the corresponding reader.
 *
 * @param out output stream
 **/
void serialise(std::ostream & out) const { ::libmaus2::fastx::FastInterval::serialise(out,FI); designators.serialize(out); shortpointers.serialize(out); longpointers.serialize(out); text.serialize(out); }
/**
 * constructor
 *
 * Stores the stream and both sizes, constructs the buffer from
 * buffersize+pushbackspace (second constructor argument false), sets
 * streamreadpos to 0 and installs an empty get area (all three get pointers at
 * buffer.end()).
 *
 * NOTE(review): the initialiser of buffer reads the members buffersize and
 * pushbackspace, so they presumably are declared before buffer -- confirm.
 *
 * @param rstream wrapped input stream
 * @param rbuffersize size of the read buffer
 * @param rpushbackspace size of the push back area in front of the read buffer
 **/
StreamWrapperBuffer(stream_type & rstream, ::std::size_t rbuffersize, std::size_t rpushbackspace) : stream(rstream), buffersize(rbuffersize), pushbackspace(rpushbackspace), buffer(buffersize+pushbackspace,false), streamreadpos(0) { setg(buffer.end(), buffer.end(), buffer.end()); }
void init(bool const repos) { // set empty buffer setgchecked(buffer.end(), buffer.end(), buffer.end()); // seek if ( repos ) stream.lseek(symsread); }
/**
 * constructor
 *
 * Constructs the buffer B from getBgzfMaxBlockSize() (second constructor
 * argument false) and sets pa and pc to B.begin() and pe to B.end().
 **/
BgzfParallelRecodeDeflateBase() : B(getBgzfMaxBlockSize(),false), pa(B.begin()), pc(B.begin()), pe(B.end()) { }
/**
 * constructor by file name
 *
 * Opens fn via doOpen() and sets closefd to true. The block size is
 * getOptimalIOBlockSize(fd,std::string()) if rbuffersize is negative and
 * rbuffersize otherwise; the buffer is constructed from that size. prevwrite
 * starts as (0,0).
 *
 * The put area ends at buffer.end()-1, i.e. the last buffer element is not part
 * of it.
 * NOTE(review): presumably the spare element is used by overflow() for the
 * character it is called with -- confirm. A resulting buffer size of 0 would
 * put the end of the put area in front of its start.
 *
 * @param fn name of the output file
 * @param rbuffersize buffer size, a negative value selects the size
 *        returned by getOptimalIOBlockSize
 **/
LinuxStreamingPosixFdOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize) : fd(doOpen(fn)), closefd(true), optblocksize((rbuffersize < 0) ? getOptimalIOBlockSize(fd,std::string()) : rbuffersize), buffersize(optblocksize), buffer(buffersize,false), prevwrite(0,0) { setp(buffer.begin(),buffer.end()-1); }
/**
 * constructor from hash intervals and file prefix
 *
 * Stores the address of rHI (no copy is made here), creates one buffer slot
 * per interval, builds an IntervalTree over all intervals and calls
 * init(HI->size(), fileprefix).
 *
 * NOTE(review): as only the address of rHI is kept, rHI presumably has to
 * outlive this object -- confirm.
 *
 * @param rHI hash intervals
 * @param fileprefix prefix for files
 **/
SynchronousOutputFile8ArrayTemplate( ::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI, std::string const & fileprefix ) : HI(&rHI), buffers(HI->size()), IT(new ::libmaus2::util::IntervalTree(*HI,0,HI->size())) { init ( HI->size(), fileprefix ); }
/**
 * constructor from hash intervals and temporary file name generator
 *
 * Stores the address of rHI (no copy is made here), creates one buffer slot
 * per interval, builds an IntervalTree over all intervals and calls
 * init(HI->size(), tmpgen).
 *
 * NOTE(review): as only the address of rHI is kept, rHI presumably has to
 * outlive this object -- confirm.
 *
 * @param rHI hash intervals
 * @param tmpgen temporary file name generator object
 **/
SynchronousOutputFile8ArrayTemplate( ::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI, ::libmaus2::util::TempFileNameGenerator & tmpgen ) : HI(&rHI), buffers(HI->size()), IT(new ::libmaus2::util::IntervalTree(*HI,0,HI->size())) { init ( HI->size(), tmpgen ); }
/**
 * constructor by file name
 *
 * Opens fn via doOpen(). The buffer size is getDefaultBlockSize() if
 * rbuffersize is negative and rbuffersize otherwise; the buffer is constructed
 * from that size.
 *
 * The put area ends at buffer.end()-1, i.e. the last buffer element is not part
 * of it.
 * NOTE(review): presumably the spare element is used by overflow() for the
 * character it is called with -- confirm. A resulting buffer size of 0 would
 * put the end of the put area in front of its start.
 *
 * @param fn name of the output file
 * @param rbuffersize buffer size, a negative value selects getDefaultBlockSize()
 **/
MemoryOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize) : fd(doOpen(fn)), buffersize((rbuffersize < 0) ? getDefaultBlockSize() : rbuffersize), buffer(buffersize,false) { setp(buffer.begin(),buffer.end()-1); }
/**
 * constructor from hash intervals, file names and truncate setting
 *
 * Stores the address of rHI (no copy is made here), creates one buffer slot
 * per interval, builds an IntervalTree over all intervals and calls
 * init(filenames, truncate).
 *
 * NOTE(review): as only the address of rHI is kept, rHI presumably has to
 * outlive this object; filenames presumably needs one entry per interval --
 * confirm against init().
 *
 * @param rHI hash intervals
 * @param filenames output buffer file names
 * @param truncate if true, then truncate files during buffer creation
 **/
SynchronousOutputFile8ArrayTemplate( ::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI, std::vector<std::string> const & filenames, bool const truncate ) : HI(&rHI), buffers(HI->size()), IT(new ::libmaus2::util::IntervalTree(*HI,0,HI->size())) { init ( filenames, truncate ); }
/**
 * serialise the block index
 *
 * Writes blocksize, lastblocksize, maxblockbytes and blockstarts.size()-1,
 * followed by all blockstarts.size() entries of blockstarts (one more value
 * than the count written in front of them).
 *
 * @param stream output stream
 **/
void serialise(stream_type & stream) const
{
	typedef ::libmaus2::util::NumberSerialisation serialiser_type;

	// header
	serialiser_type::serialiseNumber(stream,blocksize);
	serialiser_type::serialiseNumber(stream,lastblocksize);
	serialiser_type::serialiseNumber(stream,maxblockbytes);
	serialiser_type::serialiseNumber(stream,blockstarts.size()-1);

	// all block start values
	uint64_t idx = 0;
	while ( idx < blockstarts.size() )
	{
		serialiser_type::serialiseNumber(stream,blockstarts[idx]);
		++idx;
	}
}
/**
 * constructor by file name
 *
 * If truncate is true the file is opened through ofstream_type(filename),
 * otherwise through a std::fstream opened with binary|in|out|ate. In both cases
 * the put position is subsequently set to offset relative to the beginning of
 * the file, so without truncation writing starts at offset (0 by default), not
 * at the end of the existing data.
 *
 * The buffer B is constructed from bufsize (second constructor argument false),
 * pa and pc are set to B.get(), pe to pa+B.getN() and datawrittentofile to 0.
 *
 * @param filename name of output file
 * @param bufsize size of output buffer
 * @param truncate true: open via ofstream_type (NOTE(review): presumably
 *        truncating the file -- confirm); false: open the existing file for
 *        update without truncating it
 * @param offset write offset passed to seekp, relative to the start of the file
 *
 * The fifth (unnamed, formerly metasync) parameter is not used.
 **/
SynchronousGenericOutput(std::string const & filename, uint64_t const bufsize, bool const truncate = true, uint64_t const offset = 0, bool const /* metasync */ = true) : B(bufsize,false), pa(B.get()), pc(pa), pe(pa+B.getN()), PW ( truncate ? new ofstream_type(filename) : 0), PF ( truncate ? 0 : new std::fstream(filename.c_str(), std::ios::binary|std::ios::in|std::ios::out|std::ios::ate) ), W ( truncate ? (static_cast<std::ostream &>(*PW)) : (static_cast<std::ostream &>(*PF)) ), datawrittentofile(0) { W.seekp(offset,std::ios::beg); }
/**
 * Sums sizeof(uint64_t) and the byteSize() values of B, the object R points
 * to, A8 and A64.
 *
 * @return estimated size of object in bytes
 **/
uint64_t byteSize() const
{
	uint64_t total = sizeof(uint64_t);

	total += B.byteSize();
	total += R->byteSize();
	total += A8.byteSize();
	total += A64.byteSize();

	return total;
}
void exec() { pid = fork(); if ( pid < 0 ) { ::libmaus2::exception::LibMausException ex; ex.getStream() << "failed to fork: " << strerror(errno); ex.finish(); throw ex; } if ( ! pid ) { signal(SIGCHLD,sigchildhandler); while ( true ) { try { ::libmaus2::network::SocketBase::unique_ptr_type recsock = seso->accept(); pid_t childpid = fork(); if ( childpid == 0 ) { try { char const * ptr = data.begin(); char const * ptre = data.end(); uint64_t const bs = 4096; while ( ptr != ptre ) { uint64_t const rest = ptre-ptr; uint64_t const towrite = std::min(bs,rest); recsock->write(ptr,towrite); ptr += towrite; } } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; } _exit(0); } } catch(std::exception const & ex) { std::cerr << "Error in SingleFileServer: " << ex.what() << std::endl; } } _exit(0); } }
void fillBuffer() { assert ( pc == pe ); if ( setpos ) { // std::cerr << "Seeking to " << readpos << std::endl; in.seekg(readpos); in.clear(); } if ( in.peek() >= 0 && readpos < endpos ) { #if 0 std::cerr << "Filling block, readpos " << readpos << " stream at pos " << in.tellg() << " endpos " << endpos << std::endl; #endif uint64_t blocksize = sizeof(uint64_t) + sizeof(uint64_t); // size of uncompressed buffer uint64_t const n = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in); // size of compressed data uint64_t const datasize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in); // add to block size blocksize += datasize; if ( n > B.size() ) { B = ::libmaus2::autoarray::AutoArray<char>(0,false); B = ::libmaus2::autoarray::AutoArray<char>(n,false); } pa = B.begin(); pc = pa; pe = pa + n; ::libmaus2::aio::IStreamWrapper wrapper(in); ::libmaus2::lz::IstreamSource< ::libmaus2::aio::IStreamWrapper> insource(wrapper,datasize); try { SnappyCompress::uncompress(insource,B.begin(),n); } catch(std::exception const & ex) { libmaus2::exception::LibMausException lme; lme.getStream() << "Failed to decompress snappy compressed data, comp=" << datasize << ", uncomp=" << n << ":\n" << ex.what() << "\n"; lme.finish(); throw lme; } readpos += blocksize; } }
/**
 * constructor
 *
 * Initialises writer from rwriter, constructs symcntruns from bufsize and sets
 * ra and rc to symcntruns.begin() and re to symcntruns.end(). currun starts as
 * (std::numeric_limits<int64_t>::min(),false,0) and indexwritten as false.
 *
 * NOTE(review): the initial currun value presumably serves as a "no run yet"
 * marker -- confirm with the code consuming currun.
 *
 * @param rwriter bit writer used for output
 * @param bufsize size passed to the constructor of symcntruns (default 64*1024)
 **/
SymBitEncoderBaseTemplate(bit_writer_type & rwriter, uint64_t const bufsize = 64*1024ull) : writer(rwriter), symcntruns(bufsize), ra(symcntruns.begin()), rc(symcntruns.begin()), re(symcntruns.end()), currun(std::numeric_limits<int64_t>::min(),false,0), indexwritten(false) { }
/**
 * constructor by file name
 *
 * Constructs COS from filename, SGO on top of COS with buffer size rbufsize and
 * GE on top of SGO. The array A is constructed from blocksize, pa and pc are
 * set to A.begin() and pe to A.end(). cursym and curcnt start at 0,
 * indexwritten at false. The constructor body then puts n followed by albits
 * into SGO, so these are the first two values written through SGO.
 *
 * NOTE(review): several initialisers read members initialised earlier in the
 * list (blocksize, COS, SGO, pa), so the member declaration order presumably
 * matches -- confirm.
 *
 * @param filename name of the output file
 * @param ralbits value stored in albits and written after n
 * @param n value written first to SGO
 * @param rblocksize block size, also the size passed to the constructor of A
 * @param rbufsize buffer size for SGO (default 64*1024)
 **/
GammaRLEncoder(std::string const & filename, unsigned int const ralbits, uint64_t const n, uint64_t const rblocksize, uint64_t const rbufsize = 64*1024) : blocksize(rblocksize), COS(filename), SGO(COS,rbufsize), GE(SGO), A(blocksize), pa(A.begin()), pc(pa), pe(A.end()), cursym(0), curcnt(0), indexwritten(false), albits(ralbits) { SGO.put(n); SGO.put(albits); }
/**
 * constructor by file name
 *
 * Constructs COS from filename and the buffer B from bufsize (second
 * constructor argument false). pa and pc are set to B.begin(), pe to B.end().
 *
 * @param filename name passed to the constructor of COS
 * @param bufsize size passed to the constructor of the buffer B
 **/
GraphEdgeBlockBuffer(std::string const & filename, uint64_t const bufsize) : COS(filename), B(bufsize,false), pa(B.begin()), pc(pa), pe(B.end()) { }
/**
 * constructor
 *
 * Stores the input interface and both sizes, constructs the buffer from
 * putbackspace + blocksize (second constructor argument false) and installs an
 * empty get area.
 *
 * @param rstream socket input interface data is read from
 * @param rblocksize size of the read block
 * @param rputbackspace size of the put back area (default 0)
 **/
SocketInputStreamBuffer(
	::libmaus2::network::SocketInputInterface & rstream,
	uint64_t const rblocksize,
	uint64_t const rputbackspace = 0
)
: stream(rstream),
  blocksize(rblocksize),
  putbackspace(rputbackspace),
  buffer(putbackspace + blocksize,false)
{
	// set empty buffer
	setg(buffer.end(), buffer.end(), buffer.end());
}