/**
 * read a stream until no more data can be obtained and return the bytes read
 *
 * @param in input stream
 * @return array containing exactly the bytes read from in
 **/
static libmaus2::autoarray::AutoArray<char> loadFile(std::istream & in)
{
	// start with a single element and double the array whenever it is full
	libmaus2::autoarray::AutoArray<char> data(1);
	// number of bytes stored in data so far
	uint64_t filled = 0;

	while ( in )
	{
		in.read(data.begin() + filled, data.size() - filled);
		uint64_t const got = in.gcount();

		// nothing was read, stop
		if ( ! got )
			break;

		filled += got;

		// array is full, move the content to one of twice the size
		if ( filled == data.size() )
		{
			libmaus2::autoarray::AutoArray<char> grown(2*data.size(),false);
			std::copy(data.begin(),data.end(),grown.begin());
			data = grown;
		}
	}

	// copy the used prefix to an array of matching size
	libmaus2::autoarray::AutoArray<char> result(filled,false);
	std::copy(data.begin(),data.begin()+filled,result.begin());

	return result;
}
/**
 * buffer underflow callback; refills the get area from stream
 *
 * @return next symbol or traits_type::eof() if no more data could be read
 **/
int_type underflow()
{
	// get area is not empty yet, return the current symbol
	if ( gptr() < egptr() )
		return static_cast<int_type>(*uptr());

	assert ( gptr() == egptr() );

	// position new data is read to, the elements in front of it are the push back space
	char * const readstart = buffer.begin() + pushbackspace;

	// number of symbols kept for push back: limited by what was
	// previously read and by the space in front of readstart
	uint64_t const previouslyread = static_cast<uint64_t>(gptr()-eback());
	uint64_t const pushbackroom = static_cast<uint64_t>(readstart-buffer.begin());
	uint64_t const keep = std::min(previouslyread,pushbackroom);

	// move the kept symbols directly in front of readstart
	::std::memmove(readstart-keep,gptr()-keep,keep);

	// fill the rest of the buffer from the stream
	stream.read(readstart, buffer.end()-readstart);
	size_t const got = stream.gcount();
	streamreadpos += got;

	setg(readstart-keep, readstart, readstart+got);

	return got ? static_cast<int_type>(*uptr()) : traits_type::eof();
}
bool decodeBlock() { while ( FBO.file < index.Vfn.size() && FBO.block >= index.blocksPerFile[FBO.file] ) { FBO.file++; FBO.block = 0; FBO.blockoffset = 0; // check this if we change the file format FBO.offset = 0; openFile(); } if ( FBO.file == index.Vfn.size() ) { PSGI.reset(); PISI.reset(); return false; } libmaus2::gamma::GammaDecoder< libmaus2::aio::SynchronousGenericInput<uint64_t> > GD(*PSGI); uint64_t const bs = GD.decode() + 1; B.ensureSize(bs); for ( uint64_t i = 0; i < bs; ++i ) B[i] = GD.decode(); pa = B.begin(); pc = B.begin(); pe = B.begin() + bs; FBO.block += 1; return true; }
void fillBuffer() { assert ( pc == pe ); if ( setpos ) { // std::cerr << "Seeking to " << readpos << std::endl; in.seekg(readpos); in.clear(); } if ( in.peek() >= 0 && readpos < endpos ) { #if 0 std::cerr << "Filling block, readpos " << readpos << " stream at pos " << in.tellg() << " endpos " << endpos << std::endl; #endif uint64_t blocksize = sizeof(uint64_t) + sizeof(uint64_t); // size of uncompressed buffer uint64_t const n = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in); // size of compressed data uint64_t const datasize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in); // add to block size blocksize += datasize; if ( n > B.size() ) { B = ::libmaus2::autoarray::AutoArray<char>(0,false); B = ::libmaus2::autoarray::AutoArray<char>(n,false); } pa = B.begin(); pc = pa; pe = pa + n; ::libmaus2::aio::IStreamWrapper wrapper(in); ::libmaus2::lz::IstreamSource< ::libmaus2::aio::IStreamWrapper> insource(wrapper,datasize); try { SnappyCompress::uncompress(insource,B.begin(),n); } catch(std::exception const & ex) { libmaus2::exception::LibMausException lme; lme.getStream() << "Failed to decompress snappy compressed data, comp=" << datasize << ", uncomp=" << n << ":\n" << ex.what() << "\n"; lme.finish(); throw lme; } readpos += blocksize; } }
/**
 * buffer underflow callback; refills the get area from the bgzf stream,
 * skipping empty blocks which are not marked as end of stream
 *
 * @return next symbol or traits_type::eof() if the end of the stream was reached
 **/
int_type underflow()
{
	// get area is not empty yet, return the current symbol
	if ( gptr() < egptr() )
		return static_cast<int_type>(*uptr());

	assert ( gptr() == egptr() );

	// position new data is read to, the elements in front of it are the push back space
	char * const readstart = buffer.begin() + pushbackspace;

	// number of symbols kept for push back: limited by what was
	// previously read and by the space in front of readstart
	uint64_t const previouslyread = static_cast<uint64_t>(gptr()-eback());
	uint64_t const pushbackroom = static_cast<uint64_t>(readstart-buffer.begin());
	uint64_t const keep = std::min(previouslyread,pushbackroom);

	::std::memmove(readstart-keep,gptr()-keep,keep);

	// number of uncompressed bytes obtained
	size_t got = 0;
	// true if an empty block at the end of the stream was seen
	bool endofstream = false;

	// read blocks until one is non empty or the stream has ended
	do
	{
		BgzfInflateInfo const info = stream.readAndInfo(readstart, buffer.end()-readstart);
		got = info.uncompressed;
		endofstream = (!got) && info.streameof;
	} while ( !got && !endofstream );

	// got is zero here if the end of the stream was reached
	streamreadpos += got;

	setg(readstart-keep, readstart, readstart+got);

	return got ? static_cast<int_type>(*uptr()) : traits_type::eof();
}
/**
 * constructor; sets up the symbol tables T0 and T1 and registers the
 * MD and NM tags in auxvec
 **/
MdStringComputationContext()
: T0(256,false), T1(256,false), nm(0)
{
	// symbols other than A,C,G,T map to 4 in T0 and to 5 in T1
	std::fill(T0.begin(),T0.end(),4);
	std::fill(T1.begin(),T1.end(),5);

	// A,C,G,T map to 0,1,2,3 in both tables irrespective of case
	char const * const upper = "ACGT";
	char const * const lower = "acgt";
	for ( int code = 0; code < 4; ++code )
		T0[upper[code]] = T0[lower[code]] = T1[upper[code]] = T1[lower[code]] = code;

	auxvec.set("MD");
	auxvec.set("NM");
}
/**
 * constructor; opens filename for reading and writing in binary mode,
 * truncating any previous content
 *
 * @param filename name of the file backing the generator
 **/
ExternalMemoryIndexGenerator(std::string const & filename)
:
	Pstream(
		libmaus2::aio::InputOutputStreamFactoryContainer::constructUnique(
			filename,
			std::ios::in|std::ios::out|std::ios::trunc|std::ios::binary
		)
	),
	stream(*Pstream),
	ic(0),
	flushed(false),
	// write cache of 1024 elements with start, current and end pointer
	writeCache(1024),
	wa(writeCache.begin()),
	wc(wa),
	we(writeCache.end())
{
}
/**
 * get the entry stored in line i
 *
 * @param i line number
 * @param entry object reset to the content of line i (without trailing white space)
 * @throws libmaus2::exception::LibMausException if i is not smaller than nl
 **/
void get(uint64_t const i, libmaus2::bambam::GeneFlatFileEntry & entry) const
{
	if ( i >= nl )
	{
		libmaus2::exception::LibMausException lme;
		lme.getStream() << "GeneFlatFile::get(): line " << i << " is out of range." << std::endl;
		lme.finish();
		throw lme;
	}

	// interval [first,second) of line i in C
	std::pair<uint64_t,uint64_t> P = LA->lineInterval(i);

	// remove trailing white space; the symbol is converted to unsigned char
	// as calling isspace with a negative value other than EOF is undefined
	while (
		P.second != P.first
		&&
		isspace(static_cast<unsigned char>(C[P.second-1]))
	)
		--P.second;

	entry.reset(C.begin() + P.first,C.begin() + P.second);
}
/**
 * constructor; writes the number of symbols to the writer
 *
 * @param rwriter bit writer used for output
 * @param rnumsyms number of symbols, written via writeElias2
 * @param bufsize number of elements in the internal buffer
 **/
RLEncoderBaseTemplate(
	bit_writer_type & rwriter,
	uint64_t const rnumsyms,
	uint64_t const bufsize = 4ull*1024ull*1024ull
)
:
	writer(rwriter),
	numsyms(rnumsyms),
	// buffer with start, current and end pointer
	rlbuffer(bufsize),
	pa(rlbuffer.begin()),
	pc(pa),
	pe(rlbuffer.end()),
	cursym(0),
	curcnt(0),
	indexwritten(false)
{
	// std::cerr << "Writing RL file of length " << numsyms << std::endl;
	writer.writeElias2(numsyms);
}
::libmaus2::util::Histogram::unique_ptr_type libmaus2::util::Utf8String::getHistogram(::libmaus2::autoarray::AutoArray<uint8_t> const & A) { #if defined(_OPENMP) uint64_t const numthreads = omp_get_max_threads(); #else uint64_t const numthreads = 1; #endif ::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads); uint64_t const numparts = partstarts.size()-1; ::libmaus2::util::Histogram::unique_ptr_type hist(new ::libmaus2::util::Histogram); ::libmaus2::parallel::OMPLock lock; #if defined(_OPENMP) #pragma omp parallel for #endif for ( int64_t t = 0; t < static_cast<int64_t>(numparts); ++t ) { ::libmaus2::util::Histogram::unique_ptr_type lhist(new ::libmaus2::util::Histogram); uint64_t codelen = 0; uint64_t const tcodelen = partstarts[t+1]-partstarts[t]; ::libmaus2::util::GetObject<uint8_t const *> G(A.begin()+partstarts[t]); while ( codelen != tcodelen ) (*lhist)(::libmaus2::util::UTF8::decodeUTF8(G,codelen)); lock.lock(); hist->merge(*lhist); lock.unlock(); } return UNIQUE_PTR_MOVE(hist); }
/**
 * buffer underflow callback; refills the get area via stream.readPart
 *
 * @return next symbol or traits_type::eof() if no more data was obtained
 **/
int_type underflow()
{
	// get area is not empty yet, return the current symbol
	if ( gptr() < egptr() )
		return static_cast<int_type>(*(reinterpret_cast<uint8_t const *>(gptr())));

	assert ( gptr() == egptr() );

	// number of symbols kept for put back: limited by the size of
	// the previous get area and by the put back space
	uint64_t const keep = std::min(
		static_cast<uint64_t>(gptr() - eback()),
		putbackspace
	);

	// move the kept symbols directly in front of the position new data
	// is read to (memmove, so the two ranges are allowed to overlap)
	std::memmove(
		buffer.begin() + putbackspace - keep,
		gptr() - keep,
		keep
	);

	// load data behind the put back space
	uint64_t const got = stream.readPart(
		buffer.begin() + putbackspace,
		buffer.size() - putbackspace
	);

	// set buffer pointers
	setg(
		buffer.begin() + putbackspace - keep,
		buffer.begin() + putbackspace,
		buffer.begin() + putbackspace + got
	);

	if ( ! got )
		return traits_type::eof();

	return static_cast<int_type>(*(reinterpret_cast<uint8_t const *>(gptr())));
}
/**
 * constructor; opens file fn and sets up the put buffer
 *
 * @param fn file name
 * @param rbuffersize buffer size; if negative the value returned by
 *        getOptimalIOBlockSize for the opened file is used instead
 **/
LinuxStreamingPosixFdOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize)
:
	fd(doOpen(fn)),
	closefd(true),
	optblocksize(
		(rbuffersize < 0)
		?
		getOptimalIOBlockSize(fd,std::string())
		:
		rbuffersize
	),
	buffersize(optblocksize),
	buffer(buffersize,false),
	prevwrite(0,0)
{
	// the put area comprises the buffer without its last element
	setp(buffer.begin(),buffer.end()-1);
}
/**
 * constructor; opens fn and sets up the put buffer
 *
 * @param fn file name
 * @param rbuffersize buffer size; if negative the value returned by
 *        getDefaultBlockSize() is used instead
 **/
MemoryOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize)
:
	fd(doOpen(fn)),
	buffersize(
		(rbuffersize < 0)
		?
		getDefaultBlockSize()
		:
		rbuffersize
	),
	buffer(buffersize,false)
{
	// the put area comprises the buffer without its last element
	setp(buffer.begin(),buffer.end()-1);
}
/**
 * constructor; allocates a buffer of getBgzfMaxBlockSize() elements and
 * sets the start and current pointer to its start and the end pointer to its end
 **/
BgzfParallelRecodeDeflateBase()
:
	B(getBgzfMaxBlockSize(),false),
	pa(B.begin()),
	pc(B.begin()),
	pe(B.end())
{
}
/** * buffer underflow callback * @return next symbol **/ typename base_type::int_type underflow() { if ( base_type::gptr() < base_type::egptr() ) return static_cast<typename base_type::int_type>(*uptr()); assert ( base_type::gptr() == base_type::egptr() ); char_type * midptr = buffer.begin() + pushbackspace; uint64_t const copyavail = std::min( // previously read static_cast<uint64_t>(base_type::gptr()-base_type::eback()), // space we have to copy into static_cast<uint64_t>(midptr-buffer.begin()) ); ::std::memmove(midptr-copyavail,base_type::gptr()-copyavail,copyavail*sizeof(char_type)); if ( static_cast<int64_t>(stream.tellg()) == static_cast<int64_t>(0) ) { stream.seekg(infilesize); stream.clear(); } uint64_t const rspace = stream.tellg(); uint64_t const toread = std::min(rspace,static_cast<uint64_t>(buffer.end()-midptr)); stream.seekg(-static_cast<int64_t>(toread),std::ios::cur); stream.clear(); stream.read(midptr, toread); size_t const n = stream.gcount(); assert ( n == toread ); std::reverse(midptr,midptr+n); streamreadpos += n; stream.seekg(-static_cast<int64_t>(toread),std::ios::cur); stream.clear(); base_type::setg(midptr-copyavail, midptr, midptr+n); if (!n) return base_type::traits_type::eof(); return static_cast<typename base_type::int_type>(*uptr()); }
/**
 * fork off a server process. The server process accepts connections on
 * seso in an endless loop and forks one process per connection, which
 * writes the content of data to the connection in blocks of at most 4096
 * bytes and then exits. The calling process returns from this function
 * with pid set to the return value of fork().
 *
 * @throws ::libmaus2::exception::LibMausException if the server process cannot be forked
 **/
void exec()
{
	pid = fork();

	if ( pid < 0 )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "failed to fork: " << strerror(errno);
		ex.finish();
		throw ex;
	}

	// server process
	if ( ! pid )
	{
		signal(SIGCHLD,sigchildhandler);

		while ( true )
		{
			try
			{
				::libmaus2::network::SocketBase::unique_ptr_type recsock = seso->accept();

				pid_t const childpid = fork();

				// process handling a single connection
				if ( childpid == 0 )
				{
					try
					{
						char const * ptr = data.begin();
						char const * const ptre = data.end();
						uint64_t const bs = 4096;

						// send data in blocks of at most bs bytes
						while ( ptr != ptre )
						{
							uint64_t const rest = ptre-ptr;
							uint64_t const towrite = std::min(bs,rest);
							recsock->write(ptr,towrite);
							ptr += towrite;
						}
					}
					catch(std::exception const & ex)
					{
						std::cerr << ex.what() << std::endl;
					}

					_exit(0);
				}
				// fork failed, the connection cannot be served; report
				// this instead of dropping the connection silently
				else if ( childpid < 0 )
				{
					// save errno before any stream output can modify it
					int const error = errno;
					std::cerr << "Error in SingleFileServer: failed to fork: " << strerror(error) << std::endl;
				}
			}
			catch(std::exception const & ex)
			{
				std::cerr << "Error in SingleFileServer: " << ex.what() << std::endl;
			}
		}

		_exit(0);
	}
}
/**
 * constructor; opens the output file and writes n and the number of
 * alphabet bits as the first two words
 *
 * @param filename output file name
 * @param ralbits value stored as albits and written to the output
 * @param n value written to the output as first word
 * @param rblocksize block size, also the number of elements of the block array A
 * @param rbufsize buffer size passed to the output buffer SGO
 **/
GammaRLEncoder(
	std::string const & filename,
	unsigned int const ralbits,
	uint64_t const n,
	uint64_t const rblocksize,
	uint64_t const rbufsize = 64*1024
)
:
	blocksize(rblocksize),
	COS(filename),
	SGO(COS,rbufsize),
	GE(SGO),
	// block array with start, current and end pointer
	A(blocksize),
	pa(A.begin()),
	pc(pa),
	pe(A.end()),
	cursym(0),
	curcnt(0),
	indexwritten(false),
	albits(ralbits)
{
	SGO.put(n);
	SGO.put(albits);
}
/**
 * constructor
 *
 * @param rwriter bit writer used for output
 * @param bufsize number of elements in the run buffer symcntruns
 **/
SymBitEncoderBaseTemplate(
	bit_writer_type & rwriter,
	uint64_t const bufsize = 64*1024ull
)
:
	writer(rwriter),
	// run buffer with start, current and end pointer
	symcntruns(bufsize),
	ra(symcntruns.begin()),
	rc(symcntruns.begin()),
	re(symcntruns.end()),
	// current run starts out with the smallest int64_t value as first component
	currun(std::numeric_limits<int64_t>::min(),false,0),
	indexwritten(false)
{
}
/**
 * constructor
 *
 * @param filename name passed to the output stream COS
 * @param bufsize number of elements in the buffer B
 **/
GraphEdgeBlockBuffer(std::string const & filename, uint64_t const bufsize)
:
	COS(filename),
	// buffer with start, current and end pointer
	B(bufsize,false),
	pa(B.begin()),
	pc(pa),
	pe(B.end())
{
}
/**
 * constructor; sets up a table with one entry per unsigned char value
 * mapping A,C,G,T (irrespective of case) to 0,1,2,3 and all other
 * symbols to 0
 **/
FastATwoBitTable()
: T(static_cast<size_t>(std::numeric_limits<unsigned char>::max())+1,false)
{
	assert ( 3 < T.size() );

	// default for all symbols
	std::fill(T.begin(),T.end(),0);

	char const * const lower = "acgt";
	char const * const upper = "ACGT";
	for ( int code = 0; code < 4; ++code )
		T[lower[code]] = T[upper[code]] = code;
}
/**
 * decode pattern i
 *
 * @param pat pattern object filled by the decoder
 * @param i pattern index, needs to be in [FI.low,FI.high)
 **/
void getPattern(pattern_type & pat, uint64_t i) const
{
	assert ( i >= FI.low && i < FI.high );

	// index relative to the first pattern of the interval
	uint64_t const rel = i-FI.low;

	// position of the code in text: a long pointer selected via the rank
	// of rel on the designator plus the short pointer stored for rel
	uint64_t const longoffset = longpointers [ designatorrank->rank1(rel) ];
	uint64_t const codestart = longoffset + shortpointers[rel];

	uint8_t const * const codeptr = text.begin()+codestart;
	::libmaus2::util::GetObject<uint8_t const *> G(codeptr);

	// id counter handed to the decoder, starting at i
	::libmaus2::parallel::SynchronousCounter<uint64_t> nextid(i);

	CompactFastDecoderBase::decode(pat,G,nextid);
}
/**
 * buffer underflow callback; refills the get area via fd->read and
 * adds the number of symbols read to symsread
 *
 * @return next symbol or traits_type::eof() if no more data was obtained
 **/
int_type underflow()
{
	// if there is still data, then return it
	if ( gptr() < egptr() )
		return static_cast<int_type>(*uptr());

	assert ( gptr() == egptr() );

	// number of bytes for putback buffer
	uint64_t const putbackcopy = std::min(
		static_cast<uint64_t>(gptr() - eback()),
		putbackspace
	);

	// copy bytes for putback. If the get area ends directly behind the
	// putback space (e.g. the previous read returned no data), then source
	// and target are identical; std::copy requires the target start not to
	// lie inside the source range, so the copy is skipped in that case
	// (the bytes are already in place).
	if ( gptr() != buffer.begin() + putbackspace )
		std::copy(
			gptr()-putbackcopy,
			gptr(),
			buffer.begin() + putbackspace - putbackcopy
		);

	// load data
	uint64_t const uncompressedsize = fd->read(
		buffer.begin()+putbackspace,
		buffer.size()-putbackspace
	);

	// set buffer pointers
	setgchecked(
		buffer.begin()+putbackspace-putbackcopy,
		buffer.begin()+putbackspace,
		buffer.begin()+putbackspace+uncompressedsize
	);

	symsread += uncompressedsize;

	if ( uncompressedsize )
		return static_cast<int_type>(*uptr());
	else
		return traits_type::eof();
}
MemoryInputOutputStreamBuffer(std::string const & fn, std::ios_base::openmode const cxxmode, int64_t const rbuffersize) : fd(doOpen(fn,cxxmode)), buffersize(rbuffersize < 0 ? getDefaultBlockSize() : rbuffersize), buffer(buffersize,false), readpos(0), writepos(0) { // empty get buffer setg(buffer.end(),buffer.end(),buffer.end()); // empty put buffer setp(buffer.begin(),buffer.end()-1); }
/**
 * buffer underflow callback; refills the whole buffer via doRead
 *
 * @return next symbol or traits_type::eof() if no more data was obtained
 **/
int_type underflow()
{
	// get area is not empty yet, return the current symbol
	if ( gptr() < egptr() )
		return static_cast<int_type>(*uptr());

	assert ( gptr() == egptr() );

	// load up to buffersize symbols to the start of the buffer
	size_t const got = doRead(buffer.begin(),buffersize);

	// get area covers the symbols just read
	setg(buffer.begin(),buffer.begin(),buffer.begin()+got);

	// advance read position by the number of symbols obtained
	readpos += got;

	return got ? static_cast<int_type>(*uptr()) : traits_type::eof();
}
/**
 * constructor; opens an output stream for rfn and a second one for
 * rfn + ".meta" and allocates the block buffer
 *
 * NOTE(review): the class is named decoder but only output objects are
 * constructed here; confirm the name against the class definition
 *
 * @param rfn file name
 * @param blocksize number of elements in the block buffer B
 **/
GammaPDDecoder(std::string const & rfn, uint64_t const blocksize = 4096)
:
	fn(rfn),
	POSI(new libmaus2::aio::OutputStreamInstance(fn)),
	PSGO(new libmaus2::aio::SynchronousGenericOutput<uint64_t>(*POSI,4096)),
	metafn(fn + ".meta"),
	PMETA(new libmaus2::aio::OutputStreamInstance(metafn)),
	// block buffer with start, current and end pointer
	B(blocksize,false),
	pa(B.begin()),
	pc(B.begin()),
	pe(B.end()),
	headerlength(sizeof(uint64_t)),
	valueswritten(0),
	flushed(false)
{
}
/** * read out the data in the decompressed block * * @param ldata buffer for storing the decompressed block * @param n size of buffer ldata in bytes * @return the number of uncompressed bytes in the buffer **/ uint64_t read(char * const ldata, uint64_t const n) { state = bgzfinflateblockstate_idle; if ( n < getBgzfMaxBlockSize() ) { ::libmaus2::exception::LibMausException se; se.getStream() << "BgzfInflate::decompressBlock(): provided buffer is too small: " << n << " < " << getBgzfMaxBlockSize(); se.finish(false); throw se; } if ( failed() ) throw getException(); uint64_t const ndata = blockinfo.uncompressed; std::copy ( data.begin(), data.begin() + ndata, reinterpret_cast<uint8_t *>(ldata) ); blockinfo = ::libmaus2::lz::BgzfInflateInfo(0,0,true); return ndata; }
/**
 * compute a histogram of the symbols obtained by UTF-8 decoding A using
 * one HistogramThread object per part of A and return it as a sorted
 * array of (symbol,count) pairs
 *
 * @param A UTF-8 coded text
 * @return sorted array of (symbol,count) pairs
 **/
::libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > libmaus2::util::Utf8String::getHistogramAsArray(::libmaus2::autoarray::AutoArray<uint8_t> const & A)
{
	typedef ::libmaus2::util::GetObject<uint8_t const *> getter_type;
	typedef getter_type::unique_ptr_type getter_ptr_type;
	typedef ::libmaus2::util::ExtendingSimpleCountingHash<uint64_t,uint64_t> hash_type;
	typedef HistogramThread< getter_type > thread_type;
	typedef thread_type::unique_ptr_type thread_ptr_type;

	#if defined(_OPENMP)
	uint64_t const numthreads = omp_get_max_threads();
	#else
	uint64_t const numthreads = 1;
	#endif

	// part i covers the code bytes [partstarts[i],partstarts[i+1])
	::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads);
	uint64_t const numparts = partstarts.size()-1;

	::libmaus2::parallel::OMPLock lock;
	// mutex and hash table handed to all thread objects
	::libmaus2::parallel::PosixMutex mutex;
	hash_type ESCH(8u);

	::libmaus2::autoarray::AutoArray< getter_ptr_type > getters(numparts);
	::libmaus2::autoarray::AutoArray< thread_ptr_type > threads(numparts);

	// set up one getter and one thread object per part
	for ( uint64_t part = 0; part < numparts; ++part )
	{
		getter_ptr_type tgetter(
			new getter_type(A.begin()+partstarts[part])
		);
		getters[part] = UNIQUE_PTR_MOVE(tgetter);

		thread_ptr_type tthread(
			new thread_type(
				*getters[part],
				partstarts[part+1]-partstarts[part],
				mutex,
				ESCH,
				part
			)
		);
		threads[part] = UNIQUE_PTR_MOVE(tthread);
	}

	// wait for the thread objects and deallocate them
	for ( uint64_t part = 0; part < numparts; ++part )
	{
		threads[part]->join();
		threads[part].reset();
	}

	// collect the (symbol,count) pairs for all used keys of the hash table
	::libmaus2::autoarray::AutoArray< std::pair<int64_t,uint64_t> > R(ESCH.size(),false);
	uint64_t numpairs = 0;

	for ( hash_type::key_type const * key = ESCH.begin(); key != ESCH.end(); ++key )
	{
		if ( *key != hash_type::unused() )
			R [ numpairs++ ] = std::pair<int64_t,uint64_t>(*key,ESCH.getCount(*key));
	}

	std::sort(R.begin(),R.end());

	return R;
}
/** * seek to absolute position **/ ::std::streampos seekpos(::std::streampos sp, ::std::ios_base::openmode /* which */) { // flush write buffer before seeking anywhere checkWriteBuffer(); // seek off_t const off = doSeek(sp,SEEK_SET); if ( off == static_cast<off_t>(-1) ) return -1; // empty get buffer setg(buffer.end(),buffer.end(),buffer.end()); // empty put buffer setp(buffer.begin(),buffer.end()-1); // set positions readpos = off; writepos = off; return off; }
/** * decompress the currenctly buffered block * * @return number of uncompressed bytes in block, zero for EOF or failure **/ uint64_t decompressBlock() { state = bgzfinflateblockstate_decompressed_block; if ( failed() ) return 0; if ( ! blockinfo.uncompressed ) return 0; try { BgzfInflateBase::decompressBlock( reinterpret_cast<char *>(data.begin()), std::make_pair(blockinfo.compressed,blockinfo.uncompressed) ); return blockinfo.uncompressed; } catch(libmaus2::exception::LibMausException const & lex) { libmaus2::exception::LibMausException::unique_ptr_type tex(lex.uclone()); ex = UNIQUE_PTR_MOVE(tex); return 0; } catch(std::exception const & lex) { libmaus2::exception::LibMausException::unique_ptr_type tex(new libmaus2::exception::LibMausException); ex = UNIQUE_PTR_MOVE(tex); ex->getStream() << lex.what(); ex->finish(false); return 0; } catch(...) { libmaus2::exception::LibMausException::unique_ptr_type tex(new libmaus2::exception::LibMausException); ex = UNIQUE_PTR_MOVE(tex); ex->getStream() << "BgzfInflateBlock::decompressBlock(): unknown exception caught"; ex->finish(false); return 0; } }
virtual size_t compress(char const * input, size_t inputLength, libmaus2::autoarray::AutoArray<char> & output) { zintf->z_deflateReset(); if ( inputLength > inputBound ) { inputBound = inputLength; outputBound = zintf->z_deflateBound(inputBound); } if ( outputBound > output.size() ) output = libmaus2::autoarray::AutoArray<char>(outputBound,false); // maximum number of output bytes zintf->setAvailOut(output.size()); // next compressed output byte zintf->setNextOut(reinterpret_cast<Bytef *>(output.begin())); // number of bytes to be compressed zintf->setAvailIn(inputLength); // data to be compressed zintf->setNextIn(const_cast<Bytef *>(reinterpret_cast<Bytef const *>(input))); int const retcode = zintf->z_deflate(Z_FINISH); // std::cerr << "avail_out=" << strm.avail_out << std::endl; // std::cerr << "avail_in=" << strm.avail_in << std::endl; // call deflate if ( retcode != Z_STREAM_END ) { libmaus2::exception::LibMausException se; se.getStream() << "deflate() failed: " << retcode << ", " << zError(retcode) << std::endl; se.finish(false /* do not translate stack trace */); throw se; } uint64_t const compsize = output.size() - zintf->getAvailOut(); return compsize; }