/**
 * Construct a stream buffer reading through rstream.
 *
 * @param rstream stream object stored in the member stream
 * @param rbuffersize value stored in the member buffersize
 * @param rpushbackspace value stored in the member pushbackspace
 *
 * The member buffer is constructed with buffersize+pushbackspace elements
 * and a second constructor argument of false (presumably: skip
 * initialisation - confirm against the buffer type). The get area is set
 * to an empty range located at the end of the buffer and streamreadpos
 * starts at 0.
 **/
StreamWrapperBuffer(
	stream_type & rstream,
	::std::size_t rbuffersize,
	std::size_t rpushbackspace
)
: stream(rstream),
  buffersize(rbuffersize),
  pushbackspace(rpushbackspace),
  buffer(buffersize+pushbackspace,false),
  streamreadpos(0)
{
	// eback == gptr == egptr == end of buffer: nothing is readable yet
	setg(buffer.end(), buffer.end(), buffer.end());
}
void init(bool const repos) { // set empty buffer setg(buffer.end(), buffer.end(), buffer.end()); // seek if ( repos ) stream->seek(symsread,SEEK_SET); }
void init(bool const repos) { // set empty buffer setg(buffer.end(), buffer.end(), buffer.end()); // seek if ( repos ) stream.lseek(symsread); }
/**
 * Set up the symbol mapping tables and the auxiliary tag filter.
 *
 * T0 and T1 each have 256 entries. Every entry of T0 starts as 4 and
 * every entry of T1 starts as 5; afterwards the entries for A/a, C/c,
 * G/g and T/t are set to 0, 1, 2 and 3 respectively in both tables.
 * All other symbols therefore map to different values in T0 and T1.
 * The tags "MD" and "NM" are registered in auxvec and nm starts at 0.
 **/
MdStringComputationContext()
: T0(256,false),
  T1(256,false),
  nm(0)
{
	std::fill(T0.begin(),T0.end(),4);
	std::fill(T1.begin(),T1.end(),5);

	// upper and lower case spelling of the four bases, indexed by code
	char const upper[] = { 'A', 'C', 'G', 'T' };
	char const lower[] = { 'a', 'c', 'g', 't' };

	for ( unsigned int code = 0; code < 4; ++code )
	{
		T0[upper[code]] = T0[lower[code]] = T1[upper[code]] = T1[lower[code]] = code;
	}

	auxvec.set("MD");
	auxvec.set("NM");
}
/**
 * Refill the get area from the stream.
 *
 * If unread symbols remain in the get area the current symbol is
 * returned without touching the stream. Otherwise up to pushbackspace
 * of the most recently read symbols are moved directly in front of
 * the read position (buffer.begin()+pushbackspace), the rest of the
 * buffer is filled from stream and streamreadpos is advanced by the
 * number of symbols obtained.
 *
 * @return the symbol at the new get position, or traits_type::eof() if
 *         the stream delivered no data
 **/
int_type underflow()
{
	// unread data is still available
	if ( gptr() < egptr() )
		return static_cast<int_type>(*uptr());

	assert ( gptr() == egptr() );

	// position at which freshly read data is stored
	char * const readstart = buffer.begin() + pushbackspace;

	// number of symbols consumed so far from the current get area
	uint64_t const consumed = static_cast<uint64_t>(gptr()-eback());
	// number of symbols that fit in front of readstart
	uint64_t const room = static_cast<uint64_t>(readstart-buffer.begin());
	// number of symbols kept for putback
	uint64_t const keep = std::min(consumed,room);

	// regions may overlap, hence memmove
	::std::memmove(readstart-keep,gptr()-keep,keep);

	stream.read(readstart, buffer.end()-readstart);
	size_t const got = stream.gcount();
	streamreadpos += got;

	setg(readstart-keep, readstart, readstart+got);

	return got ? static_cast<int_type>(*uptr()) : traits_type::eof();
}
/**
 * Construct an output stream buffer writing to the file named fn.
 *
 * @param fn name of the file passed to doOpen(); the resulting
 *        descriptor is stored in fd and closefd is set to true
 * @param rbuffersize buffer size; a negative value selects the value
 *        returned by getOptimalIOBlockSize(fd,std::string()) instead
 *
 * The put area covers all but the last element of the buffer
 * (buffer.begin() to buffer.end()-1).
 *
 * NOTE(review): rbuffersize == 0 would give an empty buffer, for which
 * buffer.end()-1 precedes buffer.begin() - confirm no caller passes 0.
 **/
LinuxStreamingPosixFdOutputStreamBuffer(
	std::string const & fn,
	int64_t const rbuffersize
)
: fd(doOpen(fn)),
  closefd(true),
  optblocksize((rbuffersize < 0) ? getOptimalIOBlockSize(fd,std::string()) : rbuffersize),
  buffersize(optblocksize),
  buffer(buffersize,false),
  prevwrite(0,0)
{
	setp(buffer.begin(),buffer.end()-1);
}
/**
 * Construct the base object with a buffer B of getBgzfMaxBlockSize()
 * elements (second constructor argument false).
 *
 * pa and pc are both set to the start of B, pe to its end.
 **/
BgzfParallelRecodeDeflateBase()
: B(getBgzfMaxBlockSize(),false),
  pa(B.begin()),
  pc(B.begin()),
  pe(B.end())
{
}
/**
 * Construct an encoder writing to the file named filename.
 *
 * @param filename passed to the constructor of COS
 * @param ralbits value stored in albits and emitted through SGO
 * @param n value emitted through SGO before albits
 * @param rblocksize value stored in blocksize; also the size of the
 *        block buffer A
 * @param rbufsize second constructor argument of SGO (default 64*1024)
 *
 * After member construction n and albits are written via SGO.put(),
 * in this order.
 **/
GammaRLEncoder(
	std::string const & filename,
	unsigned int const ralbits,
	uint64_t const n,
	uint64_t const rblocksize,
	uint64_t const rbufsize = 64*1024
)
: blocksize(rblocksize),
  COS(filename),
  SGO(COS,rbufsize),
  GE(SGO),
  A(blocksize),
  pa(A.begin()),
  pc(pa),
  pe(A.end()),
  cursym(0),
  curcnt(0),
  indexwritten(false),
  albits(ralbits)
{
	SGO.put(n);
	SGO.put(albits);
}
/**
 * Take an element from the free list, growing the pool if it is empty.
 *
 * When freelistfill is zero, the allocation list grows to
 * max(1,2*alloclist.size()) slots, each new slot receives a freshly
 * allocated element_type object and those objects are pushed onto the
 * free list (which grows to max(1,2*freelist.size()) slots).
 *
 * Both pointer arrays are allocated before any object is created and
 * the object allocations are guarded: if an allocation throws, every
 * object created during this call is deleted again and alloclist,
 * freelist and freelistfill have not been modified. (This assumes the
 * array assignments in the commit step do not throw.)
 *
 * @return pointer to the element removed from the top of the free list
 **/
element_type * get()
{
	if ( ! freelistfill )
	{
		uint64_t const oldalloc = alloclist.size();
		uint64_t const oldfree = freelist.size();
		element_type * nullp = 0;

		// enlarged allocation list; new slots are null until filled
		libmaus::autoarray::AutoArray<element_type *> nalloclist(
			std::max(
				static_cast<uint64_t>(1),
				static_cast<uint64_t>(2*oldalloc)
			)
			,false
		);
		std::copy(alloclist.begin(),alloclist.end(),nalloclist.begin());
		std::fill(nalloclist.begin()+oldalloc,nalloclist.end(),nullp);

		// enlarged free list, allocated before any object exists so
		// that a failure here cannot leak objects
		libmaus::autoarray::AutoArray<element_type *> nfreelist(
			std::max(
				static_cast<uint64_t>(1),
				static_cast<uint64_t>(2*oldfree)
			)
			,false
		);
		std::copy(freelist.begin(),freelist.end(),nfreelist.begin());
		std::fill(nfreelist.begin()+oldfree,nfreelist.end(),nullp);

		// allocate more alignment objects
		try
		{
			for ( element_type ** p = nalloclist.begin()+oldalloc; p != nalloclist.end(); ++p )
				*p = new element_type;
		}
		catch(...)
		{
			// unfilled slots are still null, deleting them is a no-op
			for ( element_type ** p = nalloclist.begin()+oldalloc; p != nalloclist.end(); ++p )
				delete *p;
			throw;
		}

		// commit: everything that can fail has succeeded
		freelist = nfreelist;

		for ( element_type ** p = nalloclist.begin()+oldalloc; p != nalloclist.end(); ++p )
			freelist[freelistfill++] = *p;

		alloclist = nalloclist;
	}

	return freelist[--freelistfill];
}
/**
 * Append one FastQ record to the buffer C.
 *
 * The record is stored as '@' followed by sid, the sequence (spattern),
 * '+' followed by plus, and the quality string; each of the four parts
 * is terminated by a newline. C is doubled in size (to at least 1)
 * until getFastQLength(pattern) symbols fit behind pc.
 *
 * Besides storing the record this updates lnumsyms, minlen and maxlen
 * with the sequence length, increments pathigh and calls
 * internalFlush() once pathigh-patlow equals patperblock.
 *
 * @param pattern record to be stored
 **/
void put(libmaus::fastx::FastQReader::pattern_type const & pattern)
{
	uint64_t const needed = getFastQLength(pattern);

	// grow the buffer until the record fits, keeping the write offset
	while ( (C.end() - pc) < static_cast<ptrdiff_t>(needed) )
	{
		uint64_t const used = pc-C.begin();
		uint64_t const grown = std::max(2*C.size(),static_cast<uint64_t>(1ull));
		C.resize(grown);
		pc = C.begin()+used;
	}

	// name line
	*pc++ = '@';
	pc = std::copy(pattern.sid.begin(),pattern.sid.end(),pc);
	*pc++ = '\n';

	// sequence line
	pc = std::copy(pattern.spattern.begin(),pattern.spattern.end(),pc);
	*pc++ = '\n';

	// plus line
	*pc++ = '+';
	pc = std::copy(pattern.plus.begin(),pattern.plus.end(),pc);
	*pc++ = '\n';

	// quality line
	pc = std::copy(pattern.quality.begin(),pattern.quality.end(),pc);
	*pc++ = '\n';

	assert ( pc <= C.end() );

	// sequence length statistics
	uint64_t const seqlen = static_cast<uint64_t>(pattern.spattern.size());
	lnumsyms += seqlen;
	minlen = std::min(minlen,seqlen);
	maxlen = std::max(maxlen,seqlen);

	pathigh++;

	// block complete?
	if ( pathigh - patlow == patperblock )
		internalFlush();
}
void checkSpace(uint64_t const outlen) { // buffer overflow? if ( freeSpace() < outlen ) { flush(); assert ( opc == opa ); if ( outlen > outbuf.size() ) { ::libmaus::autoarray::AutoArray<uint8_t> newbuf(outlen); std::copy( outbuf.begin(), outbuf.end(), newbuf.begin() ); outbuf = newbuf; opa = outbuf.begin(); opc = opa; ope = outbuf.end(); } } assert ( freeSpace() >= outlen ); }
/**
 * Build a collision free hash table for the elements in [ita,ite).
 *
 * Table sizes n = 1, 2, 4, ... up to maxn are tried in turn, using the
 * lowest k bits of it->hash() (mask m = n-1) as the slot index. The
 * first size for which no two elements share a slot is used. H then
 * has n entries; the slot of an element stores its index it-ita and
 * all other slots store -1.
 *
 * The search stops as soon as a suitable size has been found, so that
 * k, n and m describe exactly the size that was verified and the table
 * never has more than maxn entries.
 *
 * @param ita start iterator of the elements to be hashed
 * @param ite end iterator of the elements to be hashed
 * @param maxn maximum table size tried (default 64*1024)
 * @throws libmaus::exception::LibMausException if no size <= maxn
 *         yields a collision free table
 **/
ConstantStringHash(iterator ita, iterator ite, uint64_t const maxn = 64*1024)
{
	k = 0;
	n = 1;
	m = 0;

	bool ok = false;

	while ( n <= maxn )
	{
		// number of elements per slot (array is expected to start zeroed)
		libmaus::autoarray::AutoArray<uint64_t> C(n);

		for ( iterator it = ita; it != ite; ++it )
			C [ it->hash() & m ] ++;

		ok = true;
		for ( uint64_t i = 0; ok && i < n; ++i )
			ok = C[i] <= 1;

		// keep k, n and m at the verified size instead of advancing
		// them one more step
		if ( ok )
			break;

		++k;
		n <<= 1;
		m = (m << 1)|1;
	}

	if ( ! ok )
	{
		libmaus::exception::LibMausException se;
		se.getStream() << "Cannot create perfect hash of size <= " << maxn << " for " << ite-ita << " elements" << std::endl;
		se.finish();
		throw se;
	}

	H = libmaus::autoarray::AutoArray<int64_t>(n);
	std::fill(H.begin(),H.end(),-1);

	for ( iterator it = ita; it != ite; ++it )
		H [ it->hash() & m ] = it-ita;

	// every element must find its own index again
	for ( iterator it = ita; it != ite; ++it )
		assert ( H [ it->hash() & m ] == it-ita );
}
/**
 * @return H.end(), the end of the array H, as a pointer to const
 *         pair_type
 **/
pair_type const * end() const
{
	return H.end();
}
SimpleHashMap(unsigned int const rslog) : slog(rslog), hashsize(1ull << slog), hashmask(hashsize-1), fill(0), H(hashsize,false) { std::fill(H.begin(),H.end(),pair_type(base_type::unused(),value_type())); }
/**
 * @return H.end(), the end of the array H, as a pointer to pair_type
 **/
pair_type * end()
{
	return H.end();
}
/**
 * Replace the trace array by a new one of tracelen elements.
 *
 * The new array is constructed with a second argument of false and the
 * previous content is not carried over. Afterwards te and ta both
 * point to the end of the new array.
 *
 * @param tracelen number of elements of the new trace array
 **/
void resize(uint64_t const tracelen)
{
	trace = ::libmaus::autoarray::AutoArray<step_type>(tracelen,false);

	// ta == te == end of the array
	ta = te = trace.end();
}
/**
 * Construct a container with a trace array of tracelen elements.
 *
 * te and ta both start at the end of the trace array.
 *
 * @param tracelen number of elements of the trace array (default 0)
 **/
AlignmentTraceContainer(uint64_t const tracelen = 0)
: trace(tracelen),
  te(trace.end()),
  ta(te)
{
}
/**
 * @return A.end(), the end of the container A, as a const_iterator
 **/
const_iterator end() const
{
	return A.end();
}
/**
 * @return H.end(), the end of the array H, as a pointer to key_type
 **/
key_type * end()
{
	return H.end();
}
/**
 * Construct an input buffer of bufsize elements.
 *
 * pa and pc are set to the start of inbuf, pe to its end. bufsize is
 * asserted not to exceed getBgzfMaxBlockSize().
 *
 * @param bufsize size of the buffer (default getBgzfMaxBlockSize())
 **/
BgzfDeflateInputBufferBase(uint64_t const bufsize = getBgzfMaxBlockSize())
: inbuf(bufsize,false),
  pa(inbuf.begin()),
  pc(pa),
  pe(inbuf.end())
{
	assert ( bufsize <= getBgzfMaxBlockSize() );
}
/**
 * Construct an output buffer of bufsize elements in front of rout.
 *
 * opa and opc are set to the start of outbuf, ope to its end. The
 * written counter starts at 0 and ST is value initialised.
 *
 * @param bufsize size of the buffer outbuf
 * @param rout output stream stored in the member out
 **/
OutputBuffer(uint64_t bufsize, std::ostream & rout)
: outbuf(bufsize,false),
  opa(outbuf.begin()),
  opc(opa),
  ope(outbuf.end()),
  out(rout),
  written(0),
  ST()
{
}
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); std::string const input = arginfo.getRestArg<std::string>(0); std::string const output = arginfo.getRestArg<std::string>(1); unsigned int const verbose = arginfo.getValue<unsigned int>("verbose",1); unsigned int const addterm = arginfo.getValue<unsigned int>("addterm",0) ? 1 : 0; ::libmaus::autoarray::AutoArray<uint64_t> const chist = computeCharHist(input); uint64_t maxsym = 0; for ( uint64_t i = 0; i < chist.size(); ++i ) if ( chist[i] ) maxsym = i; if ( addterm ) maxsym += 1; unsigned int const b = maxsym ? (64-::libmaus::bitio::Clz::clz(maxsym)) : 0; uint64_t const n = std::accumulate(chist.begin(),chist.end(),0ull); if ( verbose ) std::cerr << "[V] n=" << n << " maxsym=" << maxsym << " b=" << b << std::endl; uint64_t const blocksize = 8*1024; uint64_t const numblocks = (n+blocksize-1)/blocksize; ::libmaus::autoarray::AutoArray<uint8_t> B(blocksize); ::libmaus::aio::CheckedInputStream CIS(input); ::libmaus::bitio::CompactArrayWriter CAW(output,n+addterm,b); int64_t lastperc = -1; if ( verbose ) std::cerr << "[V] "; for ( uint64_t b = 0; b < numblocks; ++b ) { uint64_t const low = std::min(b*blocksize,n); uint64_t const high = std::min(low+blocksize,n); uint64_t const range = high-low; CIS.read ( reinterpret_cast<char *>(B.begin()), range ); assert ( CIS.gcount() == static_cast<int64_t>(range) ); if ( addterm ) for ( uint64_t i = 0; i < range; ++i ) B[i] += 1; CAW.write(B.begin(),range); int64_t const newperc = (high * 100) / n; if ( verbose && newperc != lastperc ) { lastperc = newperc; std::cerr << "(" << newperc << ")"; } } if ( addterm ) CAW.put(0); if ( verbose ) std::cerr << std::endl; CAW.flush(); #if 0 ::libmaus::bitio::CompactDecoderWrapper CDW(output); for ( uint64_t i = 0; i < n+addterm; ++i ) std::cerr << CDW.get(); std::cerr << std::endl; #endif } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; } }
/**
 * Construct the auxiliary tables M and C with 256 entries each.
 *
 * Every entry of M is set to 1 and every entry of C is set to 0.
 **/
ConsensusAux()
: M(256),
  C(256)
{
	std::fill(M.begin(),M.end(),1);
	std::fill(C.begin(),C.end(),0);
}
/**
 * Construct a buffer of bufsize elements.
 *
 * pa and pc are set to the start of A, pe to its end.
 *
 * @param bufsize size of the buffer A (default 8*1024)
 **/
Buffer(uint64_t const bufsize = 8*1024)
: A(bufsize,false),
  pa(A.begin()),
  pc(pa),
  pe(A.end())
{
}