/**
 * build one (low,high) pair per inner index entry from the accumulated vcnt values
 *
 * The vcnt fields of all entries are written to a flat array (outer index
 * order first, inner order second), prefixSums() is applied to it and each
 * entry is paired with its successor.
 *
 * @return array holding one pair of neighbouring accumulated values per entry
 **/
::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeSymAccu() const
{
	// count the inner entries over the complete index
	uint64_t total = 0;
	for ( uint64_t outer = 0; outer < index.size(); ++outer )
		total += index[outer].size();

	// one slot per entry plus one trailing slot which is never assigned here
	::libmaus2::autoarray::AutoArray<uint64_t> sums(total+1);
	uint64_t next = 0;
	for ( uint64_t outer = 0; outer < index.size(); ++outer )
	{
		for ( uint64_t inner = 0; inner < index[outer].size(); ++inner )
		{
			sums[next] = index[outer][inner].vcnt;
			++next;
		}
	}

	sums.prefixSums();

	// pair up neighbouring accumulated values
	::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > result(total);
	for ( uint64_t k = 0; k+1 < sums.size(); ++k )
		result[k] = std::pair<uint64_t,uint64_t>(sums[k],sums[k+1]);

	#if 0
	std::cerr << "presymaccu:" << std::endl;
	for ( uint64_t k = 0; k < sums.size(); ++k )
		std::cerr << sums[k] << std::endl;

	std::cerr << "symaccu:" << std::endl;
	for ( uint64_t k = 0; k < result.size(); ++k )
		std::cerr << "[" << k << "]=[" << result[k].first << "," << result[k].second << ")" << std::endl;
	#endif

	return result;
}
/**
 * attach a fragment alignment buffer and record pointer, byte length and
 * fill value for each of its linear output fragments
 *
 * The arrays behind D, S and L are only replaced when the number of
 * fragments exceeds their current size (an unset pointer counts as size 0).
 *
 * @param rblock buffer to attach
 **/
void set(libmaus2::bambam::parallel::FragmentAlignmentBuffer::shared_ptr_type rblock)
{
	block = rblock;

	std::vector<std::pair<uint8_t *,uint8_t *> > fragments;
	block->getLinearOutputFragments(fragments);
	std::vector<size_t> const fillVector = block->getFillVector();

	assert ( fillVector.size() == fragments.size() );

	// current capacities, an unset array counts as empty
	uint64_t const capD = D ? D->size() : 0;
	uint64_t const capS = S ? S->size() : 0;
	uint64_t const capL = L ? L->size() : 0;

	if ( fragments.size() > capD )
	{
		libmaus2::autoarray::AutoArray<char const *>::shared_ptr_type fresh(
			new libmaus2::autoarray::AutoArray<char const *>(fragments.size(),false)
		);
		D = fresh;
	}
	if ( fragments.size() > capS )
	{
		libmaus2::autoarray::AutoArray<size_t>::shared_ptr_type fresh(
			new libmaus2::autoarray::AutoArray<size_t>(fragments.size(),false)
		);
		S = fresh;
	}
	if ( fragments.size() > capL )
	{
		libmaus2::autoarray::AutoArray<size_t>::shared_ptr_type fresh(
			new libmaus2::autoarray::AutoArray<size_t>(fragments.size(),false)
		);
		L = fresh;
	}

	for ( uint64_t f = 0; f < fragments.size(); ++f )
	{
		uint8_t * const from = fragments[f].first;
		uint8_t * const to = fragments[f].second;

		D->at(f) = reinterpret_cast<char const *>(from);
		S->at(f) = to-from;
		L->at(f) = fillVector.at(f);
	}

	numblocks = fragments.size();
}
/**
 * return the index of the first rank dictionary which has a set bit at position i
 *
 * @param i position
 * @return index of the first dictionary with bit i set, or the number of
 *         dictionaries if no dictionary has that bit set
 **/
uint64_t operator[](uint64_t const i) const
{
	uint64_t const numdicts = rank_dictionaries.size();
	uint64_t sym = 0;

	// stop at the first dictionary marking position i
	while ( sym < numdicts && !(*(rank_dictionaries[sym]))[i] )
		++sym;

	return sym;
}
/**
 * write the array D to stream: first its length, then each element in order
 *
 * @param stream output stream
 **/
void serialise(stream_type & stream) const
{
	typedef ::libmaus2::util::NumberSerialisation number_serialiser_type;

	// length header
	number_serialiser_type::serialiseNumber(stream,D.size());

	// payload
	uint64_t k = 0;
	while ( k < D.size() )
	{
		number_serialiser_type::serialiseNumber(stream,D[k]);
		++k;
	}
}
/**
 * construct one rank dictionary per symbol value from the sequence BWT[0..rn)
 *
 * Nothing is allocated for rn == 0.
 *
 * @param BWT sequence accessed via BWT[i]
 * @param rn length of the sequence
 * @param rmaxval starting value for the maximum symbol; the maximum actually
 *        used is the larger of this and the largest value found in BWT
 **/
MultiRankCacheLF ( iterator BWT, uint64_t const rn, uint64_t const rmaxval = 0)
: n(rn)
{
	if ( ! n )
		return;

	// largest symbol value, not smaller than rmaxval
	uint64_t maxval = rmaxval;
	for ( uint64_t p = 0; p < n; ++p )
	{
		uint64_t const v = static_cast<uint64_t>(BWT[p]);
		if ( maxval < v )
			maxval = v;
	}

	rank_dictionaries = ::libmaus2::autoarray::AutoArray < rank_ptr_type >(maxval+1);

	for ( uint64_t sym = 0; sym < rank_dictionaries.size(); ++sym )
	{
		rank_ptr_type dict(new rank_type(n+1));
		rank_dictionaries[sym] = UNIQUE_PTR_MOVE(dict);

		writer_type writer = rank_dictionaries[sym]->getWriteContext();

		// indicator bit vector for sym
		for ( uint64_t p = 0; p < n; ++p )
			writer.writeBit(BWT[p] == sym);

		// write additional bit to make rankm1 defined for n
		writer.writeBit(0);
		writer.flush();
	}

	// per symbol occurrence counts, then accumulated via prefixSums()
	D = ::libmaus2::autoarray::AutoArray < uint64_t >(rank_dictionaries.size()+1);
	for ( uint64_t sym = 0; sym < rank_dictionaries.size(); ++sym )
		D [ sym ] = rank_dictionaries[sym]->rank1(n-1);
	D.prefixSums();
}
/**
 * read everything that is left on in into memory
 *
 * The data is collected in a buffer which starts at one byte and is
 * doubled whenever it is full; the result is copied to an array of
 * exactly the number of bytes read.
 *
 * @param in input stream
 * @return array containing the bytes read from in
 **/
static libmaus2::autoarray::AutoArray<char> loadFile(std::istream & in)
{
	libmaus2::autoarray::AutoArray<char> buffer(1);
	uint64_t used = 0;

	while ( in )
	{
		in.read(buffer.begin() + used, buffer.size() - used);

		// no bytes delivered means there is nothing left to read
		if ( in.gcount() == 0 )
			break;

		used += in.gcount();

		// buffer is full, move the content to one of twice the size
		if ( used == buffer.size() )
		{
			libmaus2::autoarray::AutoArray<char> grown(2*buffer.size(),false);
			std::copy(buffer.begin(),buffer.end(),grown.begin());
			buffer = grown;
		}
	}

	// shrink to the number of bytes actually read
	libmaus2::autoarray::AutoArray<char> result(used,false);
	std::copy(buffer.begin(),buffer.begin()+used,result.begin());

	return result;
}
/**
 * send the serialised FI object to every secondary socket, then broadcast the
 * designators, shortpointers, longpointers and text arrays (in this order)
 * via UDPSocket::sendArrayBroadcast; progress is reported on std::cerr
 *
 * @param interface network interface handed to sendArrayBroadcast
 * @param broadcastport port handed to sendArrayBroadcast
 * @param secondarysockets sockets of the receivers
 * @param packsize packet size handed to sendArrayBroadcast
 **/
void broadcastSend(
	::libmaus2::network::Interface const & interface,
	unsigned short const broadcastport,
	::libmaus2::autoarray::AutoArray < ::libmaus2::network::ClientSocket::unique_ptr_type > & secondarysockets,
	unsigned int const packsize = 508
) const
{
	typedef ::libmaus2::network::UDPSocket udp_socket_type;

	// FI goes over the point to point sockets
	std::cerr << "Writing FI...";
	for ( uint64_t s = 0; s < secondarysockets.size(); ++s )
		secondarysockets[s]->writeString(FI.serialise());
	std::cerr << "done.";

	// the arrays are broadcast
	std::cerr << "Broadcasting designators...";
	udp_socket_type::sendArrayBroadcast(
		interface,broadcastport,secondarysockets,
		designators.get(),designators.size(),packsize
	);
	std::cerr << "done.";

	std::cerr << "Broadcasting shortpointers...";
	udp_socket_type::sendArrayBroadcast(
		interface,broadcastport,secondarysockets,
		shortpointers.get(),shortpointers.size(),packsize
	);
	std::cerr << "done.";

	std::cerr << "Broadcasting longpointers...";
	udp_socket_type::sendArrayBroadcast(
		interface,broadcastport,secondarysockets,
		longpointers.get(),longpointers.size(),packsize
	);
	std::cerr << "done.";

	std::cerr << "Broadcasting text...";
	udp_socket_type::sendArrayBroadcast(
		interface,broadcastport,secondarysockets,
		text.get(),text.size(),packsize
	);
	std::cerr << "done.";
}
/**
 * construct from hash intervals, creating the buffers via init() with a file name prefix
 *
 * The address of rHI is stored in HI, so the interval array needs to
 * stay valid while this object refers to it.
 *
 * @param rHI hash intervals; one buffer slot is set up per interval
 * @param fileprefix prefix for files
 **/
SynchronousOutputFile8ArrayTemplate(
	::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI,
	std::string const & fileprefix
)
: HI(&rHI),
  buffers(rHI.size()),
  IT(new ::libmaus2::util::IntervalTree(rHI,0,rHI.size()))
{
	init ( rHI.size(), fileprefix );
}
/**
 * construct from hash intervals, creating the buffers via init() with a
 * temporary file name generator
 *
 * The address of rHI is stored in HI, so the interval array needs to
 * stay valid while this object refers to it.
 *
 * @param rHI hash intervals; one buffer slot is set up per interval
 * @param tmpgen temporary file name generator object
 **/
SynchronousOutputFile8ArrayTemplate(
	::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI,
	::libmaus2::util::TempFileNameGenerator & tmpgen
)
: HI(&rHI),
  buffers(rHI.size()),
  IT(new ::libmaus2::util::IntervalTree(rHI,0,rHI.size()))
{
	init ( rHI.size(), tmpgen );
}
/**
 * write the block layout to stream
 *
 * Output order: blocksize, lastblocksize, maxblockbytes, the number of
 * block start entries minus one, then every entry of blockstarts.
 *
 * @param stream output stream
 **/
void serialise(stream_type & stream) const
{
	typedef ::libmaus2::util::NumberSerialisation number_serialiser_type;

	// fixed header fields
	number_serialiser_type::serialiseNumber(stream,blocksize);
	number_serialiser_type::serialiseNumber(stream,lastblocksize);
	number_serialiser_type::serialiseNumber(stream,maxblockbytes);

	// the stored count is one less than the number of entries written below
	number_serialiser_type::serialiseNumber(stream,blockstarts.size()-1);

	uint64_t k = 0;
	while ( k < blockstarts.size() )
	{
		number_serialiser_type::serialiseNumber(stream,blockstarts[k]);
		++k;
	}
}
/**
 * construct from hash intervals, creating the buffers via init() from a
 * list of file names
 *
 * The address of rHI is stored in HI, so the interval array needs to
 * stay valid while this object refers to it.
 *
 * @param rHI hash intervals; one buffer slot is set up per interval
 * @param filenames output buffer file names
 * @param truncate if true, then truncate files during buffer creation
 **/
SynchronousOutputFile8ArrayTemplate(
	::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI,
	std::vector<std::string> const & filenames,
	bool const truncate
)
: HI(&rHI),
  buffers(rHI.size()),
  IT(new ::libmaus2::util::IntervalTree(rHI,0,rHI.size()))
{
	init ( filenames, truncate );
}
/**
 * build one (low,high) pair per outer index entry from the accumulated entry sizes
 *
 * The size of each index[i] is stored in a flat array, prefixSums() is
 * applied to it and each value is paired with its successor.
 *
 * @return array holding one pair of neighbouring accumulated sizes per outer entry
 **/
::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeSegAccu() const
{
	uint64_t const numseg = index.size();

	// one slot per segment plus a trailing slot which is never assigned here
	::libmaus2::autoarray::AutoArray<uint64_t> sums(numseg+1);
	for ( uint64_t seg = 0; seg < numseg; ++seg )
		sums[seg] = index[seg].size();

	sums.prefixSums();

	::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > result(numseg);
	for ( uint64_t k = 0; k+1 < sums.size(); ++k )
		result[k] = std::pair<uint64_t,uint64_t>(sums[k],sums[k+1]);

	return result;
}
/**
 * add the elements of o.D to the elements of D position by position
 *
 * @param o array to be added
 * @throws ::libmaus2::exception::LibMausException if the two arrays differ in size
 **/
void merge(DArray const & o)
{
	bool const compatible = (o.D.size() == D.size());

	if ( ! compatible )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "DArray::merge(): array sizes are not compatible." << std::endl;
		se.finish();
		throw se;
	}

	uint64_t k = 0;
	while ( k < D.size() )
	{
		D[k] += o.D[k];
		++k;
	}
}
/**
 * build one (low,high) pair per outer index entry from the accumulated entry sizes
 *
 * The size of each index[i] is stored in a flat array, prefixSums() is
 * applied to it and each value is paired with its successor.
 *
 * @return array holding one pair of neighbouring accumulated sizes per outer entry
 **/
::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSegmentAccu()
{
	typedef std::pair<uint64_t,uint64_t> interval_type;

	uint64_t const numseg = index.size();

	// one slot per segment plus a trailing slot which is never assigned here
	::libmaus2::autoarray::AutoArray<uint64_t> sums(numseg+1);
	for ( uint64_t seg = 0; seg < index.size(); ++seg )
		sums[seg] = index[seg].size();

	sums.prefixSums();

	::libmaus2::autoarray::AutoArray< interval_type > result(numseg);
	for ( uint64_t k = 0; k+1 < sums.size(); ++k )
		result[k] = interval_type(sums[k],sums[k+1]);

	return result;
}
/**
 * build one (low,high) pair per block from the accumulated block sizes
 *
 * All values of blocksizes are written to a flat array, prefixSums() is
 * applied to it and each value is paired with its successor.
 *
 * NOTE(review): the flat array is sized from the entry counts of index
 * but filled from blocksizes; this presumes both have the same shape -
 * confirm against the code maintaining them.
 *
 * @return array holding one pair of neighbouring accumulated sizes per block
 **/
::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeBlockIntervals() const
{
	// number of blocks as given by index
	uint64_t numblocks = 0;
	for ( uint64_t outer = 0; outer < index.size(); ++outer )
		numblocks += index[outer].size();

	// flatten blocksizes, leaving one trailing slot which is never assigned here
	::libmaus2::autoarray::AutoArray < uint64_t > sums(numblocks+1);
	uint64_t next = 0;
	for ( uint64_t outer = 0; outer < blocksizes.size(); ++outer )
	{
		for ( uint64_t inner = 0; inner < blocksizes[outer].size(); ++inner )
		{
			sums[next] = blocksizes[outer][inner];
			++next;
		}
	}

	sums.prefixSums();

	::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > blockintervals(numblocks);
	for ( uint64_t k = 0; k+1 < sums.size(); ++k )
		blockintervals[k] = std::pair<uint64_t,uint64_t>(sums[k],sums[k+1]);

	return blockintervals;
}
::libmaus2::util::Histogram::unique_ptr_type libmaus2::util::Utf8String::getHistogram(::libmaus2::autoarray::AutoArray<uint8_t> const & A) { #if defined(_OPENMP) uint64_t const numthreads = omp_get_max_threads(); #else uint64_t const numthreads = 1; #endif ::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads); uint64_t const numparts = partstarts.size()-1; ::libmaus2::util::Histogram::unique_ptr_type hist(new ::libmaus2::util::Histogram); ::libmaus2::parallel::OMPLock lock; #if defined(_OPENMP) #pragma omp parallel for #endif for ( int64_t t = 0; t < static_cast<int64_t>(numparts); ++t ) { ::libmaus2::util::Histogram::unique_ptr_type lhist(new ::libmaus2::util::Histogram); uint64_t codelen = 0; uint64_t const tcodelen = partstarts[t+1]-partstarts[t]; ::libmaus2::util::GetObject<uint8_t const *> G(A.begin()+partstarts[t]); while ( codelen != tcodelen ) (*lhist)(::libmaus2::util::UTF8::decodeUTF8(G,codelen)); lock.lock(); hist->merge(*lhist); lock.unlock(); } return UNIQUE_PTR_MOVE(hist); }
/**
 * call start() on every thread object and then run setup()
 **/
void start()
{
	uint64_t t = 0;
	while ( t < threads.size() )
	{
		threads[t]->start();
		++t;
	}

	setup();
}
/**
 * construct from the number sequence [a,e)
 *
 * H is obtained from getBitWT(a,e); the assertions below expect (*H)[j]
 * to equal the bit width (math::bitsPerNum) of the j'th number. For each
 * non zero width accepted by H->enctable.checkSymbol a CompactArray with
 * that many bits per element is allocated and a line stating its element
 * count is printed on std::cerr. Each number of non zero width is then
 * stored in the array for its width at its rank among the numbers of
 * that width, and finally the stored content is compared with the input
 * by assertion.
 *
 * @param a start iterator of the sequence
 * @param e end iterator of the sequence
 **/
CompactNumberArray(iterator a, iterator e)
: H(getBitWT(a,e)), C(H->enctable.maxsym+1)
{
	// allocate the per width arrays
	for ( int width = H->enctable.minsym; width <= H->enctable.maxsym; ++width )
	{
		if ( width == 0 || ! H->enctable.checkSymbol(width) )
			continue;

		uint64_t const numsyms = H->rank(width, (e-a)-1);
		C[width] = ::libmaus2::bitio::CompactArray::unique_ptr_type(new ::libmaus2::bitio::CompactArray(numsyms,width));
		std::cerr << numsyms << " symbols use " << width << " bits " << std::endl;
	}

	// store the numbers
	uint64_t pos = 0;
	for ( iterator it = a; it != e; ++it, ++pos )
	{
		uint64_t const bits = ::libmaus2::math::bitsPerNum(*it);

		assert ( (*H)[pos] == bits );

		if ( bits )
		{
			assert ( bits < C.size() );
			assert ( C[bits].get() );

			// slot inside the array for this width
			uint64_t const slot = H->rank(bits,pos)-1;
			C[bits]->set( slot , *it );
		}
	}

	// check that every number can be read back
	pos = 0;
	for ( iterator it = a; it != e; ++it, ++pos )
		assert ( (*this)[pos] == (*it) );
}
//! set up decoder for next chunk bool setup() { while ( true ) { // next chunk if ( chunkidx < chunks.size() ) { wrapper.resetStream(chunks[chunkidx].first,chunks[chunkidx].second); chunkidx++; return true; } // next range else if ( rangeidx < ranges.size() ) { rangecur = ranges[rangeidx++].get(); chunks = rangecur->getChunks(index); chunkidx = 0; } // no more ranges else { return false; } } }
/**
 * call startStack(stacksize) on every thread object and then run setup()
 *
 * @param stacksize value passed on to startStack() of each thread
 **/
void start(uint64_t const stacksize)
{
	uint64_t t = 0;
	while ( t < threads.size() )
	{
		threads[t]->startStack(stacksize);
		++t;
	}

	setup();
}
/**
 * return the largest symbol index s such that D[s] <= r
 *
 * The indices are scanned from rank_dictionaries.size()-1 down to 0 and
 * the first one satisfying the condition is returned.
 *
 * @param r value compared against the entries of D
 * @return largest s with D[s] <= r, or 0 if no entry satisfies the condition
 **/
uint64_t sortedSymbol(uint64_t r) const
{
	// the counter has the same width as the symbol count; a narrower counter
	// could never reach a count which does not fit into it
	for ( uint64_t sym = rank_dictionaries.size(); sym-- > 0; )
		if ( D[sym] <= r )
			return sym;

	return 0;
}
void init() { for ( uint64_t i = 0; i < inflateB.size(); ++i ) { libmaus2::lz::BgzfInflateBlock::unique_ptr_type tinflateB(new libmaus2::lz::BgzfInflateBlock(i)); inflateB[i] = UNIQUE_PTR_MOVE(tinflateB); inflatefreelist.push_back(i); } for ( uint64_t i = 0; i < inflateB.size(); ++i ) inflategloblist.enque( BgzfThreadQueueElement( libmaus2::lz::BgzfThreadOpBase::libmaus2_lz_bgzf_op_read_block, i, 0 ) ); }
/**
 * write one 0 bit to every context and flush it
 **/
void flush()
{
	uint64_t c = 0;
	while ( c < contexts.size() )
	{
		contexts[c]->writeBit(0);
		contexts[c]->flush();
		// std::cerr << "Flushed context " << c << std::endl;
		++c;
	}
}
void fillBuffer() { assert ( pc == pe ); if ( setpos ) { // std::cerr << "Seeking to " << readpos << std::endl; in.seekg(readpos); in.clear(); } if ( in.peek() >= 0 && readpos < endpos ) { #if 0 std::cerr << "Filling block, readpos " << readpos << " stream at pos " << in.tellg() << " endpos " << endpos << std::endl; #endif uint64_t blocksize = sizeof(uint64_t) + sizeof(uint64_t); // size of uncompressed buffer uint64_t const n = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in); // size of compressed data uint64_t const datasize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in); // add to block size blocksize += datasize; if ( n > B.size() ) { B = ::libmaus2::autoarray::AutoArray<char>(0,false); B = ::libmaus2::autoarray::AutoArray<char>(n,false); } pa = B.begin(); pc = pa; pe = pa + n; ::libmaus2::aio::IStreamWrapper wrapper(in); ::libmaus2::lz::IstreamSource< ::libmaus2::aio::IStreamWrapper> insource(wrapper,datasize); try { SnappyCompress::uncompress(insource,B.begin(),n); } catch(std::exception const & ex) { libmaus2::exception::LibMausException lme; lme.getStream() << "Failed to decompress snappy compressed data, comp=" << datasize << ", uncomp=" << n << ":\n" << ex.what() << "\n"; lme.finish(); throw lme; } readpos += blocksize; } }
/**
 * print the index on std::cerr
 *
 * For every outer index entry a line with its number and file name is
 * printed, followed by one (pos,kcnt,vcnt) line per inner entry.
 **/
void printIndex() const
{
	uint64_t const numfiles = index.size();

	for ( uint64_t f = 0; f < numfiles; ++f )
	{
		std::cerr << "file " << f << " " << filenames[f] << std::endl;

		uint64_t e = 0;
		while ( e < index[f].size() )
		{
			std::cerr
				<< "("
				<< index[f][e].pos << ","
				<< index[f][e].kcnt << ","
				<< index[f][e].vcnt << ")"
				<< std::endl;
			++e;
		}
	}
}
/**
 * flush all contexts and assemble the final stream on out
 *
 * Output order: the value of symbols, the serialised object behind root,
 * the number of contexts, then per context the number of bits
 * (64 * 6 * blockswritten), the number of words (8 * blockswritten) and
 * that many words copied from the context's temporary file. The stream
 * ends with the vector of context start offsets and the offset at which
 * that vector starts.
 *
 * @param out output stream
 * @return number of bytes accounted for on out
 **/
uint64_t createFinalStream(stream_type & out)
{
	typedef ::libmaus2::util::NumberSerialisation number_serialiser_type;
	typedef ::libmaus2::serialize::Serialize<uint64_t> word_serialiser_type;

	flush();

	uint64_t written = 0;

	// n
	written += number_serialiser_type::serialiseNumber(out,symbols);
	// huffman code tree
	written += root->serialize(out);
	// number of bit vectors
	written += number_serialiser_type::serialiseNumber(out,contexts.size());

	std::vector<uint64_t> nodeposvec;

	for ( uint64_t c = 0; c < contexts.size(); ++c )
	{
		// offset at which this context starts
		nodeposvec.push_back(written);

		// the block counter has to be read before the context is released
		uint64_t const blockswritten = contexts[c]->blockswritten;
		uint64_t const datawordswritten = 6*blockswritten;
		uint64_t const allwordswritten = 8*blockswritten;

		contexts[c].reset();
		tmpcnt.closeOutputTempFile(c);

		// bits written
		written += word_serialiser_type::serialize(out,64*datawordswritten);
		// auto array header (words written)
		written += word_serialiser_type::serialize(out,allwordswritten);

		// append the content of the temporary file
		std::istream & tmpin = tmpcnt.openInputTempFile(c);
		::libmaus2::util::GetFileSize::copy (tmpin, out, allwordswritten, sizeof(uint64_t));
		written += allwordswritten * sizeof(uint64_t);

		tmpcnt.closeInputTempFile(c);
	}

	// trailing index: context offsets followed by the position of the index itself
	uint64_t const indexpos = written;
	written += number_serialiser_type::serialiseNumberVector(out,nodeposvec);
	written += number_serialiser_type::serialiseNumber(out,indexpos);

	out.flush();

	return written;
}
/**
 * build one (low,high) pair per inner index entry from the accumulated vcnt values
 *
 * The vcnt fields of all entries are written to a flat array (outer index
 * order first, inner order second), prefixSums() is applied to it and each
 * value is paired with its successor.
 *
 * @return array holding one pair of neighbouring accumulated values per entry
 **/
::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSymAccu()
{
	typedef std::pair<uint64_t,uint64_t> interval_type;

	// count the inner entries over the complete index
	uint64_t total = 0;
	for ( uint64_t outer = 0; outer < index.size(); ++outer )
		total += index[outer].size();

	// one slot per entry plus a trailing slot which is never assigned here
	::libmaus2::autoarray::AutoArray<uint64_t> sums(total+1);
	uint64_t next = 0;
	for ( uint64_t outer = 0; outer < index.size(); ++outer )
	{
		for ( uint64_t inner = 0; inner < index[outer].size(); ++inner )
		{
			sums[next] = index[outer][inner].vcnt;
			++next;
		}
	}

	sums.prefixSums();

	::libmaus2::autoarray::AutoArray< interval_type > result(total);
	for ( uint64_t k = 0; k+1 < sums.size(); ++k )
		result[k] = interval_type(sums[k],sums[k+1]);

	return result;
}
/**
 * destructor: flush, close and release every file object which is still set
 **/
~FileBunchLRU()
{
	for ( uint64_t slot = 0; slot < files.size(); ++slot )
	{
		// skip slots without a file object
		if ( ! files[slot].get() )
			continue;

		files[slot] -> flush();
		files[slot] -> close();
		files[slot].reset();
	}
}
uint64_t get(uint64_t const i) const { uint64_t s = 0; for ( uint64_t j = 0; j < I.size(); ++j ) { libmaus2::index::ExternalMemoryIndexDecoderFindLargestSmallerResult<OverlapMeta> const R = I[j]->findLargestSmaller(OverlapMeta(i,0,0,0,0,0,0),true /* cache only */); s += R.blockid; } return s; }
/**
 * construct the table mapping characters to two bit codes
 *
 * The table has one entry per unsigned char value. The letters A, C, G
 * and T are mapped to 0, 1, 2 and 3 in both upper and lower case, all
 * other entries are 0.
 **/
FastATwoBitTable()
: T(static_cast<size_t>(std::numeric_limits<unsigned char>::max())+1,false)
{
	assert ( 3 < T.size() );

	// the array is allocated without erasing, so set the default code explicitly
	std::fill(T.begin(),T.end(),0);

	T['A'] = 0;
	T['a'] = 0;

	T['C'] = 1;
	T['c'] = 1;

	T['G'] = 2;
	T['g'] = 2;

	T['T'] = 3;
	T['t'] = 3;
}