/**
 * take a package off the free list. If the free list is empty, the
 * package array is enlarged first (to twice its size, or to one element
 * if it was empty) and the newly allocated packages are put on the free
 * list. The whole operation runs while holding lock.
 *
 * @return pointer to a package taken from the end of the free list
 **/
package_type * getPackage()
{
	libmaus::parallel::ScopePosixSpinLock llock(lock);

	if ( freelistFill == 0 )
	{
		uint64_t const oldsize = packages.size();
		uint64_t const grownsize = oldsize ? (2*oldsize) : 1;

		libmaus::autoarray::AutoArray<package_ptr_type> grownpackages(grownsize);
		libmaus::autoarray::AutoArray<package_type *> grownfreelist(grownsize);

		// carry over the packages allocated so far
		for ( uint64_t j = 0; j < oldsize; ++j )
			grownpackages[j] = UNIQUE_PTR_MOVE(packages[j]);

		// allocate the additional packages and register them as free
		for ( uint64_t j = oldsize; j < grownpackages.size(); ++j )
		{
			package_ptr_type fresh(new package_type);
			grownpackages[j] = UNIQUE_PTR_MOVE(fresh);
			grownfreelist[freelistFill] = grownpackages[j].get();
			freelistFill += 1;
		}

		packages = grownpackages;
		freelist = grownfreelist;
	}

	freelistFill -= 1;
	return freelist[freelistFill];
}
/**
 * @return the kcnt field of the last index entry, or 0 if the index is empty
 **/
uint64_t getNumKeys() const
{
	if ( index.size() == 0 )
		return 0;

	return index[index.size()-1].kcnt;
}
/**
 * process one code length.
 *
 * If rt equals codelenrun_first only the counters numlong and numshort
 * are updated. Otherwise a designator bit is written, the current long
 * pointer is stored whenever the short pointer has reached shortthres,
 * and the current short pointer is stored. In both cases longptr and
 * shortptr are advanced by codelen afterwards.
 *
 * @param rt run type
 * @param codelen length of the code to be accounted for
 **/
void operator()(CompactFastQContainerDictionaryCreator::codelenrun_type const rt, uint64_t const codelen)
{
	// true if the short pointer has reached the threshold and needs to be reset
	bool const shortfull = ( shortptr >= shortthres );

	if ( rt == codelenrun_first )
	{
		if ( shortfull )
		{
			++numlong;
			shortptr = 0;
		}

		++numshort;
	}
	else
	{
		if ( shortfull )
		{
			assert ( longidx < longptrs.size() );
			// the very first long pointer is designated by a 0 bit
			desigwc.writeBit(longidx != 0);
			longptrs[longidx++] = longptr;
			shortptr = 0;
		}
		else
		{
			desigwc.writeBit(0);
		}

		assert ( shortidx < shortptrs.size() );
		shortptrs[shortidx++] = shortptr;
	}

	// advance both pointers past the current code
	longptr += codelen;
	shortptr += codelen;
}
/**
 * write the array to stream: first the number of elements of D, then
 * each element of D in order, all via NumberSerialisation::serialiseNumber
 *
 * @param stream output stream
 **/
void serialise(stream_type & stream) const
{
	// length header
	::libmaus::util::NumberSerialisation::serialiseNumber(stream,D.size());

	// elements
	uint64_t idx = 0;
	while ( idx < D.size() )
	{
		::libmaus::util::NumberSerialisation::serialiseNumber(stream,D[idx]);
		++idx;
	}
}
/**
 * @return the vcnt field of the last index entry, or 0 if the index is empty
 **/
uint64_t getNumValues() const
{
	if ( index.size() == 0 )
		return 0;

	return index[index.size()-1].vcnt;
}
/**
 * add the elements of o.D to the elements of D position by position
 *
 * @param o array to be added; o.D needs to have the same size as D
 * @throws ::libmaus::exception::LibMausException if the sizes differ
 **/
void merge(DArray const & o)
{
	bool const compatible = ( o.D.size() == D.size() );

	if ( ! compatible )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "DArray::merge(): array sizes are not compatible." << std::endl;
		se.finish();
		throw se;
	}

	for ( uint64_t idx = 0; idx != D.size(); ++idx )
		D[idx] += o.D[idx];
}
/**
 * encode all bytes of data with a RangeEncoder writing to CB, using a
 * model_type object constructed from alph which is passed to
 * encodeUpdate for every symbol. The encoder is flushed with an end
 * marker and the elapsed time plus a throughput figure are printed on
 * std::cerr.
 *
 * @param data symbols to be encoded
 * @param alph parameter for constructing the model
 * @param CB output object the encoder writes to
 **/
void encodeFileRange(::libmaus::autoarray::AutoArray<uint8_t> const & data, unsigned int const alph, bw_type & CB)
{
	::libmaus::timing::RealTimeClock rtc;
	rtc.start();

	::libmaus::arithmetic::RangeEncoder < bw_type > AE(CB);
	model_type ME(alph);

	uint64_t pos = 0;
	while ( pos < data.size() )
	{
		AE.encodeUpdate(ME,data[pos]);
		++pos;
	}

	AE.flush(true /* add end marker */);

	std::cerr
		<< "Encoded in " << rtc.getElapsedSeconds() << " s, "
		<< 1./(rtc.getElapsedSeconds()/data.size())
		<< std::endl;
}
/**
 * compute one interval per element of index from the element sizes:
 * the sizes index[i].size() are stored in an array with one additional
 * trailing slot, prefixSums() is applied to that array and interval i
 * is formed by the resulting values at positions i and i+1.
 *
 * @return array of index.size() intervals
 **/
::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSegmentAccu()
{
	typedef std::pair<uint64_t,uint64_t> interval_type;

	uint64_t const numint = index.size();

	// segment sizes; the extra slot at the end is not written by the loop
	::libmaus::autoarray::AutoArray<uint64_t> preaccu(numint+1);
	for ( uint64_t seg = 0; seg < index.size(); ++seg )
		preaccu[seg] = index[seg].size();

	preaccu.prefixSums();

	// neighbouring values form the intervals
	::libmaus::autoarray::AutoArray< interval_type > accu(numint);
	for ( uint64_t seg = 0; seg+1 < preaccu.size(); ++seg )
		accu[seg] = interval_type(preaccu[seg],preaccu[seg+1]);

	return accu;
}
/**
 * locate key rank k in the sequence of objects stored in A. Objects are
 * skipped as long as k is not smaller than their getNumKeys() value,
 * which is subtracted from k on skipping.
 *
 * @param k key rank over the concatenation of all objects in A
 * @return pair (f,r) where f is the index of the object reached and r is
 *         the result of its lookupKey() for the remaining rank; if k is
 *         beyond all objects then (A.size(),0) is returned
 **/
std::pair<uint64_t,uint64_t> lookupKey(uint64_t k) const
{
	uint64_t fileptr = 0;

	for ( ; fileptr < A.size(); ++fileptr )
	{
		// k lies inside the current object
		if ( k < A[fileptr]->getNumKeys() )
			break;

		k -= A[fileptr]->getNumKeys();
	}

	if ( fileptr != A.size() )
		return std::pair<uint64_t,uint64_t>(fileptr,A[fileptr]->lookupKey(k));

	return std::pair<uint64_t,uint64_t>(fileptr,0);
}
/**
 * locate value rank v in the sequence of objects stored in A. Objects
 * are skipped as long as v is not smaller than their getNumValues()
 * value, which is subtracted from v on skipping.
 *
 * @param v value rank over the concatenation of all objects in A
 * @return pair (f,r) where f is the index of the object reached and r is
 *         the result of its lookupValue() for the remaining rank; if v
 *         is beyond all objects then (A.size(),0) is returned
 **/
std::pair<uint64_t,uint64_t> lookupValue(uint64_t v) const
{
	uint64_t fileptr = 0;

	for ( ; fileptr < A.size(); ++fileptr )
	{
		// v lies inside the current object
		if ( v < A[fileptr]->getNumValues() )
			break;

		v -= A[fileptr]->getNumValues();
	}

	if ( fileptr != A.size() )
		return std::pair<uint64_t,uint64_t>(fileptr,A[fileptr]->lookupValue(v));

	return std::pair<uint64_t,uint64_t>(fileptr,0);
}
CheckOverlapResultMergeInput(std::vector<std::string> const & inputfilenames) : in(inputfilenames.size()) { for ( uint64_t i = 0; i < in.size(); ++i ) in[i] = UNIQUE_PTR_MOVE(CheckOverlapResultInput::unique_ptr_type(new CheckOverlapResultInput(inputfilenames[i]))); for ( uint64_t i = 0; i < in.size(); ++i ) { CheckOverlapResult::shared_ptr_type ptr = in[i]->get(); if ( ptr ) heap.push ( std::pair<uint64_t,CheckOverlapResult::shared_ptr_type>(i,ptr) ); } }
::libmaus::util::Histogram::unique_ptr_type libmaus::util::Utf8String::getHistogram(::libmaus::autoarray::AutoArray<uint8_t> const & A) { #if defined(_OPENMP) uint64_t const numthreads = omp_get_max_threads(); #else uint64_t const numthreads = 1; #endif ::libmaus::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads); uint64_t const numparts = partstarts.size()-1; ::libmaus::util::Histogram::unique_ptr_type hist(new ::libmaus::util::Histogram); ::libmaus::parallel::OMPLock lock; #if defined(_OPENMP) #pragma omp parallel for #endif for ( int64_t t = 0; t < static_cast<int64_t>(numparts); ++t ) { ::libmaus::util::Histogram::unique_ptr_type lhist(new ::libmaus::util::Histogram); uint64_t codelen = 0; uint64_t const tcodelen = partstarts[t+1]-partstarts[t]; ::libmaus::util::GetObject<uint8_t const *> G(A.begin()+partstarts[t]); while ( codelen != tcodelen ) (*lhist)(::libmaus::util::UTF8::decodeUTF8(G,codelen)); lock.lock(); hist->merge(*lhist); lock.unlock(); } return UNIQUE_PTR_MOVE(hist); }
/**
 * increment frequency of i by 1; indices below low.size() are counted
 * in low, all others in all
 *
 * @param i index whose frequency is to be incremented
 **/
void operator()(uint64_t const i)
{
	if ( i >= low.size() )
	{
		all[i]++;
		return;
	}

	low[i]++;
}
/**
 * add v to frequency of i; indices below low.size() are counted in low,
 * all others in all
 *
 * @param i index of value to increase
 * @param v value to add
 **/
void add(uint64_t const i, uint64_t const v)
{
	if ( i >= low.size() )
	{
		all[i] += v;
		return;
	}

	low[i] += v;
}
/**
 * compute one interval per block: all values of blocksizes are copied
 * in order to a linear array having one additional trailing slot,
 * prefixSums() is applied to that array and interval i is formed by the
 * resulting values at positions i and i+1. The number of blocks is the
 * sum of index[i].size() over all i.
 *
 * @return array of block intervals
 **/
::libmaus::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeBlockIntervals() const
{
	typedef std::pair<uint64_t,uint64_t> interval_type;

	// total number of blocks as given by the index
	uint64_t numblocks = 0;
	for ( uint64_t f = 0; f < index.size(); ++f )
		numblocks += index[f].size();

	// linearise the block sizes
	::libmaus::autoarray::AutoArray < uint64_t > lblocksizes(numblocks+1);
	uint64_t * outptr = lblocksizes.begin();
	for ( uint64_t f = 0; f < blocksizes.size(); ++f )
		for ( uint64_t b = 0; b < blocksizes[f].size(); ++b )
		{
			*outptr = blocksizes[f][b];
			++outptr;
		}

	lblocksizes.prefixSums();

	// neighbouring values form the intervals
	::libmaus::autoarray::AutoArray < interval_type > blockintervals(numblocks);
	for ( uint64_t b = 0; b+1 < lblocksizes.size(); ++b )
		blockintervals[b] = interval_type(lblocksizes[b],lblocksizes[b+1]);

	return blockintervals;
}
/**
 * flush all contexts and assemble the final output: the number of
 * symbols, the serialised tree (root), the number of contexts, then for
 * each context two header words followed by the words copied from its
 * temporary file, and finally a vector holding the start position of
 * each context's data plus the position of that vector.
 *
 * @param out output stream
 * @return number of bytes accounted for in the output
 **/
uint64_t createFinalStream(stream_type & out)
{
	flush();

	uint64_t p = 0;
	// n
	p += ::libmaus::util::NumberSerialisation::serialiseNumber(out,symbols);
	// huffman code tree
	p += root->serialize(out);
	// number of bit vectors
	p += ::libmaus::util::NumberSerialisation::serialiseNumber(out,contexts.size());

	std::vector<uint64_t> nodeposvec;

	for ( uint64_t ctx = 0; ctx < contexts.size(); ++ctx )
	{
		// remember where the data for this context starts
		nodeposvec.push_back(p);

		// the counter has to be read before the context is released
		uint64_t const blockswritten = contexts[ctx]->blockswritten;
		uint64_t const datawordswritten = 6*blockswritten;
		uint64_t const allwordswritten = 8*blockswritten;

		contexts[ctx].reset();
		tmpcnt.closeOutputTempFile(ctx);

		// bits written
		p += ::libmaus::serialize::Serialize<uint64_t>::serialize(out,64*datawordswritten);
		// auto array header (words written)
		p += ::libmaus::serialize::Serialize<uint64_t>::serialize(out,allwordswritten);

		// copy the words stored in the temporary file to the output
		std::istream & istr = tmpcnt.openInputTempFile(ctx);
		::libmaus::util::GetFileSize::copy (istr, out, allwordswritten, sizeof(uint64_t));
		p += allwordswritten * sizeof(uint64_t);

		tmpcnt.closeInputTempFile(ctx);
	}

	// index of context start positions followed by the position of the index
	uint64_t const indexpos = p;
	p += ::libmaus::util::NumberSerialisation::serialiseNumberVector(out,nodeposvec);
	p += ::libmaus::util::NumberSerialisation::serialiseNumber(out,indexpos);

	out.flush();

	return p;
}
/**
 * look up v using lookupValue() and return a pointer to the index entry
 * at the resulting position
 *
 * @param v value to look up
 * @return pointer to index entry i = lookupValue(v) if i+1 < index.size(),
 *         null pointer otherwise
 **/
IndexEntry const * lookupValuePointer(uint64_t const v) const
{
	uint64_t const i = lookupValue(v);

	if ( i+1 >= index.size() )
		return 0;

	return &(index[i]);
}
/**
 * delete all elements referenced by alloclist, replace alloclist and
 * freelist by empty arrays and reset the free list fill counter
 **/
void cleanup()
{
	uint64_t idx = 0;
	while ( idx < alloclist.size() )
	{
		delete alloclist[idx];
		++idx;
	}

	alloclist = libmaus::autoarray::AutoArray<element_type *>(0);
	freelist = libmaus::autoarray::AutoArray<element_type *>(0);
	freelistfill = 0;
}
/**
 * compress input using SnappyCompress::rawcompress. The output array is
 * replaced by a new array of SnappyCompress::compressBound(inputLength)
 * bytes if it is smaller than that.
 *
 * @param input data to be compressed
 * @param inputLength length of input in bytes
 * @param output array receiving the compressed data
 * @return value returned by SnappyCompress::rawcompress
 **/
virtual size_t compress(char const * input, size_t inputLength, libmaus::autoarray::AutoArray<char> & output)
{
	uint64_t bound = SnappyCompress::compressBound(inputLength);
	bool const toosmall = ( output.size() < bound );

	if ( toosmall )
		output = libmaus::autoarray::AutoArray<char>(bound,false);

	return SnappyCompress::rawcompress(input,inputLength,output.begin());
}
/**
 * extract the non-empty intervals (first != second) from V, keeping
 * their order. A message is printed on std::cerr if there is none.
 *
 * @param V array of intervals
 * @return array containing the non-empty intervals of V
 **/
libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > libmaus::util::GenericIntervalTree::computeNonEmpty(::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & V)
{
	// first pass: count
	uint64_t nonempty = 0;
	for ( uint64_t idx = 0; idx < V.size(); ++idx )
	{
		bool const isempty = ( V[idx].first == V[idx].second );
		if ( ! isempty )
			nonempty += 1;
	}

	if ( nonempty == 0 )
		std::cerr << "all of the " << V.size() << " intervals are empty." << std::endl;

	// second pass: copy
	::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > R(nonempty);
	uint64_t outidx = 0;
	for ( uint64_t idx = 0; idx < V.size(); ++idx )
	{
		bool const isempty = ( V[idx].first == V[idx].second );
		if ( ! isempty )
		{
			R [ outidx ] = V[idx];
			outidx += 1;
		}
	}

	return R;
}
/**
 * look up k using lookupKey() and return a pointer to the index entry
 * at the resulting position
 *
 * @param k key to look up
 * @return pointer to index entry i = lookupKey(k) if i+1 < index.size(),
 *         null pointer otherwise
 **/
IndexEntry const * lookupKeyPointer(uint64_t const k) const
{
	uint64_t const i = lookupKey(k);

	if ( i+1 >= index.size() )
		return 0;

	return &(index[i]);
}
/**
 * create a new object constructed with slog+1 and insert all used
 * entries of H (those whose first component differs from
 * base_type::unused()) into it
 *
 * @return the newly created object
 **/
unique_ptr_type extend() const
{
	unique_ptr_type O(new this_type(slog+1));

	for ( uint64_t slot = 0; slot < H.size(); ++slot )
	{
		// skip unused slots
		if ( H[slot].first == base_type::unused() )
			continue;

		O->insert ( H[slot].first, H[slot].second );
	}

	return UNIQUE_PTR_MOVE(O);
}
/**
 * compute a bit vector with one bit per interval in V; bit i is set iff
 * interval i is non-empty (first != second). setupIndex() is called on
 * the vector before it is returned.
 *
 * @param V array of intervals
 * @return indexed bit vector marking the non-empty intervals
 **/
libmaus::bitio::IndexedBitVector::unique_ptr_type libmaus::util::GenericIntervalTree::computeNonEmptyBV(::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & V)
{
	::libmaus::bitio::IndexedBitVector::unique_ptr_type BV(new ::libmaus::bitio::IndexedBitVector(V.size()));

	uint64_t idx = 0;
	while ( idx < V.size() )
	{
		(*BV)[idx] = (V[idx].first != V[idx].second);
		++idx;
	}

	BV->setupIndex();

	return UNIQUE_PTR_MOVE(BV);
}
/**
 * constructor; sets up a table with 256 entries mapping each character
 * of the string "=ACMGRSVTWYHKDBN" to its position in that string. All
 * other entries are set to the length of the string.
 **/
BamSeqEncodeTable() : A(256)
{
	char const * const s = "=ACMGRSVTWYHKDBN";
	// length of the symbol string, computed once instead of per loop iteration
	uint64_t const slen = strlen(s);

	// default: code for symbols which are not in s
	for ( uint64_t i = 0; i < A.size(); ++i )
		A[i] = slen;

	// codes for the symbols in s; index via unsigned char so the
	// subscript never depends on the signedness of plain char
	for ( uint64_t i = 0; i < slen; ++i )
		A [ static_cast<unsigned char>(s[i]) ] = i;
}
/**
 * write a single 0 bit to each context and flush it afterwards
 **/
void flush()
{
	uint64_t ctx = 0;

	while ( ctx < contexts.size() )
	{
		contexts[ctx]->writeBit(0);
		contexts[ctx]->flush();
		// std::cerr << "Flushed context " << ctx << std::endl;
		++ctx;
	}
}
BgzfDeflateParallel(std::ostream & rdeflateout, uint64_t const rnumthreads, uint64_t const rnumbuffers, int const level, std::ostream * rdeflateindexostr = 0) : deflategloblist(), deflatecontext(deflategloblist,rdeflateout,rnumbuffers,level,BgzfDeflateParallelContext::getDefaultDeflateGetCur(),rdeflateindexostr), T(rnumthreads) { for ( uint64_t i = 0; i < T.size(); ++i ) { BgzfDeflateParallelThread::unique_ptr_type tTi(new BgzfDeflateParallelThread(deflatecontext)); T[i] = UNIQUE_PTR_MOVE(tTi); T[i]->start(); } }
/**
 * compute one interval per index entry from the vcnt fields: the vcnt
 * values of all entries index[i][j] are copied in order to a linear
 * array having one additional trailing slot, prefixSums() is applied to
 * that array and interval i is formed by the resulting values at
 * positions i and i+1.
 *
 * @return array of intervals, one per index entry
 **/
::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSymAccu()
{
	typedef std::pair<uint64_t,uint64_t> interval_type;

	// total number of index entries
	uint64_t numint = 0;
	for ( uint64_t f = 0; f < index.size(); ++f )
		numint += index[f].size();

	// linearise the vcnt values
	::libmaus::autoarray::AutoArray<uint64_t> preaccu(numint+1);
	uint64_t outidx = 0;
	for ( uint64_t f = 0; f < index.size(); ++f )
		for ( uint64_t e = 0; e < index[f].size(); ++e )
		{
			preaccu[outidx] = index[f][e].vcnt;
			++outidx;
		}

	preaccu.prefixSums();

	// neighbouring values form the intervals
	::libmaus::autoarray::AutoArray< interval_type > accu(numint);
	for ( uint64_t e = 0; e+1 < preaccu.size(); ++e )
		accu[e] = interval_type(preaccu[e],preaccu[e+1]);

	return accu;
}
/**
 * destructor; flushes, closes and releases every file object which is
 * still set in files
 **/
~FileBunchLRU()
{
	for ( uint64_t fid = 0; fid < files.size(); ++fid )
	{
		// slot holds no file object
		if ( ! files[fid].get() )
			continue;

		files[fid] -> flush();
		files[fid] -> close();
		files[fid].reset();
	}
}
void fillBuffer() { assert ( pc == pe ); if ( setpos ) { // std::cerr << "Seeking to " << readpos << std::endl; in.seekg(readpos); in.clear(); } if ( in.peek() >= 0 && readpos < endpos ) { #if 0 std::cerr << "Filling block, readpos " << readpos << " stream at pos " << in.tellg() << " endpos " << endpos << std::endl; #endif uint64_t blocksize = sizeof(uint64_t) + ( bigbuf ? sizeof(uint64_t) : 0 ); // size of uncompressed buffer uint64_t const n = bigbuf ? ::libmaus::util::NumberSerialisation::deserialiseNumber(in) : ::libmaus::util::UTF8::decodeUTF8(in,blocksize) ; // size of compressed data uint64_t const datasize = ::libmaus::util::NumberSerialisation::deserialiseNumber(in); // add to block size blocksize += datasize; if ( n > B.size() ) { B = ::libmaus::autoarray::AutoArray<char>(0,false); B = ::libmaus::autoarray::AutoArray<char>(n,false); } pa = B.begin(); pc = pa; pe = pa + n; ::libmaus::aio::IStreamWrapper wrapper(in); ::libmaus::lz::IstreamSource< ::libmaus::aio::IStreamWrapper> insource(wrapper,datasize); SnappyCompress::uncompress(insource,B.begin(),n); readpos += blocksize; } }
/**
 * assignment operator; copies all and the contents of low from o. The
 * array low is reallocated only if its size differs from that of o.low.
 * Self assignment is a no-op.
 *
 * @param o histogram to be copied
 * @return *this
 **/
Histogram & operator=(Histogram const & o)
{
	if ( this == &o )
		return *this;

	all = o.all;

	bool const samesize = ( low.size() == o.low.size() );
	if ( ! samesize )
		low = ::libmaus::autoarray::AutoArray<uint64_t>(o.low.size(),false);

	std::copy(o.low.begin(),o.low.end(),low.begin());

	return *this;
}