/**
 * obtain a package from the free list; if the free list is empty, the pool
 * is enlarged first (to one package if there are none yet, to twice the
 * current number otherwise)
 *
 * @return pointer to a package taken off the free list; the object itself stays owned by packages
 **/
package_type * getPackage()
			{
				// guard object on lock, alive until the function returns
				libmaus::parallel::ScopePosixSpinLock llock(lock);

				if ( freelistFill == 0 )
				{
					uint64_t const oldsize = packages.size();
					uint64_t const grownsize = oldsize ? 2*oldsize : 1;

					libmaus::autoarray::AutoArray<package_ptr_type> grownpackages(grownsize);
					libmaus::autoarray::AutoArray<package_type *> grownfreelist(grownsize);

					// take over the packages which exist already
					for ( uint64_t i = 0; i < oldsize; ++i )
						grownpackages[i] = UNIQUE_PTR_MOVE(packages[i]);

					// allocate the additional packages and put each of them on the new free list
					for ( uint64_t i = oldsize; i < grownpackages.size(); ++i )
					{
						package_ptr_type fresh(new package_type);
						grownpackages[i] = UNIQUE_PTR_MOVE(fresh);
						grownfreelist[freelistFill++] = grownpackages[i].get();
					}

					packages = grownpackages;
					freelist = grownfreelist;
				}

				// pop the top of the free list
				return freelist[--freelistFill];
			}
Пример #2
0
			/**
			 * @return the kcnt field of the last entry of index, or 0 if index is empty
			 **/
			uint64_t getNumKeys() const
			{
				if ( ! index.size() )
					return 0;

				return index[index.size()-1].kcnt;
			}
			/**
			 * callback processing one code length
			 *
			 * For rt == codelenrun_first only counters are updated (numlong, numshort).
			 * For any other rt the current pointers are stored in longptrs/shortptrs
			 * and one bit per call is written to desigwc.
			 * In both cases longptr and shortptr are advanced by codelen, and shortptr
			 * is reset to zero once it has reached shortthres.
			 *
			 * @param rt run type selecting counting (codelenrun_first) or recording
			 * @param codelen length added to longptr and shortptr
			 **/
			void operator()(CompactFastQContainerDictionaryCreator::codelenrun_type const rt, uint64_t const codelen)
			{
				if ( rt == codelenrun_first )
				{
					// counting run: a long pointer is due whenever shortptr has reached the threshold
					if ( shortptr >= shortthres )
					{
						numlong++;
						shortptr = 0;
					}
					
					longptr += codelen;
					shortptr += codelen;				
					// one short pointer is counted per call
					numshort++;			
				}
				else
				{
					if ( shortptr >= shortthres )
					{
						assert ( longidx < longptrs.size() );
						// bit is 0 for the very first long pointer stored and 1 for every later one
						desigwc.writeBit(longidx != 0);
						longptrs[longidx++] = longptr;
						shortptr = 0;
					}
					else
					{
						// no long pointer stored for this call
						desigwc.writeBit(0);
					}
					
					// the short pointer is stored after a possible reset, so it is 0 right after a long pointer
					assert ( shortidx < shortptrs.size() );
					shortptrs[shortidx++] = shortptr;
					
					longptr += codelen;
					shortptr += codelen;				
				}
			}
Пример #4
0
			/**
			 * write D to stream: first the number of elements, then the elements in index order
			 *
			 * @param stream output stream
			 **/
			void serialise(stream_type & stream) const
			{
				::libmaus::util::NumberSerialisation::serialiseNumber(stream,D.size());

				uint64_t const numel = D.size();
				for ( uint64_t idx = 0; idx != numel; ++idx )
				{
					::libmaus::util::NumberSerialisation::serialiseNumber(stream,D[idx]);
				}
			}
Пример #5
0
			/**
			 * @return the vcnt field of the last entry of index, or 0 if index is empty
			 **/
			uint64_t getNumValues() const
			{
				if ( ! index.size() )
					return 0;

				return index[index.size()-1].vcnt;
			}
Пример #6
0
			/**
			 * add the elements of o.D to the elements of D position by position
			 *
			 * @param o array to be added; o.D must have the same size as D
			 * @throws ::libmaus::exception::LibMausException if the sizes differ
			 **/
			void merge(DArray const & o)
			{
				uint64_t const numel = D.size();

				if ( o.D.size() != numel )
				{
					::libmaus::exception::LibMausException ex;
					ex.getStream() << "DArray::merge(): array sizes are not compatible." << std::endl;
					ex.finish();
					throw ex;
				}

				for ( uint64_t idx = 0; idx != numel; ++idx )
				{
					D[idx] += o.D[idx];
				}
			}
Пример #7
0
/**
 * encode all symbols of data with a range encoder writing to CB, then print
 * the elapsed time and the inverse of the time per symbol on std::cerr
 *
 * @param data symbols to be encoded
 * @param alph value passed to the model constructor
 * @param CB output object handed to the range encoder
 **/
void encodeFileRange(::libmaus::autoarray::AutoArray<uint8_t> const & data, unsigned int const alph, bw_type & CB)
{
	::libmaus::timing::RealTimeClock timer;
	timer.start();

	::libmaus::arithmetic::RangeEncoder < bw_type > encoder(CB);
	model_type model(alph);

	// feed the symbols to the encoder in order
	uint8_t const * const dataend = data.end();
	for ( uint8_t const * cur = data.begin(); cur != dataend; ++cur )
		encoder.encodeUpdate(model,*cur);

	encoder.flush(true /* add end marker */);

	std::cerr << "Encoded in " << timer.getElapsedSeconds() << " s, "
		<< 1./(timer.getElapsedSeconds()/data.size())
		<< std::endl;
}
Пример #8
0
			/**
			 * compute one interval per entry of index from the sizes index[i].size():
			 * the sizes are stored in an array with one extra trailing slot, passed
			 * through prefixSums(), and each pair of neighbouring values forms one interval
			 *
			 * @return array of index.size() pairs (preaccu[i],preaccu[i+1])
			 **/
			::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSegmentAccu()
			{
				uint64_t const numint = index.size();

				::libmaus::autoarray::AutoArray<uint64_t> preaccu(numint+1);
				for ( uint64_t seg = 0; seg < numint; ++seg )
					preaccu[seg] = index[seg].size();
				preaccu.prefixSums();

				::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > accu(numint);
				for ( uint64_t seg = 0; seg+1 < preaccu.size(); ++seg )
					accu[seg] = std::pair<uint64_t,uint64_t>(preaccu[seg],preaccu[seg+1]);

				return accu;
			}
Пример #9
0
			/**
			 * locate key k in the sequence of objects stored in A: objects are skipped
			 * as long as k is not smaller than their key count, and the skipped counts
			 * are subtracted from k
			 *
			 * @param k key rank over all objects in A
			 * @return (object index, result of lookupKey on that object for the remaining k),
			 *         or (A.size(),0) if k is not smaller than the sum of all key counts
			 **/
			std::pair<uint64_t,uint64_t> lookupKey(uint64_t k) const
			{
				uint64_t fileptr = 0;

				for ( ; fileptr < A.size(); ++fileptr )
				{
					uint64_t const numkeys = A[fileptr]->getNumKeys();

					if ( k < numkeys )
						break;

					k -= numkeys;
				}

				if ( fileptr < A.size() )
					return std::pair<uint64_t,uint64_t>(fileptr,A[fileptr]->lookupKey(k));

				return std::pair<uint64_t,uint64_t>(fileptr,0);
			}
Пример #10
0
			/**
			 * locate value v in the sequence of objects stored in A: objects are skipped
			 * as long as v is not smaller than their value count, and the skipped counts
			 * are subtracted from v
			 *
			 * @param v value rank over all objects in A
			 * @return (object index, result of lookupValue on that object for the remaining v),
			 *         or (A.size(),0) if v is not smaller than the sum of all value counts
			 **/
			std::pair<uint64_t,uint64_t> lookupValue(uint64_t v) const
			{
				uint64_t fileptr = 0;

				for ( ; fileptr < A.size(); ++fileptr )
				{
					uint64_t const numvalues = A[fileptr]->getNumValues();

					if ( v < numvalues )
						break;

					v -= numvalues;
				}

				if ( fileptr < A.size() )
					return std::pair<uint64_t,uint64_t>(fileptr,A[fileptr]->lookupValue(v));

				return std::pair<uint64_t,uint64_t>(fileptr,0);
			}
Пример #11
0
			CheckOverlapResultMergeInput(std::vector<std::string> const & inputfilenames)
			: in(inputfilenames.size())
			{
				for ( uint64_t i = 0; i < in.size(); ++i )
					in[i] = UNIQUE_PTR_MOVE(CheckOverlapResultInput::unique_ptr_type(new CheckOverlapResultInput(inputfilenames[i])));

				for ( uint64_t i = 0; i < in.size(); ++i )
				{
					CheckOverlapResult::shared_ptr_type ptr = in[i]->get();
					
					if ( ptr )
						heap.push ( 
							std::pair<uint64_t,CheckOverlapResult::shared_ptr_type>(i,ptr) 
						);
				}
			}
Пример #12
0
/**
 * compute a histogram of the symbols obtained by running the UTF-8 decoder over A
 *
 * A is split into parts by computePartStarts; each part is decoded into a
 * histogram of its own, which is merged into the result while lock is held.
 *
 * @param A array containing the coded text
 * @return histogram of the decoded symbols
 **/
::libmaus::util::Histogram::unique_ptr_type libmaus::util::Utf8String::getHistogram(::libmaus::autoarray::AutoArray<uint8_t> const & A)
{
	// number of threads passed to computePartStarts: OpenMP maximum if available, 1 otherwise
	#if defined(_OPENMP)
	uint64_t const numthreads = omp_get_max_threads();
	#else
	uint64_t const numthreads = 1;
	#endif

	::libmaus::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads);
	uint64_t const numparts = partstarts.size()-1;

	::libmaus::util::Histogram::unique_ptr_type hist(new ::libmaus::util::Histogram);
	::libmaus::parallel::OMPLock lock;

	#if defined(_OPENMP)
	#pragma omp parallel for
	#endif
	for ( int64_t t = 0; t < static_cast<int64_t>(numparts); ++t )
	{
		// histogram private to this part
		::libmaus::util::Histogram::unique_ptr_type parthist(new ::libmaus::util::Histogram);

		uint64_t const partbegin = partstarts[t];
		uint64_t const partlen = partstarts[t+1]-partbegin;
		::libmaus::util::GetObject<uint8_t const *> G(A.begin()+partbegin);

		// decode symbols until the counter passed to the decoder has reached the length of the part
		uint64_t decoded = 0;
		while ( decoded != partlen )
			(*parthist)(::libmaus::util::UTF8::decodeUTF8(G,decoded));

		// merging into the shared result happens under the lock
		lock.lock();
		hist->merge(*parthist);
		lock.unlock();
	}

	return UNIQUE_PTR_MOVE(hist);
}
Пример #13
0
			/**
			 * increment frequency of i by 1
			 *
			 * indices below low.size() are counted in low, all others in all
			 *
			 * @param i index whose frequency is to be incremented
			 **/
			void operator()(uint64_t const i)
			{
				if ( i >= low.size() )
				{
					all[i]++;
					return;
				}

				low[i]++;
			}
Пример #14
0
			/**
			 * add v to frequency of i
			 *
			 * indices below low.size() are counted in low, all others in all
			 *
			 * @param i index of value to increase
			 * @param v value to add
			 **/
			void add(uint64_t const i, uint64_t const v)
			{
				if ( i >= low.size() )
				{
					all[i] += v;
					return;
				}

				low[i] += v;
			}
Пример #15
0
			/**
			 * compute one interval per block: the values of blocksizes are written
			 * consecutively to an array with one extra trailing slot, passed through
			 * prefixSums(), and each pair of neighbouring values forms one interval
			 *
			 * @return array of pairs (lblocksizes[i],lblocksizes[i+1])
			 **/
			::libmaus::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeBlockIntervals() const
			{
				// NOTE(review): the number of blocks is taken from index while the values are
				// taken from blocksizes; this assumes both have matching shapes - confirm
				uint64_t numblocks = 0;
				for ( uint64_t i = 0; i < index.size(); ++i )
					numblocks += index[i].size();

				::libmaus::autoarray::AutoArray < uint64_t > lblocksizes(numblocks+1);
				uint64_t out = 0;
				for ( uint64_t i = 0; i < blocksizes.size(); ++i )
				{
					for ( uint64_t j = 0; j < blocksizes[i].size(); ++j )
						lblocksizes[out++] = blocksizes[i][j];
				}
				lblocksizes.prefixSums();

				::libmaus::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > blockintervals(numblocks);
				for ( uint64_t b = 0; b+1 < lblocksizes.size(); ++b )
					blockintervals[b] = std::pair<uint64_t,uint64_t>(lblocksizes[b],lblocksizes[b+1]);

				return blockintervals;
			}
			/**
			 * write the final stream to out: symbols, the serialised root, the number of
			 * contexts, then per context two header numbers followed by the contents of
			 * the context's temporary file, and finally a vector of the per context
			 * start positions followed by the position of that vector
			 *
			 * @param out output stream
			 * @return final value of the position counter p (sum of the values returned by
			 *         the serialisation calls plus the copied data sizes in bytes)
			 **/
			uint64_t createFinalStream(stream_type & out)
			{			
				// flush all contexts before their temporary files are read back
				flush();

				// running position in out
				uint64_t p = 0;
				p += ::libmaus::util::NumberSerialisation::serialiseNumber(out,symbols); // n
				p += root->serialize(out); // huffman code tree
				p += ::libmaus::util::NumberSerialisation::serialiseNumber(out,contexts.size()); // number of bit vectors
				
				// position in out at which the data of each context starts
				std::vector<uint64_t> nodeposvec;

				for ( uint64_t i = 0; i < contexts.size(); ++i )
				{
					nodeposvec.push_back(p);
				
					// read the block count before the context is released below
					uint64_t const blockswritten = contexts[i]->blockswritten;
					// NOTE(review): factors 6 and 8 presumably mean 8 words per block, 6 of them data - confirm
					uint64_t const datawordswritten = 6*blockswritten;
					uint64_t const allwordswritten = 8*blockswritten;
						
					// release the context and close its temporary output file before the file is reopened for reading
					contexts[i].reset();
					tmpcnt.closeOutputTempFile(i);	
					
					// bits written
					p += ::libmaus::serialize::Serialize<uint64_t>::serialize(out,64*datawordswritten);
					// auto array header (words written)
					p += ::libmaus::serialize::Serialize<uint64_t>::serialize(out,allwordswritten);
					//std::string const filename = outputfilenames[i];
					//::libmaus::aio::CheckedInputStream istr(filename);
					std::istream & istr = tmpcnt.openInputTempFile(i);
					// std::ifstream istr(filename.c_str(),std::ios::binary);
					// std::cerr << "Copying " << allwordswritten << " from stream " << filename << std::endl;
					// copy allwordswritten units of sizeof(uint64_t) bytes from the temporary file to out
					::libmaus::util::GetFileSize::copy (istr, out, allwordswritten, sizeof(uint64_t));
					p += allwordswritten * sizeof(uint64_t);
					tmpcnt.closeInputTempFile(i);

					// remove(filename.c_str());
				}
				
				// the index (vector of context positions) starts here; its position is stored last
				uint64_t const indexpos = p;
				p += ::libmaus::util::NumberSerialisation::serialiseNumberVector(out,nodeposvec);
				p += ::libmaus::util::NumberSerialisation::serialiseNumber(out,indexpos);
					
				out.flush();
				
				return p;
			}
Пример #17
0
			/**
			 * @param v value passed to lookupValue
			 * @return pointer to index[i] for i = lookupValue(v) if i+1 < index.size(), null pointer otherwise
			 **/
			IndexEntry const * lookupValuePointer(uint64_t const v) const
			{
				uint64_t const pos = lookupValue(v);

				// the last entry of index is never returned
				if ( pos+1 >= index.size() )
					return 0;

				return &(index[pos]);
			}
Пример #18
0
			/**
			 * delete all elements referenced by alloclist and reset alloclist,
			 * freelist and freelistfill to the empty state
			 **/
			void cleanup()
			{
				uint64_t const numalloc = alloclist.size();
				for ( uint64_t idx = 0; idx != numalloc; ++idx )
				{
					delete alloclist[idx];
				}

				freelistfill = 0;
				freelist = libmaus::autoarray::AutoArray<element_type *>(0);
				alloclist = libmaus::autoarray::AutoArray<element_type *>(0);
			}
Пример #19
0
			/**
			 * compress input to output using SnappyCompress
			 *
			 * @param input data to be compressed
			 * @param inputLength length of input
			 * @param output output buffer; replaced by a new array if it is smaller than
			 *        SnappyCompress::compressBound(inputLength)
			 * @return value returned by SnappyCompress::rawcompress
			 **/
			virtual size_t compress(char const * input, size_t inputLength, libmaus::autoarray::AutoArray<char> & output)
			{
				uint64_t const needed = SnappyCompress::compressBound(inputLength);

				if ( needed > output.size() )
				{
					output = libmaus::autoarray::AutoArray<char>(needed,false);
				}

				return SnappyCompress::rawcompress(input,inputLength,output.begin());
			}
Пример #20
0
/**
 * extract the intervals of V whose first and second component differ, keeping their order;
 * a message is printed on std::cerr if there is no such interval
 *
 * @param V array of intervals
 * @return array containing the intervals with first != second
 **/
libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > libmaus::util::GenericIntervalTree::computeNonEmpty(::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & V)
{
	// first pass: count
	uint64_t numnonempty = 0;
	for ( uint64_t i = 0; i < V.size(); ++i )
	{
		if ( V[i].first != V[i].second )
			++numnonempty;
	}

	if ( ! numnonempty )
		std::cerr << "all of the " << V.size() << " intervals are empty." << std::endl;

	// second pass: copy
	::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > R(numnonempty);
	uint64_t out = 0;
	for ( uint64_t i = 0; i < V.size(); ++i )
	{
		if ( V[i].first != V[i].second )
			R[out++] = V[i];
	}

	return R;
}
Пример #21
0
			/**
			 * @param k key passed to lookupKey
			 * @return pointer to index[i] for i = lookupKey(k) if i+1 < index.size(), null pointer otherwise
			 **/
			IndexEntry const * lookupKeyPointer(uint64_t const k) const
			{
				uint64_t const pos = lookupKey(k);

				// the last entry of index is never returned
				if ( pos+1 >= index.size() )
					return 0;

				return &(index[pos]);
			}
Пример #22
0
			/**
			 * create a new object constructed with slog+1 and insert into it every
			 * entry of H whose first component is not base_type::unused()
			 *
			 * @return the new object
			 **/
			unique_ptr_type extend() const
			{
				unique_ptr_type O(new this_type(slog+1));

				for ( uint64_t slot = 0; slot < H.size(); ++slot )
				{
					// skip slots marked as unused
					if ( H[slot].first == base_type::unused() )
						continue;

					O->insert ( H[slot].first, H[slot].second );
				}

				return UNIQUE_PTR_MOVE(O);
			}
Пример #23
0
/**
 * compute a bit vector of length V.size() in which bit i is set iff
 * V[i].first != V[i].second; setupIndex() is called on the vector before it is returned
 *
 * @param V array of intervals
 * @return the bit vector
 **/
libmaus::bitio::IndexedBitVector::unique_ptr_type libmaus::util::GenericIntervalTree::computeNonEmptyBV(::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & V)
{
	uint64_t const numint = V.size();
	::libmaus::bitio::IndexedBitVector::unique_ptr_type BV(new ::libmaus::bitio::IndexedBitVector(numint));

	for ( uint64_t idx = 0; idx != numint; ++idx )
	{
		bool const nonempty = (V[idx].first != V[idx].second);
		(*BV)[idx] = nonempty;
	}

	BV->setupIndex();

	return UNIQUE_PTR_MOVE(BV);
}
Пример #24
0
			/**
			 * constructor: builds a table with 256 entries mapping the characters of
			 * "=ACMGRSVTWYHKDBN" to their position in that string and every other
			 * index to the length of the string (16)
			 **/
			BamSeqEncodeTable()
			: A(256)
			{
				char const * s = "=ACMGRSVTWYHKDBN";
				// length of s is loop invariant, compute it once
				uint64_t const slen = strlen(s);

				// default: the value slen marks indices which are not in s
				for ( uint64_t i = 0; i < A.size(); ++i )
					A[i] = slen;
				// characters are converted to unsigned char before being used as an index
				// so that the subscript cannot become negative where char is signed
				for ( uint64_t i = 0; i < slen; ++i )
					A [ static_cast<unsigned char>(s[i]) ] = i;
			}
			void flush()
			{
				for ( uint64_t i = 0; i < contexts.size(); ++i )
				{
					contexts[i]->writeBit(0);
					contexts[i]->flush();
					// std::cerr << "Flushed context " << i << std::endl;
				}
			}
Пример #26
0
			BgzfDeflateParallel(std::ostream & rdeflateout, uint64_t const rnumthreads, uint64_t const rnumbuffers, int const level, std::ostream * rdeflateindexostr = 0)
			: deflategloblist(), deflatecontext(deflategloblist,rdeflateout,rnumbuffers,level,BgzfDeflateParallelContext::getDefaultDeflateGetCur(),rdeflateindexostr), T(rnumthreads)
			{
				for ( uint64_t i = 0; i < T.size(); ++i )
				{
					BgzfDeflateParallelThread::unique_ptr_type tTi(new BgzfDeflateParallelThread(deflatecontext));
					T[i] = UNIQUE_PTR_MOVE(tTi);
					T[i]->start();
				}
			}
Пример #27
0
			/**
			 * compute one interval per entry index[i][j] from the vcnt fields: the
			 * values are written consecutively to an array with one extra trailing
			 * slot, passed through prefixSums(), and each pair of neighbouring values
			 * forms one interval
			 *
			 * @return array of pairs (preaccu[i],preaccu[i+1])
			 **/
			::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSymAccu()
			{
				// total number of entries over all parts of index
				uint64_t numint = 0;
				for ( uint64_t i = 0; i < index.size(); ++i )
					numint += index[i].size();

				::libmaus::autoarray::AutoArray<uint64_t> preaccu(numint+1);
				uint64_t out = 0;
				for ( uint64_t i = 0; i < index.size(); ++i )
				{
					for ( uint64_t j = 0; j < index[i].size(); ++j )
						preaccu[out++] = index[i][j].vcnt;
				}
				preaccu.prefixSums();

				::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > accu(numint);
				for ( uint64_t e = 0; e+1 < preaccu.size(); ++e )
					accu[e] = std::pair<uint64_t,uint64_t>(preaccu[e],preaccu[e+1]);

				return accu;
			}
Пример #28
0
			/**
			 * destructor: flushes, closes and releases every file object which is still set
			 **/
			~FileBunchLRU()
			{
				for ( uint64_t fileid = 0; fileid < files.size(); ++fileid )
				{
					// nothing to do for slots without a file object
					if ( ! files[fileid].get() )
						continue;

					files[fileid] -> flush();
					files[fileid] -> close();
					files[fileid].reset();
				}
			}
Пример #29
0
			/**
			 * read the next block from in and decompress it to B
			 *
			 * Must only be called when the current buffer is exhausted (pc == pe).
			 * If setpos is set, the stream is positioned at readpos first. A block is
			 * read only if the stream delivers a character and readpos < endpos;
			 * otherwise the call leaves the buffer pointers untouched. After a block
			 * has been read, pa/pc point to the start of B, pe points n bytes behind
			 * it and readpos is advanced by the size of the block.
			 **/
			void fillBuffer()
			{
				assert ( pc == pe );
				
				if ( setpos )
				{
					// std::cerr << "Seeking to " << readpos << std::endl;
					// the error state has to be cleared BEFORE seeking: seekg does nothing
					// while failbit is set, so the seek would be skipped silently otherwise
					in.clear();
					in.seekg(readpos);
					// kept from the previous version: state is reset after the seek as well
					in.clear();
				}

				if ( in.peek() >= 0 && readpos < endpos )
				{
					#if 0
					std::cerr << "Filling block, readpos " << readpos 
						<< " stream at pos " << in.tellg() 
						<< " endpos " << endpos
						<< std::endl;
					#endif
				
					// size of the block in the file; starts with the fixed size header
					// number(s), the remaining parts are added below
					uint64_t blocksize = sizeof(uint64_t) + ( bigbuf ? sizeof(uint64_t) : 0 );
					
					// size of uncompressed buffer
					// (number for bigbuf, UTF-8 coded otherwise; blocksize is passed to the
					// decoder, presumably to be increased by the code length - confirm)
					uint64_t const n = 
						bigbuf ?
							::libmaus::util::NumberSerialisation::deserialiseNumber(in)
							:
							::libmaus::util::UTF8::decodeUTF8(in,blocksize)
						;

					// size of compressed data
					uint64_t const datasize = ::libmaus::util::NumberSerialisation::deserialiseNumber(in);
					// add to block size
					blocksize += datasize;
						
					if ( n > B.size() )
					{
						// B is replaced by an empty array first and by the larger one afterwards
						// (presumably so that old and new buffer do not exist at the same time)
						B = ::libmaus::autoarray::AutoArray<char>(0,false);
						B = ::libmaus::autoarray::AutoArray<char>(n,false);
					}
					
					pa = B.begin();
					pc = pa;
					pe = pa + n;

					// decompress datasize bytes read from in to the n bytes at the start of B
					::libmaus::aio::IStreamWrapper wrapper(in);
					::libmaus::lz::IstreamSource< ::libmaus::aio::IStreamWrapper> insource(wrapper,datasize);

					SnappyCompress::uncompress(insource,B.begin(),n);

					readpos += blocksize;
				}
			}
Пример #30
0
			/**
			 * assignment operator: copies all and the contents of low from o;
			 * low is reallocated only if its size differs from the size of o.low
			 *
			 * @param o histogram to be copied
			 * @return *this
			 **/
			Histogram & operator=(Histogram const & o)
			{
				// self assignment: nothing to do
				if ( this == &o )
					return *this;

				all = o.all;

				if ( low.size() != o.low.size() )
					low = ::libmaus::autoarray::AutoArray<uint64_t>(o.low.size(),false);

				std::copy(o.low.begin(),o.low.end(),low.begin());

				return *this;
			}