Ejemplo n.º 1
0
			/**
			 * compute accumulated value count intervals over all index entries
			 *
			 * The vcnt fields of all entries index[i][j] are collected in order
			 * (outer index first, inner index second), prefixSums() is applied to
			 * the resulting array and each pair of adjacent values is stored as
			 * one interval.
			 *
			 * @return array containing one (lower,upper) pair per index entry
			 **/
			::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeSymAccu() const
			{
				typedef std::pair<uint64_t,uint64_t> interval_type;

				// count the entries over all index vectors
				uint64_t total = 0;
				for ( uint64_t f = 0; f < index.size(); ++f )
					total += index[f].size();

				// flatten the value counts; the array has one slot more than there are entries
				::libmaus2::autoarray::AutoArray<uint64_t> sums(total+1);
				uint64_t filled = 0;
				for ( uint64_t f = 0; f < index.size(); ++f )
				{
					uint64_t const entries = index[f].size();
					for ( uint64_t e = 0; e < entries; ++e )
						sums[filled++] = index[f][e].vcnt;
				}
				// presumably turns the counts into running sums (see AutoArray::prefixSums)
				sums.prefixSums();

				// adjacent values delimit the interval belonging to each entry
				::libmaus2::autoarray::AutoArray < interval_type > symaccu(total);
				for ( uint64_t k = 0; k+1 < sums.size(); ++k )
					symaccu[k] = interval_type(sums[k],sums[k+1]);

				#if 0
				std::cerr << "presymaccu:" << std::endl;
				for ( uint64_t k = 0; k < sums.size(); ++k )
					std::cerr << sums[k] << std::endl;

				std::cerr << "symaccu:" << std::endl;
				for ( uint64_t k = 0; k < symaccu.size(); ++k )
					std::cerr << "[" << k << "]=[" << symaccu[k].first << "," << symaccu[k].second << ")" << std::endl;
				#endif

				return symaccu;
			}
Ejemplo n.º 2
0
				/**
				 * attach a fragment alignment buffer and record, for each of its linear
				 * output fragments, the start pointer (D), the length in bytes between
				 * the fragment's two pointers (S) and the corresponding fill vector
				 * entry (L)
				 *
				 * The arrays D, S and L are only reallocated if they are unset or
				 * smaller than the number of fragments.
				 *
				 * @param rblock buffer to be attached
				 **/
				void set(libmaus2::bambam::parallel::FragmentAlignmentBuffer::shared_ptr_type rblock)
				{
					typedef libmaus2::autoarray::AutoArray<char const *> ptr_array_type;
					typedef libmaus2::autoarray::AutoArray<size_t> size_array_type;

					block = rblock;

					std::vector<std::pair<uint8_t *,uint8_t *> > fragments;
					block->getLinearOutputFragments(fragments);
					std::vector<size_t> const fill = block->getFillVector();
					assert ( fill.size() == fragments.size() );

					// grow the start pointer array if necessary
					if ( (D?D->size():0) < fragments.size() )
					{
						ptr_array_type::shared_ptr_type grown(new ptr_array_type(fragments.size(),false));
						D = grown;
					}
					// grow the size array if necessary
					if ( (S?S->size():0) < fragments.size() )
					{
						size_array_type::shared_ptr_type grown(new size_array_type(fragments.size(),false));
						S = grown;
					}
					// grow the fill array if necessary
					if ( (L?L->size():0) < fragments.size() )
					{
						size_array_type::shared_ptr_type grown(new size_array_type(fragments.size(),false));
						L = grown;
					}

					for ( uint64_t f = 0; f < fragments.size(); ++f )
					{
						std::pair<uint8_t *,uint8_t *> const & frag = fragments[f];

						D->at(f) = reinterpret_cast<char const *>(frag.first);
						S->at(f) = frag.second-frag.first;
						L->at(f) = fill.at(f);
					}

					numblocks = fragments.size();
				}
Ejemplo n.º 3
0
			/**
			 * find the first rank dictionary having bit i set
			 *
			 * @param i bit position queried in each dictionary
			 * @return index of the first dictionary whose bit i is set or
			 *         rank_dictionaries.size() if there is no such dictionary
			 **/
			uint64_t operator[](uint64_t const i) const
			{
				uint64_t sym = 0;

				// skip dictionaries which do not have bit i set
				while ( sym < rank_dictionaries.size() && !((*(rank_dictionaries[sym]))[i]) )
					++sym;

				return sym;
			}
Ejemplo n.º 4
0
			/**
			 * write the array D to a stream: first the number of elements,
			 * then the elements in order
			 *
			 * @param stream output stream
			 **/
			void serialise(stream_type & stream) const
			{
				typedef ::libmaus2::util::NumberSerialisation serialiser_type;

				// length header
				serialiser_type::serialiseNumber(stream,D.size());

				// payload
				uint64_t k = 0;
				while ( k < D.size() )
				{
					serialiser_type::serialiseNumber(stream,D[k]);
					++k;
				}
			}
Ejemplo n.º 5
0
			/**
			 * constructor building one rank dictionary per symbol value
			 *
			 * For each value s in [0,maxval] a bit vector of n+1 bits is written
			 * whose bit j (j < n) is set iff BWT[j] == s. D[s] is set to
			 * rank1(n-1) of dictionary s and prefixSums() is applied to D afterwards.
			 * Nothing is constructed if rn is zero.
			 *
			 * @param BWT random access iterator to the input sequence
			 * @param rn length of the input sequence
			 * @param rmaxval lower bound for the maximum symbol value used
			 **/
			MultiRankCacheLF ( iterator BWT, uint64_t const rn, uint64_t const rmaxval = 0)
			: n(rn)
			{
				// empty sequence, leave all structures empty
				if ( ! n )
					return;

				// largest symbol value in the sequence, but at least rmaxval
				uint64_t maxsym = rmaxval;
				for ( uint64_t p = 0; p < n; ++p )
				{
					uint64_t const sym = static_cast<uint64_t>(BWT[p]);
					if ( maxsym < sym )
						maxsym = sym;
				}

				rank_dictionaries = ::libmaus2::autoarray::AutoArray < rank_ptr_type >(maxsym+1);

				for ( uint64_t sym = 0; sym < rank_dictionaries.size(); ++sym )
				{
					rank_ptr_type dict(new rank_type(n+1));
					rank_dictionaries[sym] = UNIQUE_PTR_MOVE(dict);
					writer_type writer = rank_dictionaries[sym]->getWriteContext();

					// indicator bit vector for symbol sym
					for ( uint64_t p = 0; p < n; ++p )
						writer.writeBit(BWT[p] == sym);
					// write additional bit to make rankm1 defined for n
					writer.writeBit(0);

					writer.flush();
				}

				// number of occurrences per symbol, then accumulated
				D = ::libmaus2::autoarray::AutoArray < uint64_t >(rank_dictionaries.size()+1);
				for ( uint64_t sym = 0; sym < rank_dictionaries.size(); ++sym )
					D [ sym ] = rank_dictionaries[sym]->rank1(n-1);
				D.prefixSums();
			}
Ejemplo n.º 6
0
			/**
			 * read a stream until no more data can be obtained and return its contents
			 *
			 * The data is read into a buffer which is doubled in size whenever it
			 * is completely filled. The returned array has exactly the number of
			 * bytes read.
			 *
			 * @param in input stream
			 * @return array containing the bytes read from in
			 **/
			static libmaus2::autoarray::AutoArray<char> loadFile(std::istream & in)
			{
				typedef libmaus2::autoarray::AutoArray<char> buffer_type;

				buffer_type buffer(1);
				uint64_t filled = 0;

				while ( in )
				{
					// try to fill the unused rest of the buffer
					in.read(buffer.begin() + filled, buffer.size()-filled);

					// no data obtained, stop
					if ( ! in.gcount() )
						break;

					filled += in.gcount();

					// buffer is full, move the data to one of double size
					if ( filled == buffer.size() )
					{
						buffer_type grown(2*buffer.size(),false);
						std::copy(buffer.begin(),buffer.end(),grown.begin());
						buffer = grown;
					}
				}

				// copy the used part to an array of exactly fitting size
				buffer_type result(filled,false);
				std::copy(buffer.begin(),buffer.begin()+filled,result.begin());

				return result;
			}
Ejemplo n.º 7
0
			/**
			 * send the contents of this object to a set of secondary sockets
			 *
			 * The serialised FI member is written to each socket as a string. The
			 * designators, shortpointers, longpointers and text arrays are then
			 * sent (in this order) via UDPSocket::sendArrayBroadcast. Progress
			 * messages are printed on std::cerr.
			 *
			 * @param interface network interface passed to sendArrayBroadcast
			 * @param broadcastport port passed to sendArrayBroadcast
			 * @param secondarysockets sockets of the receivers
			 * @param packsize packet size passed to sendArrayBroadcast
			 **/
			void broadcastSend(
				::libmaus2::network::Interface const & interface,
				unsigned short const broadcastport,
				::libmaus2::autoarray::AutoArray < ::libmaus2::network::ClientSocket::unique_ptr_type > & secondarysockets,
				unsigned int const packsize = 508
			) const
			{
				typedef ::libmaus2::network::UDPSocket udp_type;

				// FI is sent to each receiver individually
				std::cerr << "Writing FI...";
				uint64_t s = 0;
				while ( s < secondarysockets.size() )
				{
					secondarysockets[s]->writeString(FI.serialise());
					++s;
				}
				std::cerr << "done.";

				// the arrays are sent by broadcast
				std::cerr << "Broadcasting designators...";
				udp_type::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					designators.get(),designators.size(),packsize
				);
				std::cerr << "done.";

				std::cerr << "Broadcasting shortpointers...";
				udp_type::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					shortpointers.get(),shortpointers.size(),packsize
				);
				std::cerr << "done.";

				std::cerr << "Broadcasting longpointers...";
				udp_type::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					longpointers.get(),longpointers.size(),packsize
				);
				std::cerr << "done.";

				std::cerr << "Broadcasting text...";
				udp_type::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					text.get(),text.size(),packsize
				);
				std::cerr << "done.";
			}
Ejemplo n.º 8
0
			/**
			 * constructor from hash intervals and file prefix
			 *
			 * A pointer to rHI is stored, so rHI needs to stay valid while it is
			 * accessed through this object. One buffer slot is allocated per
			 * interval and an interval tree is built over all intervals.
			 *
			 * @param rHI hash intervals
			 * @param fileprefix prefix for files
			 **/
			SynchronousOutputFile8ArrayTemplate(
				::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI,
				std::string const & fileprefix
			)
			: HI(&rHI),
			  buffers(rHI.size()),
			  IT(new ::libmaus2::util::IntervalTree(rHI,0,rHI.size()))
			{
				// set up one output buffer per hash interval
				init ( rHI.size(), fileprefix );
			}
Ejemplo n.º 9
0
			/**
			 * constructor from hash intervals and temporary file name generator
			 *
			 * A pointer to rHI is stored, so rHI needs to stay valid while it is
			 * accessed through this object. One buffer slot is allocated per
			 * interval and an interval tree is built over all intervals.
			 *
			 * @param rHI hash intervals
			 * @param tmpgen temporary file name generator object
			 **/
			SynchronousOutputFile8ArrayTemplate(
				::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI,
				::libmaus2::util::TempFileNameGenerator & tmpgen
			)
			: HI(&rHI),
			  buffers(rHI.size()),
			  IT(new ::libmaus2::util::IntervalTree(rHI,0,rHI.size()))
			{
				// set up one output buffer per hash interval
				init ( rHI.size(), tmpgen );
			}
Ejemplo n.º 10
0
 /**
  * write the block parameters and the block start array to a stream
  *
  * The stream receives blocksize, lastblocksize, maxblockbytes, the value
  * blockstarts.size()-1 and finally all blockstarts.size() elements of
  * blockstarts (i.e. one element more than the number written before them).
  *
  * @param stream output stream
  **/
 void serialise(stream_type & stream) const
 {
     typedef ::libmaus2::util::NumberSerialisation serialiser_type;

     // scalar parameters
     serialiser_type::serialiseNumber(stream,blocksize);
     serialiser_type::serialiseNumber(stream,lastblocksize);
     serialiser_type::serialiseNumber(stream,maxblockbytes);

     // NOTE(review): presumably the number of blocks, with blockstarts holding one extra end marker - confirm
     serialiser_type::serialiseNumber(stream,blockstarts.size()-1);

     // all entries of the block start array
     uint64_t k = 0;
     while ( k < blockstarts.size() )
     {
         serialiser_type::serialiseNumber(stream,blockstarts[k]);
         ++k;
     }
 }
Ejemplo n.º 11
0
			/**
			 * constructor from hash intervals, file names and truncate setting
			 *
			 * A pointer to rHI is stored, so rHI needs to stay valid while it is
			 * accessed through this object. One buffer slot is allocated per
			 * interval and an interval tree is built over all intervals.
			 *
			 * @param rHI hash intervals
			 * @param filenames output buffer file names
			 * @param truncate if true, then truncate files during buffer creation
			 **/
			SynchronousOutputFile8ArrayTemplate(
				::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI,
				std::vector<std::string> const & filenames,
				bool const truncate
			)
			: HI(&rHI),
			  buffers(rHI.size()),
			  IT(new ::libmaus2::util::IntervalTree(rHI,0,rHI.size()))
			{
				// set up the output buffers on the given files
				init ( filenames, truncate );
			}
Ejemplo n.º 12
0
			/**
			 * compute accumulated entry count intervals, one per index vector
			 *
			 * The sizes of the vectors index[i] are stored in an array,
			 * prefixSums() is applied to it and each pair of adjacent values
			 * is stored as one interval.
			 *
			 * @return array containing one (lower,upper) pair per index vector
			 **/
			::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeSegAccu() const
			{
				typedef std::pair<uint64_t,uint64_t> interval_type;

				uint64_t const numseg = index.size();

				// entry counts; the array has one slot more than there are vectors
				::libmaus2::autoarray::AutoArray<uint64_t> sums(numseg+1);
				for ( uint64_t s = 0; s < numseg; ++s )
					sums[s] = index[s].size();
				// presumably turns the counts into running sums (see AutoArray::prefixSums)
				sums.prefixSums();

				// adjacent values delimit the interval of each vector
				::libmaus2::autoarray::AutoArray < interval_type > accu(numseg);
				for ( uint64_t s = 0; s+1 < sums.size(); ++s )
					accu[s] = interval_type(sums[s],sums[s+1]);

				return accu;
			}
Ejemplo n.º 13
0
			/**
			 * add the array of another object element wise to the array of this object
			 *
			 * @param o object to be merged into this one
			 * @throws ::libmaus2::exception::LibMausException if the arrays differ in size
			 **/
			void merge(DArray const & o)
			{
				bool const compatible = ( o.D.size() == D.size() );

				// element wise addition is only defined for arrays of equal length
				if ( ! compatible )
				{
					::libmaus2::exception::LibMausException se;
					se.getStream() << "DArray::merge(): array sizes are not compatible." << std::endl;
					se.finish();
					throw se;
				}

				uint64_t k = 0;
				while ( k < D.size() )
				{
					D[k] += o.D[k];
					++k;
				}
			}
Ejemplo n.º 14
0
			/**
			 * compute accumulated entry count intervals, one per index vector
			 *
			 * The sizes of the vectors index[i] are stored in an array,
			 * prefixSums() is applied to it and each pair of adjacent values
			 * is stored as one interval.
			 *
			 * @return array containing one (lower,upper) pair per index vector
			 **/
			::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSegmentAccu()
			{
				typedef std::pair<uint64_t,uint64_t> interval_type;

				uint64_t const numseg = index.size();

				// entry counts; the array has one slot more than there are vectors
				::libmaus2::autoarray::AutoArray<uint64_t> sums(numseg+1);
				for ( uint64_t s = 0; s < index.size(); ++s )
					sums[s] = index[s].size();
				// presumably turns the counts into running sums (see AutoArray::prefixSums)
				sums.prefixSums();

				// adjacent values delimit the interval of each vector
				::libmaus2::autoarray::AutoArray< interval_type > accu(numseg);
				for ( uint64_t s = 0; s+1 < sums.size(); ++s )
				{
					interval_type const interval(sums[s],sums[s+1]);
					accu[s] = interval;
				}

				return accu;
			}
Ejemplo n.º 15
0
			/**
			 * compute accumulated block size intervals over all blocks
			 *
			 * The number of blocks is taken from the sizes of the vectors in index,
			 * the block sizes themselves are read from blocksizes (outer index
			 * first). prefixSums() is applied to the flattened sizes and each
			 * pair of adjacent values is stored as one interval.
			 *
			 * NOTE(review): this assumes index and blocksizes have the same shape - confirm
			 *
			 * @return array containing one (lower,upper) pair per block
			 **/
			::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeBlockIntervals() const
			{
				typedef std::pair<uint64_t,uint64_t> interval_type;

				// total number of blocks according to the index
				uint64_t total = 0;
				for ( uint64_t f = 0; f < index.size(); ++f )
					total += index[f].size();

				// flatten the block sizes; the array has one slot more than there are blocks
				::libmaus2::autoarray::AutoArray < uint64_t > sums(total+1);
				uint64_t * out = sums.begin();
				for ( uint64_t f = 0; f < blocksizes.size(); ++f )
				{
					for ( uint64_t b = 0; b < blocksizes[f].size(); ++b )
					{
						*out = blocksizes[f][b];
						++out;
					}
				}
				// presumably turns the sizes into running sums (see AutoArray::prefixSums)
				sums.prefixSums();

				// adjacent values delimit the interval of each block
				::libmaus2::autoarray::AutoArray < interval_type > blockintervals(total);
				for ( uint64_t k = 0; k+1 < sums.size(); ++k )
					blockintervals[k] = interval_type(sums[k],sums[k+1]);

				return blockintervals;
			}
Ejemplo n.º 16
0
/**
 * compute a histogram of the symbols obtained by UTF-8 decoding an array
 *
 * The array is split into parts using computePartStarts (one part per
 * thread is requested). Each part is decoded into a local histogram
 * which is then merged into the result while holding a lock. The parts
 * are processed in parallel if OpenMP is enabled.
 *
 * @param A array containing the coded text
 * @return histogram of the decoded symbols
 **/
::libmaus2::util::Histogram::unique_ptr_type libmaus2::util::Utf8String::getHistogram(::libmaus2::autoarray::AutoArray<uint8_t> const & A)
{
	typedef ::libmaus2::util::Histogram histogram_type;

	#if defined(_OPENMP)
	uint64_t const numthreads = omp_get_max_threads();
	#else
	uint64_t const numthreads = 1;
	#endif

	::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads);
	// partstarts contains the start of each part plus one end marker
	int64_t const numparts = static_cast<int64_t>(partstarts.size()-1);

	histogram_type::unique_ptr_type hist(new histogram_type);
	::libmaus2::parallel::OMPLock lock;

	#if defined(_OPENMP)
	#pragma omp parallel for
	#endif
	for ( int64_t part = 0; part < numparts; ++part )
	{
		uint64_t const partbegin = partstarts[part];
		uint64_t const partlen = partstarts[part+1]-partbegin;

		// thread local histogram for this part
		histogram_type::unique_ptr_type parthist(new histogram_type);
		::libmaus2::util::GetObject<uint8_t const *> G(A.begin()+partbegin);

		// decodeUTF8 presumably adds the number of bytes consumed to decoded
		uint64_t decoded = 0;
		while ( decoded != partlen )
			(*parthist)(::libmaus2::util::UTF8::decodeUTF8(G,decoded));

		// the global histogram is shared between the threads
		lock.lock();
		hist->merge(*parthist);
		lock.unlock();
	}

	return UNIQUE_PTR_MOVE(hist);
}
Ejemplo n.º 17
0
				/**
				 * start all threads and call setup() afterwards
				 **/
				void start()
				{
					uint64_t t = 0;
					while ( t < threads.size() )
					{
						threads[t]->start();
						++t;
					}

					setup();
				}
Ejemplo n.º 18
0
			/**
			 * constructor from a sequence of numbers
			 *
			 * H is constructed from the sequence via getBitWT and is expected to
			 * store for each position the value of bitsPerNum for the respective
			 * number (this is checked by assertion). For each non zero bit width
			 * occurring a compact array of that width is allocated and the numbers
			 * having this bit width are stored in it in order of appearance.
			 * The stored sequence is verified by assertion at the end.
			 *
			 * @param a start of the input sequence
			 * @param e end of the input sequence
			 **/
			CompactNumberArray(iterator a, iterator e)
			: H(getBitWT(a,e)), C(H->enctable.maxsym+1)
			{
				// allocate one compact array per occurring non zero bit width
				for ( int width = H->enctable.minsym; width <= H->enctable.maxsym; ++width )
				{
					if ( ! width )
						continue;
					if ( ! H->enctable.checkSymbol(width) )
						continue;

					// rank over the complete sequence
					uint64_t const numsyms = H->rank(width, (e-a)-1);
					C[width] = ::libmaus2::bitio::CompactArray::unique_ptr_type(new ::libmaus2::bitio::CompactArray(numsyms,width));
					std::cerr << numsyms << " symbols use " << width << " bits " << std::endl;
				}

				// distribute the numbers to the arrays
				uint64_t pos = 0;
				for ( iterator it = a; it != e; ++it, ++pos )
				{
					uint64_t const bits = ::libmaus2::math::bitsPerNum(*it);
					assert ( (*H)[pos] == bits );

					// numbers using no bits are not stored in any array
					if ( ! bits )
						continue;

					assert ( bits < C.size() );
					assert ( C[bits].get() );
					// slot in the array for this width, derived from the rank at pos
					uint64_t const rank = H->rank(bits,pos)-1;
					C[bits]->set( rank , *it );
				}

				// check that the sequence can be read back
				pos = 0;
				for ( iterator it = a; it != e; ++it, ++pos )
					assert ( (*this)[pos] == (*it) );
			}
Ejemplo n.º 19
0
			/**
			 * set up decoder for next chunk
			 *
			 * If the chunk list of the current range is used up, then the chunk
			 * lists of the following ranges are loaded until a chunk is found
			 * or there are no more ranges.
			 *
			 * @return true if the stream was reset to a new chunk, false if there are no more chunks
			 **/
			bool setup()
			{
				// current chunk list is used up, try the following ranges
				while ( chunkidx >= chunks.size() )
				{
					// no more ranges
					if ( rangeidx >= ranges.size() )
						return false;

					// next range
					rangecur = ranges[rangeidx++].get();
					chunks = rangecur->getChunks(index);
					chunkidx = 0;
				}

				// next chunk
				wrapper.resetStream(chunks[chunkidx].first,chunks[chunkidx].second);
				chunkidx++;

				return true;
			}
Ejemplo n.º 20
0
				/**
				 * start all threads via startStack and call setup() afterwards
				 *
				 * @param stacksize value passed to startStack for each thread
				 **/
				void start(uint64_t const stacksize)
				{
					uint64_t t = 0;
					while ( t < threads.size() )
					{
						threads[t]->startStack(stacksize);
						++t;
					}

					setup();
				}
Ejemplo n.º 21
0
			/**
			 * find the largest symbol s such that D[s] <= r
			 *
			 * The symbols are scanned from the largest (rank_dictionaries.size()-1)
			 * down to the smallest. A 64 bit counter is used for the scan so it
			 * matches the type of the symbol count.
			 *
			 * @param r value the entries of D are compared to
			 * @return largest s < rank_dictionaries.size() with D[s] <= r, or 0 if there is no such s
			 **/
			uint64_t sortedSymbol(uint64_t r) const
			{
				// sym runs over syms-1,syms-2,...,0
				for ( uint64_t sym = rank_dictionaries.size(); sym-- > 0; )
					if ( D[sym] <= r )
						return sym;

				return 0;
			}
Ejemplo n.º 22
0
			void init()
			{
				for ( uint64_t i = 0; i < inflateB.size(); ++i )
				{
					libmaus2::lz::BgzfInflateBlock::unique_ptr_type tinflateB(new libmaus2::lz::BgzfInflateBlock(i));
					inflateB[i] = UNIQUE_PTR_MOVE(tinflateB);
					inflatefreelist.push_back(i);
				}			

				for ( uint64_t i = 0; i < inflateB.size(); ++i )
					inflategloblist.enque(
						BgzfThreadQueueElement(
					        	libmaus2::lz::BgzfThreadOpBase::libmaus2_lz_bgzf_op_read_block,
					        	i,
					        	0
						)
					);
			}
			/**
			 * write one additional zero bit to each context and flush it
			 **/
			void flush()
			{
				uint64_t c = 0;
				while ( c < contexts.size() )
				{
					// trailing zero bit, then push out buffered data
					contexts[c]->writeBit(0);
					contexts[c]->flush();
					++c;
				}
			}
Ejemplo n.º 24
0
			/**
			 * read and decompress the next block from the input stream into the buffer B
			 *
			 * The function must only be called when the current buffer is used up
			 * (pc == pe). If setpos is set, then the stream is positioned at readpos
			 * first. If the stream has more data and readpos is smaller than endpos,
			 * then a block consisting of two numbers (uncompressed size, compressed
			 * size) followed by the compressed data is read, the data is
			 * decompressed to B, the buffer pointers pa, pc and pe are set
			 * accordingly and readpos is advanced by the size of the block
			 * (the two numbers are counted with sizeof(uint64_t) bytes each).
			 * Otherwise the buffer pointers are left untouched.
			 *
			 * @throws libmaus2::exception::LibMausException if decompression fails
			 **/
			void fillBuffer()
			{
				assert ( pc == pe );

				if ( setpos )
				{
					// std::cerr << "Seeking to " << readpos << std::endl;

					// Reset the error state BEFORE seeking: seekg() does not move the
					// read position while failbit is set, so seeking first and clearing
					// afterwards would leave the stream at a stale position.
					in.clear();
					in.seekg(readpos);
				}

				if ( in.peek() >= 0 && readpos < endpos )
				{
					#if 0
					std::cerr << "Filling block, readpos " << readpos
						<< " stream at pos " << in.tellg()
						<< " endpos " << endpos
						<< std::endl;
					#endif

					// block header: two numbers
					uint64_t blocksize = sizeof(uint64_t) + sizeof(uint64_t);

					// size of uncompressed buffer
					uint64_t const n        = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in);
					// size of compressed data
					uint64_t const datasize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in);
					// add to block size
					blocksize += datasize;

					if ( n > B.size() )
					{
						// drop the old buffer before allocating the new one
						// (presumably so both are never allocated at the same time)
						B = ::libmaus2::autoarray::AutoArray<char>(0,false);
						B = ::libmaus2::autoarray::AutoArray<char>(n,false);
					}

					// buffer pointers: start, current, end
					pa = B.begin();
					pc = pa;
					pe = pa + n;

					// source restricted to the datasize bytes of compressed data
					::libmaus2::aio::IStreamWrapper wrapper(in);
					::libmaus2::lz::IstreamSource< ::libmaus2::aio::IStreamWrapper> insource(wrapper,datasize);

					try
					{
						SnappyCompress::uncompress(insource,B.begin(),n);
					}
					catch(std::exception const & ex)
					{
						// rethrow with the block sizes attached to the message
						libmaus2::exception::LibMausException lme;
						lme.getStream() << "Failed to decompress snappy compressed data, comp=" << datasize << ", uncomp=" << n << ":\n" << ex.what() << "\n";
						lme.finish();
						throw lme;
					}

					readpos += blocksize;
				}
			}
Ejemplo n.º 25
0
			/**
			 * print the index on std::cerr
			 *
			 * For each index vector a line containing its number and the respective
			 * file name is printed, followed by one line (pos,kcnt,vcnt) per entry.
			 **/
			void printIndex() const
			{
				uint64_t f = 0;
				while ( f < index.size() )
				{
					std::cerr << "file " << f << " " << filenames[f] << std::endl;

					uint64_t e = 0;
					while ( e < index[f].size() )
					{
						std::cerr
							<< "(" << index[f][e].pos
							<< "," << index[f][e].kcnt
							<< "," << index[f][e].vcnt
							<< ")" << std::endl;
						++e;
					}

					++f;
				}
			}
			/**
			 * flush all contexts and write the final stream to out
			 *
			 * The stream consists of the number of symbols, the serialised tree
			 * (root), the number of contexts, one section per context (two numbers
			 * followed by the contents of the context's temporary file), the vector
			 * of section start positions and finally the position of that vector.
			 * The contexts are reset and their temporary files are closed in the process.
			 *
			 * @param out output stream
			 * @return sum of the byte counts of all data written to out
			 **/
			uint64_t createFinalStream(stream_type & out)
			{
				flush();

				// number of bytes written so far
				uint64_t p = 0;
				p += ::libmaus2::util::NumberSerialisation::serialiseNumber(out,symbols); // n
				p += root->serialize(out); // huffman code tree
				p += ::libmaus2::util::NumberSerialisation::serialiseNumber(out,contexts.size()); // number of bit vectors

				// start position of the section of each context
				std::vector<uint64_t> nodeposvec;

				for ( uint64_t i = 0; i < contexts.size(); ++i )
				{
					nodeposvec.push_back(p);

					// read the block count before the context is destroyed below
					uint64_t const blockswritten = contexts[i]->blockswritten;
					// NOTE(review): presumably each block consists of 8 words, 6 of which carry data bits - confirm
					uint64_t const datawordswritten = 6*blockswritten;
					uint64_t const allwordswritten = 8*blockswritten;

					// release the context and close its output file before the file is read back
					contexts[i].reset();
					tmpcnt.closeOutputTempFile(i);

					// bits written
					p += ::libmaus2::serialize::Serialize<uint64_t>::serialize(out,64*datawordswritten);
					// auto array header (words written)
					p += ::libmaus2::serialize::Serialize<uint64_t>::serialize(out,allwordswritten);
					//std::string const filename = outputfilenames[i];
					std::istream & istr = tmpcnt.openInputTempFile(i);
					// std::cerr << "Copying " << allwordswritten << " from stream " << filename << std::endl;
					// append allwordswritten words of sizeof(uint64_t) bytes each from the temporary file
					::libmaus2::util::GetFileSize::copy (istr, out, allwordswritten, sizeof(uint64_t));
					p += allwordswritten * sizeof(uint64_t);
					tmpcnt.closeInputTempFile(i);

					// libmaus2::aio::FileRemoval::removeFile(filename);
				}

				// section position index, followed by the position of the index itself
				uint64_t const indexpos = p;
				p += ::libmaus2::util::NumberSerialisation::serialiseNumberVector(out,nodeposvec);
				p += ::libmaus2::util::NumberSerialisation::serialiseNumber(out,indexpos);

				out.flush();

				return p;
			}
Ejemplo n.º 27
0
			/**
			 * compute accumulated value count intervals over all index entries
			 *
			 * The vcnt fields of all entries index[i][j] are collected in order
			 * (outer index first, inner index second), prefixSums() is applied to
			 * the resulting array and each pair of adjacent values is stored as
			 * one interval.
			 *
			 * @return array containing one (lower,upper) pair per index entry
			 **/
			::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSymAccu()
			{
				typedef std::pair<uint64_t,uint64_t> interval_type;

				// count the entries over all index vectors
				uint64_t total = 0;
				for ( uint64_t f = 0; f < index.size(); ++f )
					total += index[f].size();

				// flatten the value counts; the array has one slot more than there are entries
				::libmaus2::autoarray::AutoArray<uint64_t> sums(total+1);
				uint64_t filled = 0;
				for ( uint64_t f = 0; f < index.size(); ++f )
				{
					for ( uint64_t e = 0; e < index[f].size(); ++e )
					{
						sums[filled] = index[f][e].vcnt;
						++filled;
					}
				}

				// presumably turns the counts into running sums (see AutoArray::prefixSums)
				sums.prefixSums();

				// adjacent values delimit the interval belonging to each entry
				::libmaus2::autoarray::AutoArray< interval_type > accu(total);
				for ( uint64_t k = 0; k+1 < sums.size(); ++k )
				{
					interval_type const interval(sums[k],sums[k+1]);
					accu[k] = interval;
				}

				return accu;
			}
Ejemplo n.º 28
0
			/**
			 * destructor, flushes, closes and releases all files which are still set
			 **/
			~FileBunchLRU()
			{
				uint64_t f = 0;
				while ( f < files.size() )
				{
					// skip slots which hold no file object
					if ( files[f].get() )
					{
						files[f]->flush();
						files[f]->close();
						files[f].reset();
					}
					++f;
				}
			}
Ejemplo n.º 29
0
				uint64_t get(uint64_t const i) const
				{
					uint64_t s = 0;
					for ( uint64_t j = 0; j < I.size(); ++j )
					{
						libmaus2::index::ExternalMemoryIndexDecoderFindLargestSmallerResult<OverlapMeta> const R = 
							I[j]->findLargestSmaller(OverlapMeta(i,0,0,0,0,0,0),true /* cache only */);
						s += R.blockid;
					}
					
					return s;
				}
Ejemplo n.º 30
0
    /**
     * constructor, sets up a table with one entry per unsigned char value
     *
     * The letters A, C, G and T (upper and lower case) are mapped to
     * 0, 1, 2 and 3 respectively, all other entries are set to 0.
     **/
    FastATwoBitTable()
        : T(static_cast<size_t>(std::numeric_limits<unsigned char>::max())+1,false)
    {
        assert ( 3 < T.size() );

        // the array is allocated without erasing, so set the default for all characters
        std::fill(T.begin(),T.end(),0);

        // upper case letters
        T['A'] = 0;
        T['C'] = 1;
        T['G'] = 2;
        T['T'] = 3;

        // lower case letters map to the same codes
        T['a'] = T['A'];
        T['c'] = T['C'];
        T['g'] = T['G'];
        T['t'] = T['T'];
    }