ExternalMemoryIndexGenerator(std::string const & filename)
			: Pstream(libmaus2::aio::InputOutputStreamFactoryContainer::constructUnique(filename,std::ios::in|std::ios::out|std::ios::trunc|std::ios::binary)), stream(*Pstream), 
			  ic(0), flushed(false), writeCache(1024),
			  wa(writeCache.begin()), wc(wa), we(writeCache.end())
			{
			
			}
Ejemplo n.º 2
0
			MultiRankCacheLF ( iterator BWT, uint64_t const rn, uint64_t const rmaxval = 0)
			: n(rn)
			{
				if ( n )
				{
					uint64_t maxval = rmaxval;
					for ( uint64_t i = 0; i < n; ++i )
						maxval = std::max ( maxval, static_cast<uint64_t>(BWT[i]) );
						
					rank_dictionaries = ::libmaus2::autoarray::AutoArray < rank_ptr_type >(maxval+1);
					
					for ( uint64_t i = 0; i < rank_dictionaries.size(); ++i )
					{
						rank_ptr_type trank_dictionariesi(new rank_type(n+1));
						rank_dictionaries[i] = UNIQUE_PTR_MOVE(trank_dictionariesi);
						writer_type writer = rank_dictionaries[i]->getWriteContext();
						
						for ( uint64_t j = 0; j < n; ++j )
							writer.writeBit(BWT[j] == i);
						// write additional bit to make rankm1 defined for n
						writer.writeBit(0);
						
						writer.flush();
					}
					
					D = ::libmaus2::autoarray::AutoArray < uint64_t >(rank_dictionaries.size()+1);
					for ( uint64_t i = 0; i < rank_dictionaries.size(); ++i )
						D [ i ] = rank_dictionaries[i]->rank1(n-1);
					D.prefixSums();
				}
			}
Ejemplo n.º 3
0
			uint64_t operator[](uint64_t const i) const
			{
				for ( uint64_t j = 0; j < rank_dictionaries.size(); ++j )
					if ( (*(rank_dictionaries[j]))[i] )
						return j;
				return rank_dictionaries.size();
			}
Ejemplo n.º 4
0
::libmaus2::util::Histogram::unique_ptr_type libmaus2::util::Utf8String::getHistogram(::libmaus2::autoarray::AutoArray<uint8_t> const & A)
{
	#if defined(_OPENMP)
	uint64_t const numthreads = omp_get_max_threads();
	#else
	uint64_t const numthreads = 1;
	#endif
	
	::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads);
	uint64_t const numparts = partstarts.size()-1;
	
	::libmaus2::util::Histogram::unique_ptr_type hist(new ::libmaus2::util::Histogram);
	::libmaus2::parallel::OMPLock lock;
	
	#if defined(_OPENMP)
	#pragma omp parallel for
	#endif
	for ( int64_t t = 0; t < static_cast<int64_t>(numparts); ++t )
	{
		::libmaus2::util::Histogram::unique_ptr_type lhist(new ::libmaus2::util::Histogram);
	
		uint64_t codelen = 0;
		uint64_t const tcodelen = partstarts[t+1]-partstarts[t];
		::libmaus2::util::GetObject<uint8_t const *> G(A.begin()+partstarts[t]);
		
		while ( codelen != tcodelen )
			(*lhist)(::libmaus2::util::UTF8::decodeUTF8(G,codelen));
			
		lock.lock();
		hist->merge(*lhist);
		lock.unlock();
	}
	
	return UNIQUE_PTR_MOVE(hist);
}
Ejemplo n.º 5
0
			void serialise(stream_type & stream) const
			{
				::libmaus2::util::NumberSerialisation::serialiseNumber(stream,D.size());

				for ( uint64_t i = 0; i < D.size(); ++i )
					::libmaus2::util::NumberSerialisation::serialiseNumber(stream,D[i]);
			}
Ejemplo n.º 6
0
			::std::streampos seekpos(::std::streampos sp, ::std::ios_base::openmode which = ::std::ios_base::in | ::std::ios_base::out)
			{
				if ( which & ::std::ios_base::in )
				{
					int64_t const cur = symsread-(egptr()-gptr());
					int64_t const curlow = cur - static_cast<int64_t>(gptr()-eback());
					int64_t const curhigh = cur + static_cast<int64_t>(egptr()-gptr());
					
					// call relative seek, if target is in range
					if ( sp >= curlow && sp <= curhigh )
						return seekoff(static_cast<int64_t>(sp) - cur, ::std::ios_base::cur, which);

					// target is out of range, we really need to seek
					uint64_t tsymsread = (sp / buffersize)*buffersize;
					
					symsread = tsymsread;
					stream.clear();
					stream.seekg( (symsread * b) / 8 );
					setg(buffer.end(),buffer.end(),buffer.end());
					underflow();
					setg(eback(),gptr() + (static_cast<int64_t>(sp)-static_cast<int64_t>(tsymsread)), egptr());
				
					return sp;
				}
				
				return -1;
			}
Ejemplo n.º 7
0
			SocketOutputBufferTemplate(
				::libmaus2::network::SocketBase * rdst, 
				int const rtag,
				uint64_t const bufsize)
			: dst(rdst), tag(rtag), B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN())
			{
			}
Ejemplo n.º 8
0
			::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > computeSymAccu() const
			{
				uint64_t numint = 0;
				for ( uint64_t i = 0; i < index.size(); ++i )
					numint += index[i].size();
				::libmaus2::autoarray::AutoArray<uint64_t> preaccu(numint+1);
				uint64_t outptr = 0;
				for ( uint64_t i = 0; i < index.size(); ++i )
					for ( uint64_t j = 0; j < index[i].size(); ++j )
						preaccu[outptr++] = index[i][j].vcnt;
				preaccu.prefixSums();
				::libmaus2::autoarray::AutoArray < std::pair<uint64_t,uint64_t> > symaccu(numint);
				for ( uint64_t i = 1; i < preaccu.size(); ++i )
					symaccu[i-1] = std::pair<uint64_t,uint64_t>(preaccu[i-1],preaccu[i]);
	
				#if 0
				std::cerr << "presymaccu:" << std::endl;
				for ( uint64_t i = 0; i < preaccu.size(); ++i )
					std::cerr << preaccu[i] << std::endl;

				std::cerr << "symaccu:" << std::endl;
				for ( uint64_t i = 0; i < symaccu.size(); ++i )
					std::cerr << "[" << i << "]=[" << symaccu[i].first << "," << symaccu[i].second << ")" << std::endl;
				#endif
					
				return symaccu;
			}
Ejemplo n.º 9
0
                        /**
                         * constructor by output stream
                         *
                         * @param out output stream
                         * @param bufsize output buffer size
                         **/
                        SynchronousGenericOutput(std::ostream & out, uint64_t const bufsize)
                        : B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()), 
                          W(out),
                          datawrittentofile(0)
                        {

                        }
Ejemplo n.º 10
0
			int_type underflow()
			{
				if ( gptr() < egptr() )
					return static_cast<int_type>(*uptr());
					
				assert ( gptr() == egptr() );
					
				char * midptr = buffer.begin() + pushbackspace;
				uint64_t const copyavail = 
					std::min(
						// previously read
						static_cast<uint64_t>(gptr()-eback()),
						// space we have to copy into
						static_cast<uint64_t>(midptr-buffer.begin())
					);
				::std::memmove(midptr-copyavail,gptr()-copyavail,copyavail);

				stream.read(midptr, buffer.end()-midptr);
				size_t const n = stream.gcount();
				streamreadpos += n;

				setg(midptr-copyavail, midptr, midptr+n);

				if (!n)
					return traits_type::eof();
				
				return static_cast<int_type>(*uptr());
			}
Ejemplo n.º 11
0
			bool decodeBlock()
			{
				while ( FBO.file < index.Vfn.size() && FBO.block >= index.blocksPerFile[FBO.file] )
				{
					FBO.file++;
					FBO.block = 0;
					FBO.blockoffset = 0; // check this if we change the file format
					FBO.offset = 0;
					openFile();
				}
				if ( FBO.file == index.Vfn.size() )
				{
					PSGI.reset();
					PISI.reset();
					return false;
				}

				libmaus2::gamma::GammaDecoder< libmaus2::aio::SynchronousGenericInput<uint64_t> > GD(*PSGI);
				uint64_t const bs = GD.decode() + 1;
				B.ensureSize(bs);
				for ( uint64_t i = 0; i < bs; ++i )
					B[i] = GD.decode();

				pa = B.begin();
				pc = B.begin();
				pe = B.begin() + bs;

				FBO.block += 1;

				return true;
			}
Ejemplo n.º 12
0
			void init(bool const repos)
			{
				// set empty buffer
				setgchecked(buffer.end(), buffer.end(), buffer.end());
				// seek
				if ( repos )
					fd->lseek(symsread,SEEK_SET);
			}
Ejemplo n.º 13
0
			void serialise(std::ostream & out) const
			{
				::libmaus2::fastx::FastInterval::serialise(out,FI);
				designators.serialize(out);
				shortpointers.serialize(out);
				longpointers.serialize(out);
				text.serialize(out);
			}
Ejemplo n.º 14
0
			StreamWrapperBuffer(stream_type & rstream, ::std::size_t rbuffersize, std::size_t rpushbackspace)
			: stream(rstream), 
			  buffersize(rbuffersize),
			  pushbackspace(rpushbackspace),
			  buffer(buffersize+pushbackspace,false), streamreadpos(0)
			{
				setg(buffer.end(), buffer.end(), buffer.end());	
			}
Ejemplo n.º 15
0
			void init(bool const repos)
			{
				// set empty buffer
				setgchecked(buffer.end(), buffer.end(), buffer.end());
				// seek
				if ( repos )
					stream.lseek(symsread);
			}
Ejemplo n.º 16
0
			BgzfParallelRecodeDeflateBase()
			: B(getBgzfMaxBlockSize(),false), 
			  pa(B.begin()), 
			  pc(B.begin()), 
			  pe(B.end())
			{
			
			}
			LinuxStreamingPosixFdOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize)
			: fd(doOpen(fn)), closefd(true), 
			  optblocksize((rbuffersize < 0) ? getOptimalIOBlockSize(fd,std::string()) : rbuffersize),
			  buffersize(optblocksize),
			  buffer(buffersize,false), prevwrite(0,0)
			{
				setp(buffer.begin(),buffer.end()-1);
			}
Ejemplo n.º 18
0
			/**
			 * constructor from hash intervals and file prefix
			 *
			 * @param rHI hash intervals
			 * @param fileprefix prefix for files
			 **/
			SynchronousOutputFile8ArrayTemplate(
				::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI, 
				std::string const & fileprefix
			)
			: HI(&rHI), buffers(HI->size()), IT(new ::libmaus2::util::IntervalTree(*HI,0,HI->size()))
			{
				init ( HI->size(), fileprefix );
			}
Ejemplo n.º 19
0
			/**
			 * constructor from hash intervals and temporary file name generator
			 *
			 * @param rHI hash intervals
			 * @param tmpgen temporary file name generator object
			 **/
			SynchronousOutputFile8ArrayTemplate(
				::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI, 
				::libmaus2::util::TempFileNameGenerator & tmpgen
			)
			: HI(&rHI), buffers(HI->size()), IT(new ::libmaus2::util::IntervalTree(*HI,0,HI->size()))
			{
				init ( HI->size(), tmpgen );
			}
Ejemplo n.º 20
0
			MemoryOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize)
			: 
			  fd(doOpen(fn)), 
			  buffersize((rbuffersize < 0) ? getDefaultBlockSize() : rbuffersize), 
			  buffer(buffersize,false)
			{
				setp(buffer.begin(),buffer.end()-1);
			}
Ejemplo n.º 21
0
			/**
			 * constructor from hash intervals, file names and truncate setting
			 *
			 * @param rHI hash intervals
			 * @param filenames output buffer file names
			 * @param truncate if true, then truncate files during buffer creation
			 **/
			SynchronousOutputFile8ArrayTemplate(
				::libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & rHI, 
				std::vector<std::string> const & filenames,
				bool const truncate
			)
			: HI(&rHI), buffers(HI->size()), IT(new ::libmaus2::util::IntervalTree(*HI,0,HI->size()))
			{
				init ( filenames, truncate );
			}
Ejemplo n.º 22
0
 void serialise(stream_type & stream) const
 {
     ::libmaus2::util::NumberSerialisation::serialiseNumber(stream,blocksize);
     ::libmaus2::util::NumberSerialisation::serialiseNumber(stream,lastblocksize);
     ::libmaus2::util::NumberSerialisation::serialiseNumber(stream,maxblockbytes);
     ::libmaus2::util::NumberSerialisation::serialiseNumber(stream,blockstarts.size()-1);
     for ( uint64_t i = 0; i < blockstarts.size(); ++i )
         ::libmaus2::util::NumberSerialisation::serialiseNumber(stream,blockstarts[i]);
 }
Ejemplo n.º 23
0
			/**
			 * constructor by file name
			 *
			 * @param filename name of output file
			 * @param bufsize size of output buffer
			 * @param truncate true if file should be truncated false data should be appended
			 * @param offset write offset in bytes
			 **/
                        SynchronousGenericOutput(std::string const & filename, uint64_t const bufsize, bool const truncate = true, uint64_t const offset = 0, bool const /* metasync */ = true)
                        : B(bufsize,false), pa(B.get()), pc(pa), pe(pa+B.getN()), 
                          PW ( truncate ? new ofstream_type(filename) : 0),
                          PF ( truncate ? 0 : new std::fstream(filename.c_str(), std::ios::binary|std::ios::in|std::ios::out|std::ios::ate) ),
                          W  ( truncate ? (static_cast<std::ostream &>(*PW)) : (static_cast<std::ostream &>(*PF)) ),
                          datawrittentofile(0)
                        {
                        	W.seekp(offset,std::ios::beg);
                        }
Ejemplo n.º 24
0
			/**
			 * @return estimated size of object in bytes
			 **/
			uint64_t byteSize() const
			{
				return
					sizeof(uint64_t)+
					B.byteSize()+
					R->byteSize()+
					A8.byteSize()+
					A64.byteSize();
			}
Ejemplo n.º 25
0
                        void exec()
                        {
                                pid = fork();
                                
                                if ( pid < 0 )
                                {
                                        ::libmaus2::exception::LibMausException ex;
                                        ex.getStream() << "failed to fork: " << strerror(errno);
                                        ex.finish();
                                        throw ex;
                                }
                                
                                if ( ! pid )
                                {
                                	signal(SIGCHLD,sigchildhandler);
                                
                                        while ( true )
                                        {        
                                                try
                                                {
	                                                ::libmaus2::network::SocketBase::unique_ptr_type recsock = seso->accept();
	                                                
                                                        pid_t childpid = fork();
                                                        
                                                        if ( childpid == 0 )
                                                        {
                                                        	try
                                                        	{
                                                        		char const * ptr = data.begin();
                                                        		char const * ptre = data.end();
                                                        		uint64_t const bs = 4096;
                                                        		
                                                        		while ( ptr != ptre )
                                                        		{
                                                        			uint64_t const rest = ptre-ptr;
                                                        			uint64_t const towrite = std::min(bs,rest); 
                                                        			recsock->write(ptr,towrite);
                                                        			ptr += towrite;
                                                        		}
								}
								catch(std::exception const & ex)
								{
									std::cerr << ex.what() << std::endl;
								}

                                                                _exit(0);
                                                        }
                                                }
                                                catch(std::exception const & ex)
                                                {
                                                        std::cerr << "Error in SingleFileServer: " << ex.what() << std::endl;
                                                }
                                        }
                                
                                        _exit(0);
                                }
                        }
Ejemplo n.º 26
0
			void fillBuffer()
			{
				assert ( pc == pe );
				
				if ( setpos )
				{
					// std::cerr << "Seeking to " << readpos << std::endl;
					in.seekg(readpos);
					in.clear();
				}

				if ( in.peek() >= 0 && readpos < endpos )
				{
					#if 0
					std::cerr << "Filling block, readpos " << readpos 
						<< " stream at pos " << in.tellg() 
						<< " endpos " << endpos
						<< std::endl;
					#endif
				
					uint64_t blocksize = sizeof(uint64_t) + sizeof(uint64_t);
					
					// size of uncompressed buffer
					uint64_t const n        = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in);
					// size of compressed data
					uint64_t const datasize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(in);
					// add to block size
					blocksize += datasize;
						
					if ( n > B.size() )
					{
						B = ::libmaus2::autoarray::AutoArray<char>(0,false);
						B = ::libmaus2::autoarray::AutoArray<char>(n,false);
					}
					
					pa = B.begin();
					pc = pa;
					pe = pa + n;

					::libmaus2::aio::IStreamWrapper wrapper(in);
					::libmaus2::lz::IstreamSource< ::libmaus2::aio::IStreamWrapper> insource(wrapper,datasize);

					try
					{
						SnappyCompress::uncompress(insource,B.begin(),n);
					}
					catch(std::exception const & ex)
					{
						libmaus2::exception::LibMausException lme;
						lme.getStream() << "Failed to decompress snappy compressed data, comp=" << datasize << ", uncomp=" << n << ":\n" << ex.what() << "\n";
						lme.finish();
						throw lme;
					}

					readpos += blocksize;
				}
			}
Ejemplo n.º 27
0
			SymBitEncoderBaseTemplate(bit_writer_type & rwriter, uint64_t const bufsize = 64*1024ull)
			: writer(rwriter),
			  symcntruns(bufsize),
			  ra(symcntruns.begin()),
			  rc(symcntruns.begin()),
			  re(symcntruns.end()),
			  currun(std::numeric_limits<int64_t>::min(),false,0),
			  indexwritten(false)
			{
			}
Ejemplo n.º 28
0
			GammaRLEncoder(std::string const & filename, unsigned int const ralbits, uint64_t const n, uint64_t const rblocksize, uint64_t const rbufsize = 64*1024)
			: 
			  blocksize(rblocksize),
			  COS(filename), SGO(COS,rbufsize), GE(SGO), 
			  A(blocksize), pa(A.begin()), pc(pa), pe(A.end()), 
			  cursym(0), curcnt(0), indexwritten(false), albits(ralbits)
			{
				SGO.put(n);
				SGO.put(albits);
			}
Ejemplo n.º 29
0
			GraphEdgeBlockBuffer(std::string const & filename, uint64_t const bufsize)
			: 
				COS(filename),
				B(bufsize,false),
				pa(B.begin()),
				pc(pa),
				pe(B.end())
			{
			
			}
Ejemplo n.º 30
0
			SocketInputStreamBuffer(::libmaus2::network::SocketInputInterface & rstream, uint64_t const rblocksize, uint64_t const rputbackspace = 0)
			: 
			  stream(rstream),
			  blocksize(rblocksize),
			  putbackspace(rputbackspace),
			  buffer(putbackspace + blocksize,false)
			{
				// set empty buffer
				setg(buffer.end(), buffer.end(), buffer.end());
			}