Пример #1
0
			/**
			 * Set up a buffer in front of rstream.
			 *
			 * The internal array holds rbuffersize+rpushbackspace elements.
			 *
			 * @param rstream stream this buffer reads from
			 * @param rbuffersize value stored in buffersize
			 * @param rpushbackspace value stored in pushbackspace
			 **/
			StreamWrapperBuffer(stream_type & rstream, ::std::size_t rbuffersize, std::size_t rpushbackspace)
			:
				stream(rstream),
				buffersize(rbuffersize),
				pushbackspace(rpushbackspace),
				buffer(buffersize+pushbackspace,false),
				streamreadpos(0)
			{
				// start with an empty get area: all three pointers sit at the end of the buffer
				setg(buffer.end(), buffer.end(), buffer.end());
			}
Пример #2
0
			/**
			 * Empty the get area and optionally reposition the stream.
			 *
			 * @param repos if true, seek the stream to offset symsread (SEEK_SET)
			 **/
			void init(bool const repos)
			{
				// get area becomes empty: all pointers at the end of the buffer
				setg(buffer.end(), buffer.end(), buffer.end());

				// nothing more to do unless repositioning was requested
				if ( ! repos )
					return;

				stream->seek(symsread,SEEK_SET);
			}
Пример #3
0
			/**
			 * Empty the get area and optionally reposition the stream.
			 *
			 * @param repos if true, call stream.lseek(symsread)
			 **/
			void init(bool const repos)
			{
				// get area becomes empty: all pointers at the end of the buffer
				setg(buffer.end(), buffer.end(), buffer.end());

				// nothing more to do unless repositioning was requested
				if ( ! repos )
					return;

				stream.lseek(symsread);
			}
			/**
			 * Build the two 256 entry symbol tables and register the MD and NM tags in auxvec.
			 *
			 * A/C/G/T (upper and lower case) map to 0/1/2/3 in both tables; every
			 * other index maps to 4 in T0 and to 5 in T1.
			 **/
			MdStringComputationContext()
			: T0(256,false), T1(256,false), nm(0)
			{
				// default codes for all symbols not listed below
				std::fill(T0.begin(),T0.end(),4);
				std::fill(T1.begin(),T1.end(),5);

				// first table
				T0['A'] = 0; T0['a'] = 0;
				T0['C'] = 1; T0['c'] = 1;
				T0['G'] = 2; T0['g'] = 2;
				T0['T'] = 3; T0['t'] = 3;

				// second table, same codes for the four bases
				T1['A'] = 0; T1['a'] = 0;
				T1['C'] = 1; T1['c'] = 1;
				T1['G'] = 2; T1['g'] = 2;
				T1['T'] = 3; T1['t'] = 3;

				auxvec.set("MD");
				auxvec.set("NM");
			}
Пример #5
0
			/**
			 * Refill the get area from the stream.
			 *
			 * Keeps up to pushbackspace already consumed characters in front of the
			 * newly read data so they stay available for put back.
			 *
			 * @return current character, or traits_type::eof() if the stream delivered no data
			 **/
			int_type underflow()
			{
				// unread data left in the get area: just hand out the current character
				if ( gptr() < egptr() )
					return static_cast<int_type>(*uptr());

				assert ( gptr() == egptr() );

				// new data is read starting here; the region in front is the put back area
				char * const readstart = buffer.begin() + pushbackspace;

				// keep as many consumed characters as exist and as fit into the put back area
				uint64_t const consumed = static_cast<uint64_t>(gptr()-eback());
				uint64_t const room = static_cast<uint64_t>(readstart-buffer.begin());
				uint64_t const keep = std::min(consumed,room);
				::std::memmove(readstart-keep,gptr()-keep,keep);

				// fill the rest of the buffer from the stream and account for what was read
				stream.read(readstart, buffer.end()-readstart);
				size_t const got = stream.gcount();
				streamreadpos += got;

				setg(readstart-keep, readstart, readstart+got);

				return got ? static_cast<int_type>(*uptr()) : traits_type::eof();
			}
			/**
			 * Open file fn for output and set up the put area.
			 *
			 * @param fn name of the file passed to doOpen
			 * @param rbuffersize requested buffer size; any value <= 0 selects the size
			 *        returned by getOptimalIOBlockSize for the opened descriptor
			 **/
			LinuxStreamingPosixFdOutputStreamBuffer(std::string const & fn, int64_t const rbuffersize)
			: fd(doOpen(fn)), closefd(true), 
			  // a size of 0 would yield an empty buffer and make buffer.end()-1 below
			  // point in front of the array, so 0 is handled like a negative request
			  optblocksize((rbuffersize <= 0) ? getOptimalIOBlockSize(fd,std::string()) : rbuffersize),
			  buffersize(optblocksize),
			  buffer(buffersize,false), prevwrite(0,0)
			{
				// the put area ends one element before the end of the buffer
				setp(buffer.begin(),buffer.end()-1);
			}
			/**
			 * Allocate B with getBgzfMaxBlockSize() elements and set pa and pc
			 * to its start and pe to its end.
			 **/
			BgzfParallelRecodeDeflateBase()
			:
				B(getBgzfMaxBlockSize(),false),
				pa(B.begin()),
				pc(B.begin()),
				pe(B.end())
			{
			}
Пример #8
0
			/**
			 * Open the output and write the two header values n and albits.
			 *
			 * @param filename name handed to COS
			 * @param ralbits value stored in albits and written as second header value
			 * @param n value written as first header value
			 * @param rblocksize value stored in blocksize; also the size of A
			 * @param rbufsize second constructor argument of SGO
			 **/
			GammaRLEncoder(std::string const & filename, unsigned int const ralbits, uint64_t const n, uint64_t const rblocksize, uint64_t const rbufsize = 64*1024)
			:
				blocksize(rblocksize),
				COS(filename),
				SGO(COS,rbufsize),
				GE(SGO),
				A(blocksize),
				pa(A.begin()),
				pc(pa),
				pe(A.end()),
				cursym(0),
				curcnt(0),
				indexwritten(false),
				albits(ralbits)
			{
				// header: first n, then albits
				SGO.put(n);
				SGO.put(albits);
			}
Пример #9
0
			/**
			 * Take one element off the free list.
			 *
			 * If the free list is empty, both alloclist and freelist are replaced by
			 * arrays of twice the size (at least 1); the additional alloclist slots are
			 * filled with newly allocated elements, which are also pushed on the free list.
			 *
			 * @return pointer to an element taken from the free list
			 **/
			element_type * get()
			{
				if ( freelistfill == 0 )
				{
					uint64_t const oldalloc = alloclist.size();
					uint64_t const oldfree = freelist.size();
					element_type * const nullp = 0;

					// grown allocation list: old pointers first, then null pointers
					libmaus::autoarray::AutoArray<element_type *> grownalloc(
						std::max(static_cast<uint64_t>(1),static_cast<uint64_t>(2*oldalloc)),
						false
					);
					std::copy(alloclist.begin(),alloclist.end(),grownalloc.begin());
					std::fill(grownalloc.begin()+oldalloc,grownalloc.end(),nullp);

					// create the new elements in the added slots
					for ( uint64_t i = oldalloc; i < grownalloc.size(); ++i )
						grownalloc[i] = new element_type;

					// grown free list: old contents first, then null pointers
					libmaus::autoarray::AutoArray<element_type *> grownfree(
						std::max(static_cast<uint64_t>(1),static_cast<uint64_t>(2*oldfree)),
						false
					);
					std::copy(freelist.begin(),freelist.end(),grownfree.begin());
					std::fill(grownfree.begin()+oldfree,grownfree.end(),nullp);

					freelist = grownfree;

					// every newly created element is free
					for ( uint64_t i = oldalloc; i < grownalloc.size(); ++i )
						freelist[freelistfill++] = grownalloc[i];

					alloclist = grownalloc;
				}

				--freelistfill;
				return freelist[freelistfill];
			}
Пример #10
0
			/**
			 * Append pattern as a four line FastQ record to the buffer C and update the
			 * statistics; flush once patperblock patterns have been collected.
			 *
			 * @param pattern record to be appended
			 **/
			void put(libmaus::fastx::FastQReader::pattern_type const & pattern)
			{
				uint64_t const needed = getFastQLength(pattern);

				// double the buffer until the record fits behind pc
				while ( (C.end() - pc) < static_cast<ptrdiff_t>(needed) )
				{
					uint64_t const used = pc-C.begin();
					C.resize(std::max(2*C.size(),static_cast<uint64_t>(1ull)));
					pc = C.begin()+used;
				}

				// line 1: '@' followed by the id
				*pc++ = '@';
				pc = std::copy(pattern.sid.begin(),pattern.sid.end(),pc);
				*pc++ = '\n';

				// line 2: the sequence
				pc = std::copy(pattern.spattern.begin(),pattern.spattern.end(),pc);
				*pc++ = '\n';

				// line 3: '+' followed by the plus line content
				*pc++ = '+';
				pc = std::copy(pattern.plus.begin(),pattern.plus.end(),pc);
				*pc++ = '\n';

				// line 4: the quality string
				pc = std::copy(pattern.quality.begin(),pattern.quality.end(),pc);
				*pc++ = '\n';

				assert ( pc <= C.end() );

				// statistics over the sequence lengths
				uint64_t const seqlen = static_cast<uint64_t>(pattern.spattern.size());
				lnumsyms += seqlen;
				minlen = std::min(minlen,seqlen);
				maxlen = std::max(maxlen,seqlen);
				++pathigh;

				// block complete
				if ( pathigh - patlow == patperblock )
					internalFlush();
			}
Пример #11
0
		void checkSpace(uint64_t const outlen)
		{
			// buffer overflow?
			if ( freeSpace() < outlen )
			{
				flush();
				assert ( opc == opa );
			
				if ( outlen > outbuf.size() )
				{
					::libmaus::autoarray::AutoArray<uint8_t> newbuf(outlen);	
					std::copy( outbuf.begin(), outbuf.end(), newbuf.begin() );
					
					outbuf = newbuf;
					opa = outbuf.begin();
					opc = opa;
					ope = outbuf.end();
				}
			}
			
			assert ( freeSpace() >= outlen );		
		}
			/**
			 * Build a collision free hash table for the elements in [ita,ite).
			 *
			 * Table sizes n = 1,2,4,... up to maxn are tried; the first size for which
			 * hash() & (n-1) is distinct for all elements is used. H then maps each
			 * masked hash value to the index of its element (relative to ita), unused
			 * slots hold -1.
			 *
			 * @param ita start of the element range
			 * @param ite end of the element range
			 * @param maxn largest table size tried
			 * @throws libmaus::exception::LibMausException if no size <= maxn is collision free
			 **/
			ConstantStringHash(iterator ita, iterator ite, uint64_t const maxn = 64*1024)
			{
				k = 0;
				n = 1;
				m = 0;
				bool ok = false;
				
				while ( n <= maxn )
				{
					// number of elements per slot for the current mask
					// (relies on AutoArray's single argument constructor zeroing the array — confirm)
					libmaus::autoarray::AutoArray<uint64_t> C(n);

					for ( iterator it = ita; it != ite; ++it )
						C [ it->hash() & m ] ++;
						
					ok = true;
					for ( uint64_t i = 0; ok && i < n; ++i )
						ok = C[i] <= 1;
					
					// stop before advancing so that k, n and m describe the size that was
					// actually verified (and n never exceeds maxn)
					if ( ok )
						break;
					
					++k;
					n <<= 1;
					m = (m << 1)|1;
				}
				
				if ( ! ok )
				{
					libmaus::exception::LibMausException se;
					se.getStream() << "Cannot create perfect hash of size <= " << maxn << " for " << ite-ita << " elements" << std::endl;
					se.finish();
					throw se;
				}
				
				H = libmaus::autoarray::AutoArray<int64_t>(n);
				std::fill(H.begin(),H.end(),-1);
				
				for ( iterator it = ita; it != ite; ++it )
					H [ it->hash() & m ] = it-ita;

				// sanity check: every element is found in its own slot
				for ( iterator it = ita; it != ite; ++it )
					assert ( H [ it->hash() & m ] == it-ita );
			}
Пример #13
0
			/**
			 * @return pointer to the end of the underlying array H (read only)
			 **/
			pair_type const * end() const
			{
				return H.end();
			}
Пример #14
0
			SimpleHashMap(unsigned int const rslog)
			: slog(rslog), hashsize(1ull << slog), hashmask(hashsize-1), fill(0), H(hashsize,false)
			{
				std::fill(H.begin(),H.end(),pair_type(base_type::unused(),value_type()));
			}
Пример #15
0
			/**
			 * @return pointer to the end of the underlying array H
			 **/
			pair_type * end()
			{
				return H.end();
			}
Пример #16
0
			/**
			 * Replace the trace array by a new one of length tracelen and make the
			 * trace empty (ta and te both point to the end of the array).
			 *
			 * @param tracelen number of elements in the new trace array
			 **/
			void resize(uint64_t const tracelen)
			{
				trace = ::libmaus::autoarray::AutoArray<step_type>(tracelen,false);
				// both pointers go to the end of the new array
				ta = te = trace.end();
			}
Пример #17
0
			/**
			 * Allocate a trace array of length tracelen; ta and te both point to its end.
			 *
			 * @param tracelen number of elements in the trace array
			 **/
			AlignmentTraceContainer(uint64_t const tracelen = 0)
			:
				trace(tracelen),
				te(trace.end()),
				ta(te)
			{
			}
Пример #18
0
			/**
			 * @return end iterator of the underlying array A
			 **/
			const_iterator end() const { return A.end(); }
Пример #19
0
			/**
			 * @return pointer to the end of the underlying array H
			 **/
			key_type * end()
			{
				return H.end();
			}
			/**
			 * Allocate the input buffer; pa and pc point to its start, pe to its end.
			 *
			 * @param bufsize buffer size, must not exceed getBgzfMaxBlockSize()
			 **/
			BgzfDeflateInputBufferBase(uint64_t const bufsize = getBgzfMaxBlockSize())
			:
				inbuf(bufsize,false),
				pa(inbuf.begin()),
				pc(pa),
				pe(inbuf.end())
			{
				// larger buffers are not allowed
				assert ( bufsize <= getBgzfMaxBlockSize() );
			}
Пример #21
0
		/**
		 * Allocate an output buffer of bufsize elements in front of rout.
		 *
		 * opa and opc start at the beginning of the buffer, ope at its end;
		 * written starts at zero.
		 *
		 * @param bufsize size of the buffer
		 * @param rout stream stored in out
		 **/
		OutputBuffer(uint64_t bufsize, std::ostream & rout)
		:
			outbuf(bufsize,false),
			opa(outbuf.begin()),
			opc(opa),
			ope(outbuf.end()),
			out(rout),
			written(0),
			ST()
		{
		}
Пример #22
0
/**
 * Convert a byte file to a compact array file.
 *
 * Rest argument 0 is the input file, rest argument 1 the output file.
 * Options: verbose (default 1) prints progress to standard error; addterm
 * (default 0), if non zero, shifts every symbol up by one and appends a
 * terminator symbol 0.
 *
 * @return 0 on success, 1 if an exception was caught
 **/
int main(int argc, char * argv[])
{
	try
	{
		::libmaus::util::ArgInfo const arginfo(argc,argv);
		std::string const input = arginfo.getRestArg<std::string>(0);
		std::string const output = arginfo.getRestArg<std::string>(1);
		unsigned int const verbose = arginfo.getValue<unsigned int>("verbose",1);
		unsigned int const addterm = arginfo.getValue<unsigned int>("addterm",0) ? 1 : 0;

		// largest symbol occurring in the input (after the optional shift by one)
		::libmaus::autoarray::AutoArray<uint64_t> const chist = computeCharHist(input);
		uint64_t maxsym = 0;
		for ( uint64_t i = 0; i < chist.size(); ++i )
			if ( chist[i] )
				maxsym = i;
		if ( addterm )
			maxsym += 1;
		// number of bits needed to store maxsym
		unsigned int const b = maxsym ? (64-::libmaus::bitio::Clz::clz(maxsym)) : 0;

		// total number of symbols in the input
		uint64_t const n = std::accumulate(chist.begin(),chist.end(),0ull);
		if ( verbose )
			std::cerr << "[V] n=" << n << " maxsym=" << maxsym << " b=" << b << std::endl;				

		uint64_t const blocksize = 8*1024;
		uint64_t const numblocks = (n+blocksize-1)/blocksize;
		::libmaus::autoarray::AutoArray<uint8_t> B(blocksize);
		::libmaus::aio::CheckedInputStream CIS(input);
		::libmaus::bitio::CompactArrayWriter CAW(output,n+addterm,b);
		int64_t lastperc = -1;
		
		if ( verbose )
			std::cerr << "[V] ";
			
		// copy the input block by block (the index is not called b so the bit width above stays visible)
		for ( uint64_t blockid = 0; blockid < numblocks; ++blockid )
		{
			uint64_t const low = std::min(blockid*blocksize,n);
			uint64_t const high = std::min(low+blocksize,n);
			uint64_t const range = high-low;
			
			CIS.read ( reinterpret_cast<char *>(B.begin()), range );
			assert ( CIS.gcount() == static_cast<int64_t>(range) );
			
			// NOTE(review): B holds uint8_t, so an input symbol 255 wraps around to 0
			// here and becomes indistinguishable from the terminator — confirm
			// whether such inputs need to be rejected
			if ( addterm )
				for ( uint64_t i = 0; i < range; ++i )
					B[i] += 1;
			
			CAW.write(B.begin(),range);
			
			// n is non zero here, as the loop body is not reached for an empty input
			int64_t const newperc = (high * 100) / n;
			if ( verbose && newperc != lastperc )
			{
				lastperc = newperc;
				std::cerr << "(" << newperc << ")";
			}
		}
		if ( addterm )
			CAW.put(0);
		if ( verbose )
			std::cerr << std::endl;
		
		CAW.flush();
		
		#if 0
		::libmaus::bitio::CompactDecoderWrapper CDW(output);
		for ( uint64_t i = 0; i < n+addterm; ++i )
			std::cerr << CDW.get();
		std::cerr << std::endl;
		#endif

		return 0;
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
		// report the failure to the caller instead of exiting with status 0
		return 1;
	}
}
Пример #23
0
	/**
	 * Allocate the two 256 entry tables; M is filled with 1 and C with 0.
	 **/
	ConsensusAux()
	:
		M(256),
		C(256)
	{
		// M: all ones
		std::fill(M.begin(),M.end(),1);
		// C: all zeroes
		std::fill(C.begin(),C.end(),0);
	}
Пример #24
0
			/**
			 * Allocate a buffer of bufsize elements; pa and pc point to its start, pe to its end.
			 *
			 * @param bufsize number of elements in the buffer
			 **/
			Buffer(uint64_t const bufsize = 8*1024)
			:
				A(bufsize,false),
				pa(A.begin()),
				pc(pa),
				pe(A.end())
			{
			}