Пример #1
0
/*
 * Build a bit vector with one bit per entry of V. Bit i is set exactly if
 * V[i].first differs from V[i].second. setupIndex() is called on the vector
 * before it is handed to the caller.
 */
libmaus::bitio::IndexedBitVector::unique_ptr_type libmaus::util::GenericIntervalTree::computeNonEmptyBV(::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & V)
{
	typedef ::libmaus::bitio::IndexedBitVector bit_vector_type;

	uint64_t const numintervals = V.size();
	bit_vector_type::unique_ptr_type nonemptybits(new bit_vector_type(numintervals));

	for ( uint64_t idx = 0; idx != numintervals; ++idx )
	{
		bool const isnonempty = (V[idx].first != V[idx].second);
		(*nonemptybits)[idx] = isnonempty;
	}

	nonemptybits->setupIndex();

	return UNIQUE_PTR_MOVE(nonemptybits);
}
Пример #2
0
			/*
			 * Set up the lookup table: the slot at index -1 and the slot of
			 * character c are set to true, all other slots in [0,256) to false.
			 * table points one element behind the start of atable (257 slots),
			 * so index -1 refers to the first element of atable.
			 */
			CharTermTable(uint8_t c)
			: atable(257), table(atable.get()+1)
			{
				// slot in front of the character range
				table[-1] = true;

				// clear the 256 character slots, then mark c
				for ( int sym = 255; sym >= 0; --sym )
					table[sym] = false;
				table[c] = true;
			}
Пример #3
0
			/*
			 * Put the object back into its initial state: pc is set to the
			 * start of C, p, lnumsyms and maxlen are zeroed and minlen is set
			 * to the largest uint64_t value.
			 */
			void reset()
			{
				// position
				pc = C.begin();
				p = 0;

				// counters
				maxlen = 0;
				minlen = std::numeric_limits<uint64_t>::max();
				lnumsyms = 0;
			}
                        /**
                         * Open rfilename for writing and seek the descriptor to offset.
                         *
                         * An open() call failing with EINTR is reported on std::cerr and retried.
                         *
                         * @param rfilename name of the output file
                         * @param bufsize size handed to the buffer B
                         * @param truncate if true the file is opened with O_WRONLY|O_TRUNC|O_CREAT,
                         *        otherwise with O_WRONLY only
                         * @param offset absolute position passed to lseek (SEEK_SET)
                         * @param rmetasync value stored in metasync
                         * @throws ::libmaus::exception::LibMausException if open() fails with an
                         *         error other than EINTR or if lseek() fails
                         **/
                        SynchronousGenericOutputPosix(
                                std::string const & rfilename, 
                                uint64_t const bufsize, 
                                bool const truncate,
                                uint64_t const offset,
                                bool const rmetasync = true
                        )
                        : filename(rfilename), dirname(::libmaus::util::ArgInfo::getDirName(filename)), metasync(rmetasync),
                          B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()), 
                          fd ( -1 ),
                          totalwrittenbytes(0), totalwrittenwords(0)
                        {
                                while ( (fd = ::open(filename.c_str(), truncate ? (O_WRONLY|O_TRUNC|O_CREAT) : O_WRONLY , 0755 )) < 0 )
                                {
                                        // capture errno right away, any later call may overwrite it
                                        int const error = errno;

                                        switch ( error )
                                        {
                                                case EINTR:
                                                {
                                                        std::cerr << "Restarting open() system call interupted by signal." << std::endl;
                                                        break;
                                                }
                                                default:
                                                {
                                                        ::libmaus::exception::LibMausException se;
                                                        se.getStream() << "Failed to open file "<< filename <<" in SynchronousGenericOutputPosix: " <<
                                                                strerror(error);
                                                        se.finish();
                                                        throw se;
                                                }
                                        }
                                }
                                if ( lseek ( fd, offset, SEEK_SET) == static_cast<off_t>(-1) )
                                {
                                        // save the lseek error before close(), which may set errno itself
                                        int const error = errno;
                                        close(fd);
                                        ::libmaus::exception::LibMausException se;
                                        se.getStream() << "Failed to seek " << filename << " in SynchronousGenericOutputPosix: " <<
                                                strerror(error);
                                        se.finish();
                                        throw se;
                                }
                                
                                #if 0
                                std::cerr << "File " << filename << " opened for output in "
                                        << ::libmaus::util::Demangle::demangle<this_type>() << std::endl;
                                #endif
                        }
Пример #5
0
			/*
			 * Decode pattern number i into pat.
			 * The decoder input starts in T at offset (*dict)[i - dict->FI.low].
			 * The context's nextid is set to i before decoding and pat.patid is
			 * set to i afterwards.
			 */
			void getPattern(pattern_type & pat, uint64_t i) const
			{
				typedef ::libmaus::fastx::CompactFastQDecoderBase decoder_base_type;

				// decoding context starting at id i
				::libmaus::fastx::CompactFastQContext context;
				context.nextid = i;

				// input object positioned at the encoded pattern
				GetObject source(T.begin()+(*dict)[i - dict->FI.low]);

				decoder_base_type::decodePattern<GetObject>(source,*H,context,pat);
				pat.patid = i;
			}
Пример #6
0
			void serialise(std::ostream & out) const
			{
				::libmaus::util::NumberSerialisation::serialiseNumber(out,slog);
				::libmaus::util::NumberSerialisation::serialiseNumber(out,hashsize);
				::libmaus::util::NumberSerialisation::serialiseNumber(out,hashmask);
				::libmaus::util::NumberSerialisation::serialiseNumber(out,fill);
				H.serialize(out);
			}
Пример #7
0
			/*
			 * Return a new object constructed with slog+1 into which every
			 * entry of H whose first component differs from base_type::unused()
			 * has been inserted. This object is not modified.
			 */
			unique_ptr_type extend() const
			{
				unique_ptr_type grown(new this_type(slog+1));

				uint64_t slot = 0;
				while ( slot < H.size() )
				{
					bool const occupied = (H[slot].first != base_type::unused());

					if ( occupied )
						grown->insert ( H[slot].first, H[slot].second );

					++slot;
				}

				return UNIQUE_PTR_MOVE(grown);
			}
Пример #8
0
			void cleanup()
			{
				for ( uint64_t i = 0; i < alloclist.size(); ++i )
					delete alloclist[i];
				alloclist = libmaus::autoarray::AutoArray<element_type *>(0);	
				freelist = libmaus::autoarray::AutoArray<element_type *>(0);	
				freelistfill = 0;
			}
			/*
			 * Flush all contexts and assemble the final stream in out:
			 * symbols, the serialised root, the number of contexts, then for
			 * each context a bit count, a word count and the content of its
			 * temp file, and finally the vector of start positions followed by
			 * the position of that vector.
			 *
			 * Returns the sum of the byte counts reported by the serialisation
			 * calls plus the number of bytes accounted for the copied temp files.
			 */
			uint64_t createFinalStream(stream_type & out)
			{
				typedef ::libmaus::util::NumberSerialisation number_serialiser;
				typedef ::libmaus::serialize::Serialize<uint64_t> word_serialiser;

				flush();

				// running output position
				uint64_t written = 0;
				written += number_serialiser::serialiseNumber(out,symbols); // n
				written += root->serialize(out); // huffman code tree
				written += number_serialiser::serialiseNumber(out,contexts.size()); // number of bit vectors

				// start position of each context's data
				std::vector<uint64_t> nodeoffsets;

				for ( uint64_t ctx = 0; ctx < contexts.size(); ++ctx )
				{
					nodeoffsets.push_back(written);

					// 6 data words and 8 words in total are accounted per written block
					uint64_t const numblocks = contexts[ctx]->blockswritten;
					uint64_t const numdatawords = 6*numblocks;
					uint64_t const numwords = 8*numblocks;

					// release the context and close its output temp file before reading it back
					contexts[ctx].reset();
					tmpcnt.closeOutputTempFile(ctx);

					// bits written
					written += word_serialiser::serialize(out,64*numdatawords);
					// auto array header (words written)
					written += word_serialiser::serialize(out,numwords);

					// append the temp file content
					std::istream & tmpin = tmpcnt.openInputTempFile(ctx);
					::libmaus::util::GetFileSize::copy (tmpin, out, numwords, sizeof(uint64_t));
					written += numwords * sizeof(uint64_t);
					tmpcnt.closeInputTempFile(ctx);
				}

				// index: start positions, then the position of the index itself
				uint64_t const indexpos = written;
				written += number_serialiser::serialiseNumberVector(out,nodeoffsets);
				written += number_serialiser::serialiseNumber(out,indexpos);

				out.flush();

				return written;
			}
Пример #10
0
/*
 * Return an array containing, in their original order, those entries of V
 * whose first and second component differ. If there is no such entry a
 * message is printed on std::cerr and an array of size 0 is returned.
 */
libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > libmaus::util::GenericIntervalTree::computeNonEmpty(::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > const & V)
{
	uint64_t const numintervals = V.size();

	// first pass: count the entries to keep
	uint64_t numkept = 0;
	for ( uint64_t idx = 0; idx != numintervals; ++idx )
	{
		bool const keep = (V[idx].first != V[idx].second);
		if ( keep )
			++numkept;
	}

	if ( ! numkept )
		std::cerr << "all of the " << V.size() << " intervals are empty." << std::endl;

	// second pass: copy them
	::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > kept(numkept);
	uint64_t outidx = 0;
	for ( uint64_t idx = 0; idx != numintervals; ++idx )
	{
		bool const keep = (V[idx].first != V[idx].second);
		if ( keep )
		{
			kept [ outidx ] = V[idx];
			++outidx;
		}
	}

	return kept;
}
Пример #11
0
			/*
			 * Fill the 256 entry table A: the entry of each character of
			 * "=ACMGRSVTWYHKDBN" is its position in that string, every other
			 * entry is the length of the string.
			 */
			BamSeqEncodeTable()
			: A(256)
			{
				char const * const symbols = "=ACMGRSVTWYHKDBN";
				uint64_t const numsymbols = strlen(symbols);

				// default for all entries: the number of symbols
				for ( uint64_t idx = 0; idx != A.size(); ++idx )
					A[idx] = numsymbols;

				// symbols map to their position in the string
				for ( uint64_t code = 0; code != numsymbols; ++code )
					A [ symbols[code] ] = code;
			}
			/*
			 * For every context: write a single 0 bit, then call the
			 * context's flush().
			 */
			void flush()
			{
				uint64_t ctx = 0;
				while ( ctx < contexts.size() )
				{
					contexts[ctx]->writeBit(0);
					contexts[ctx]->flush();
					++ctx;
				}
			}
Пример #13
0
			BgzfDeflateParallel(std::ostream & rdeflateout, uint64_t const rnumthreads, uint64_t const rnumbuffers, int const level, std::ostream * rdeflateindexostr = 0)
			: deflategloblist(), deflatecontext(deflategloblist,rdeflateout,rnumbuffers,level,BgzfDeflateParallelContext::getDefaultDeflateGetCur(),rdeflateindexostr), T(rnumthreads)
			{
				for ( uint64_t i = 0; i < T.size(); ++i )
				{
					BgzfDeflateParallelThread::unique_ptr_type tTi(new BgzfDeflateParallelThread(deflatecontext));
					T[i] = UNIQUE_PTR_MOVE(tTi);
					T[i]->start();
				}
			}
Пример #14
0
			/*
			 * Return an object from the free list.
			 *
			 * If the free list is empty, alloclist and freelist are first
			 * replaced by arrays of twice their size (at least 1); the added
			 * part of alloclist is filled with newly allocated objects, which
			 * are also put on the free list.
			 */
			element_type * get()
			{
				typedef libmaus::autoarray::AutoArray<element_type *> pointer_array_type;

				if ( freelistfill == 0 )
				{
					element_type * nullp = 0;

					// grown allocation list: old pointers first, new slots nulled
					uint64_t const oldalloc = alloclist.size();
					uint64_t const newalloc = std::max(
						static_cast<uint64_t>(1),
						static_cast<uint64_t>(2*alloclist.size())
					);
					pointer_array_type grownalloc(newalloc,false);
					std::copy(alloclist.begin(),alloclist.end(),grownalloc.begin());
					std::fill(grownalloc.begin()+oldalloc,grownalloc.end(),nullp);

					// allocate the additional objects
					for ( uint64_t idx = oldalloc; idx != newalloc; ++idx )
						grownalloc[idx] = new element_type;

					// grown free list
					uint64_t const newfree = std::max(
						static_cast<uint64_t>(1),
						static_cast<uint64_t>(2*freelist.size())
					);
					pointer_array_type grownfree(newfree,false);
					std::copy(freelist.begin(),freelist.end(),grownfree.begin());
					std::fill(grownfree.begin()+freelist.size(),grownfree.end(),nullp);

					freelist = grownfree;

					// all new objects are free
					for ( uint64_t idx = oldalloc; idx != newalloc; ++idx )
						freelist[freelistfill++] = grownalloc[idx];

					alloclist = grownalloc;
				}

				return freelist[--freelistfill];
			}
Пример #15
0
                        SynchronousOutputBuffer8(std::string const & rfilename, uint64_t const bufsize, bool truncate = true)
                        : filename(rfilename), B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN())
                        {
                                if ( truncate )
                                {
        				std::ofstream ostr(filename.c_str(), std::ios::binary);
	        			ostr.flush();
                                }

                        }
Пример #16
0
			/*
			 * Collect the vcnt values of all entries of index (all parts, in
			 * order), call prefixSums() on them and return one pair per entry
			 * consisting of two consecutive values of the resulting array.
			 */
			::libmaus::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > computeSymAccu()
			{
				typedef std::pair<uint64_t,uint64_t> interval_type;

				// total number of entries over all parts of the index
				uint64_t numint = 0;
				for ( uint64_t part = 0; part < index.size(); ++part )
					numint += index[part].size();

				// vcnt values in order, the array has one additional slot at the end
				::libmaus::autoarray::AutoArray<uint64_t> preaccu(numint+1);
				uint64_t filled = 0;
				for ( uint64_t part = 0; part < index.size(); ++part )
					for ( uint64_t entry = 0; entry < index[part].size(); ++entry )
					{
						preaccu[filled] = index[part][entry].vcnt;
						++filled;
					}

				preaccu.prefixSums();

				// pair up neighbouring values
				::libmaus::autoarray::AutoArray< interval_type > accu(numint);
				for ( uint64_t idx = 0; idx+1 < preaccu.size(); ++idx )
					accu[idx] = interval_type(preaccu[idx],preaccu[idx+1]);

				return accu;
			}
Пример #17
0
			/*
			 * Store the socket pointer and construct the buffer B with
			 * bufsize. pa is initialised with B.get(), pc with pa, pe with pc
			 * and c with 0.
			 */
			SocketFastReaderBase(::libmaus::network::SocketBase * rsocket, uint64_t const bufsize)
			: socket(rsocket), B(bufsize), pa(B.get()), pc(pa), pe(pc), c(0)
			{}
Пример #18
0
			/*
			 * Flush, close and release every file object that is still set
			 * in files.
			 */
			~FileBunchLRU()
			{
				for ( uint64_t fileid = 0; fileid != files.size(); ++fileid )
				{
					// slot without a file object
					if ( ! files[fileid].get() )
						continue;

					files[fileid]->flush();
					files[fileid]->close();
					files[fileid].reset();
				}
			}
			/*
			 * Write all elements of A to outputfilename, using an object of
			 * this_type constructed with a buffer size argument of 64*1024.
			 */
			static void writeArray(::libmaus::autoarray::AutoArray<data_type> const & A, 
				std::string const & outputfilename)
			{
				this_type writer(outputfilename,64*1024);

				uint64_t const numelements = A.getN();
				uint64_t idx = 0;
				while ( idx != numelements )
				{
					writer.put(A[idx]);
					++idx;
				}

				writer.flush();
			}
Пример #20
0
			/*
			 * Open filename for reading and seek to offset.
			 *
			 * The stream is opened in binary mode so that the offset is a byte
			 * position and no character translation is applied to the data on
			 * platforms that distinguish text from binary streams.
			 *
			 * filename: name of the input file
			 * rnumbufs, rbufsize: their product is the size of the allocated buffer
			 * offset: position (relative to the start of the file) passed to seekg
			 *
			 * NOTE(review): neither the open nor the seek is checked for failure here.
			 */
			AsynchronousBufferReader(
				std::string const & filename, 
				uint64_t const rnumbufs, 
				uint64_t const rbufsize,
				uint64_t const offset
			)
			: std::ifstream(filename.c_str(), std::ios::in | std::ios::binary), bufsize(rnumbufs * rbufsize), 
			  abuffer(bufsize), buffer(abuffer.get()), av(true)
			{
				std::ifstream::seekg(offset,std::ios::beg);
			}
Пример #21
0
			/*
			 * Store the next triple in triple.
			 *
			 * If no buffered triples are left, the buffer B is refilled from
			 * istr first. Returns false if that read delivered no data, true
			 * otherwise.
			 */
			bool getNextTriple(TripleEdge & triple)
			{
				if ( curbufleft == 0 )
				{
					// refill the buffer
					istr.read ( reinterpret_cast<char *>(B.get()), B.getN() * sizeof(TripleEdge) );

					// no data read
					if ( ! istr.gcount() )
						return false;

					// only complete triples are expected
					assert ( istr.gcount() % sizeof(TripleEdge) == 0 );

					curbufleft = istr.gcount() / sizeof(TripleEdge);
					assert ( curbufleft );
					curtrip = B.get();
				}

				// hand out the current triple and advance
				triple = *curtrip;
				++curtrip;
				--curbufleft;

				return true;
			}
Пример #22
0
			/*
			 * Given a pointer e directly behind a newline, walk back over four
			 * lines (quality, plus, sequence, id) and return a pointer to the
			 * first character of the id line. Returns 0 if e is the start of C.
			 *
			 * The backward searches for the plus, sequence and id line
			 * newlines do not check for the start of C, so the four lines in
			 * front of e have to be present.
			 */
			char const * prevStart(char const * e) const
			{
				// nothing in front of the start of the buffer
				if ( e == C.begin() )
					return 0;

				assert ( e[-1] == '\n' );
				// move onto the newline ending the last (quality) line
				--e;

				// find the newlines ending the plus, sequence and id lines (in this order)
				for ( unsigned int line = 0; line < 3; ++line )
					while ( *--e != '\n' ) {}

				// move back to the first character of the line
				while ( e != C.begin() && e[-1] != '\n' )
					--e;

				return e;
			}
Пример #23
0
			uint64_t lookupValue(uint64_t const v) const
			{				
				if ( !index.size() )
					return 0;

				typedef IndexEntryValueGetAdapter<IndexEntry const *> adapter_type;
				adapter_type IEKGA(index.get());
				::libmaus::util::ConstIterator<adapter_type,uint64_t> const ita(&IEKGA);
				::libmaus::util::ConstIterator<adapter_type,uint64_t> ite(ita);
				ite += index.size();
				
				::libmaus::util::ConstIterator<adapter_type,uint64_t> R =
					::std::lower_bound(ita,ite,v);
					
				if ( R == ite )
					return index.size()-1;
					
				if ( v == *R )
					return R-ita;
				else
					return (R-ita)-1;
			}
Пример #24
0
			void extendInternal()
			{
				unique_ptr_type O(new this_type(slog+1));
				for ( uint64_t i = 0; i < H.size(); ++i )
					if ( H[i].first != base_type::unused() )
						O->insert ( H[i].first, H[i].second );
				
				slog = O->slog;
				hashsize = O->hashsize;
				hashmask = O->hashmask;
				fill = O->fill;
				H = O->H;
			}
Пример #25
0
			/*
			 * Refill the get area.
			 *
			 * If the get area still holds data, the current symbol is
			 * returned. Otherwise up to putbackspace symbols from the end of
			 * the old get area are copied in front of the data region, the
			 * data region is filled by stream.read() and the get pointers are
			 * set accordingly. symsread is increased by the amount read.
			 *
			 * Returns the current symbol, or traits_type::eof() if the read
			 * delivered nothing.
			 */
			int_type underflow()
			{
				// data left, no refill necessary
				if ( gptr() < egptr() )
					return static_cast<int_type>(*uptr());

				assert ( gptr() == egptr() );

				// number of symbols kept for putback
				uint64_t const consumed = static_cast<uint64_t>(gptr() - eback());
				uint64_t const putbackcopy = std::min(consumed,putbackspace);

				// move them directly in front of the data region
				std::copy(
					gptr()-putbackcopy,
					gptr(),
					buffer.begin() + putbackspace - putbackcopy
				);

				// refill the data region
				uint64_t const numread = stream.read(
						buffer.begin()+putbackspace,
						buffer.size()-putbackspace
					);

				// get area: putback symbols, current position, end of the new data
				setg(
					buffer.begin()+putbackspace-putbackcopy,
					buffer.begin()+putbackspace,
					buffer.begin()+putbackspace+numread);

				symsread += numread;

				if ( ! numread )
					return traits_type::eof();

				return static_cast<int_type>(*uptr());
			}
Пример #26
0
		void checkSpace(uint64_t const outlen)
		{
			// buffer overflow?
			if ( freeSpace() < outlen )
			{
				flush();
				assert ( opc == opa );
			
				if ( outlen > outbuf.size() )
				{
					::libmaus::autoarray::AutoArray<uint8_t> newbuf(outlen);	
					std::copy( outbuf.begin(), outbuf.end(), newbuf.begin() );
					
					outbuf = newbuf;
					opa = outbuf.begin();
					opc = opa;
					ope = outbuf.end();
				}
			}
			
			assert ( freeSpace() >= outlen );		
		}
Пример #27
0
			/*
			 * Decode the next block into decodebuf and set pa, pc and pe to
			 * it. Returns false if no file with remaining blocks is left,
			 * true otherwise.
			 */
			bool decodeBlock()
			{
				/* skip files whose blocks have all been read */
				bool needopen = false;
				for ( ; fileptr < idda.data.size() && blockptr == idda.data[fileptr].numentries; ++fileptr )
				{
					blockptr = 0;
					needopen = true;
				}

				/* no more files */
				if ( fileptr == idda.data.size() )
					return false;

				/* switch to the file we moved to */
				if ( needopen )
					openNewFile();

				/* align to word boundary */
				GD->flush();
				/* read block size */
				uint64_t const numsyms = GD->decodeWord(32);

				/* make sure the memory buffer is large enough */
				if ( decodebuf.size() < numsyms )
					decodebuf.resize(numsyms);

				/* buffer pointers: start, current, end */
				pa = decodebuf.begin();
				pc = pa;
				pe = pa + numsyms;

				/* decode the content of the block */
				for ( uint64_t idx = 0; idx != numsyms; ++idx )
					decodebuf[idx] = GD->decode();

				/* one more block consumed from the current file */
				++blockptr;

				return true;
			}
Пример #28
0
			static void merge(std::vector<std::string> const & inputfilenames, std::string const & outputfilename)
			{
				CheckOverlapResultMergeInput in(inputfilenames);
				std::ofstream ostr(outputfilename.c_str(),std::ios::binary);
				
				CheckOverlapResult::shared_ptr_type ptr;
				
				while ( (ptr=in.get()) )
					ptr->serialise(ostr);
					
				ostr.flush();
				assert ( ostr );
				ostr.close();
			}
Пример #29
0
			void internalFlush()
			{
				if ( pathigh != patlow )
				{
					#if defined(LIBMAUS_FASTX_FASTQBGZFWRITER_PARALLEL)
					uint64_t const bcnt = bgzfenc->writeSyncedCount(C.begin(),pc-C.begin());
					libmaus::util::UTF8::encodeUTF8(bcnt,*bgzfidxcntoutstr);
					libmaus::fastx::FastInterval const FI(patlow,pathigh,0,0,lnumsyms,minlen,maxlen);
					#else
					std::pair<uint64_t,uint64_t> bcntccnt = bgzfenc->writeSyncedCount(C.begin(),pc-C.begin());
					libmaus::fastx::FastInterval const FI(patlow,pathigh,cacc,cacc+bcntccnt.second,lnumsyms,minlen,maxlen);
					cacc += bcntccnt.second;
					#endif
					
					(*fioutstr) << FI.serialise();				
					blockcnt += 1;
						
					std::cerr << FI << std::endl;
					
					reset();
					patlow = pathigh;
				}	
			}
Пример #30
0
			/*
			 * Set up input from the stream ristr.
			 *
			 * rbufsize is stored in bufsize and used to construct the buffer;
			 * pa, pc and pe are all initialised to the start of the buffer.
			 * rtotalwords (default: largest uint64_t value) is stored in
			 * totalwords, rcheckmod (default: true) in checkmod;
			 * totalwordsread starts at 0 and Pistr is default initialised.
			 */
			SynchronousGenericInput(std::istream & ristr, uint64_t const rbufsize, 
				uint64_t const rtotalwords = std::numeric_limits<uint64_t>::max(),
				bool const rcheckmod = true
			)
			:
				bufsize(rbufsize),
				buffer(bufsize,false),
				pa(buffer.get()),
				pc(pa),
				pe(pa),
				Pistr(),
				istr(ristr),
				totalwords ( rtotalwords ),
				totalwordsread(0),
				checkmod(rcheckmod)
			{}