/**
 * write num as a term of exactly expo digits in the given base;
 * every stored digit is the digit value plus one
 *
 * @param num number to be written
 **/
void putTerm(uint64_t num)
			{
				uint8_t * const termstart = termbuf.get();
				uint8_t * cursor = termstart + termbuf.getN();

				// fill the term buffer back to front, least significant digit first
				for ( unsigned int digit = 0; digit < expo; ++digit, num /= base )
				{
					--cursor;
					*cursor = (num % base) + 1;
				}

				// the term buffer is expected to hold exactly expo digits
				assert ( cursor == termstart );

				// emit the digits front to back, i.e. most significant digit first
				for ( unsigned int digit = 0; digit < expo; ++digit, ++cursor )
					put( *cursor );
			}
			SynchronousGenericInput(
				std::string const & filename, 
				uint64_t const rbufsize, 
				uint64_t const roffset = 0,
				uint64_t const rtotalwords = std::numeric_limits<uint64_t>::max()
			)
			: bufsize(rbufsize), buffer(bufsize,false), 
			  pa(buffer.get()), pc(pa), pe(pa),
			  Pistr(new ifstream_type(filename.c_str(),std::ios::binary)),
			  istr(*Pistr),
			  totalwords ( std::min ( ::libmaus::util::GetFileSize::getFileSize(filename) / sizeof(input_type) - roffset, rtotalwords) ),
			  totalwordsread(0),
			  checkmod(true)
			{
				if ( ! Pistr->is_open() )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "Unable to open file " << filename << ": " << strerror(errno);
					se.finish();
					throw se;
				}
				Pistr->seekg(roffset * sizeof(input_type), std::ios::beg);
				if ( ! istr )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "Unable to seek file " << filename << ": " << strerror(errno);
					se.finish();
					throw se;
				}

			}
			bool fillBuffer()
			{
				assert ( totalwordsread <= totalwords );
				uint64_t const remwords = totalwords-totalwordsread;
				uint64_t const toreadwords = std::min(remwords,bufsize);
				
				istr.read ( reinterpret_cast<char *>(buffer.get()), toreadwords * sizeof(input_type));
				uint64_t const bytesread = istr.gcount();
				
				if ( checkmod && (bytesread % sizeof(input_type) != 0) )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "SynchronousGenericInput::fillBuffer: Number of bytes " << bytesread << " read is not a multiple of entity type." << std::endl;
					se.finish();
					throw se;
				}
				
				uint64_t const wordsread = bytesread / sizeof(input_type);
					
				if ( wordsread == 0 )
				{
					if ( totalwordsread != totalwords )
					{
						std::cerr << "SynchronousGenericInput<>::getNext(): WARNING: read 0 words but there should be " <<
							remwords << " left." << std::endl;
					}
				
					return false;
				}
					
				pc = pa;
				pe = pa + wordsread;
			
				return true;
			}
			/**
			 * constructor
			 *
			 * @param rdst destination socket
			 * @param rtag tag stored with the buffer
			 * @param bufsize size used for allocating the buffer B
			 **/
			SocketOutputBufferTemplate(
				::libmaus::network::SocketBase * rdst, 
				int const rtag,
				uint64_t const bufsize
			)
			: 
				dst(rdst), 
				tag(rtag), 
				B(bufsize), 
				// start, current and end pointer of the (initially empty) buffer
				pa(B.get()), 
				pc(pa), 
				pe(pa+B.getN())
			{
			}
			AsynchronousBufferReader ( std::string const & rfilename, 
				uint64_t rnumbuffers = 16, uint64_t rbufsize = 32, uint64_t roffset = 0 )
			: filename(rfilename),
			  fd( open(filename.c_str(),O_RDONLY ) ), 
			  numbuffers(rnumbuffers), bufsize(rbufsize), 
			  bufferspace ( numbuffers * bufsize ),
			  buffers ( numbuffers ),
			  contexts(numbuffers), low(0), high(0), offset(roffset)
			{
				if ( fd < 0 )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "::libmaus::aio::AsynchronousBufferReader: Failed to open file " << filename << ": " << strerror(errno);
					se.finish();
					
					/*
					std::cerr << se.s << std::endl;
					
					kill ( getpid(), SIGSTOP );					
					*/
					
					throw se;
				}
				
				for ( unsigned int i = 0; i < numbuffers; ++i )
					buffers[i] = bufferspace.get() + i*bufsize;

				while ( high < numbuffers )
					enqueRead();
			}
Beispiel #6
0
			/**
			 * constructor; sets up an empty buffer and stores the writer
			 * and the meta id
			 *
			 * @param rW output writer
			 * @param bufsize size of buffer
			 * @param rmetaid meta information for each written block
			 **/
			MetaOutputBuffer8(
				::libmaus::aio::AsynchronousWriter & rW, 
				uint64_t const bufsize,
				uint64_t const rmetaid
			)
			: 
				B(bufsize), 
				// start, current and end pointer of the buffer
				pa(B.get()), 
				pc(pa), 
				pe(pa+B.getN()), 
				W(rW), 
				metaid(rmetaid)
			{
			}
Beispiel #7
0
			/**
			 * constructor; marks index -1 and the symbol c in the table,
			 * all other entries in [0,256) are cleared
			 *
			 * @param c symbol to be marked
			 **/
			CharTermTable(uint8_t c)
			: 
				atable(257), 
				table(atable.get()+1)
			{
				// table points one element behind the start of atable, so index -1 is backed by atable
				table[-1] = true;

				unsigned int sym = 0;
				while ( sym < 256 )
				{
					table[sym] = false;
					++sym;
				}

				table[c] = true;
			}
                        /**
                         * constructor
                         *
                         * @param rfilename name of output file
                         * @param bufsize size used for allocating the buffer B
                         * @param truncate if true, the file is opened once for writing via
                         *        std::ofstream and flushed before the buffer is used
                         **/
                        SynchronousOutputBuffer8(std::string const & rfilename, uint64_t const bufsize, bool truncate = true)
                        : 
                                filename(rfilename), 
                                B(bufsize), 
                                pa(B.get()), 
                                pc(pa), 
                                pe(pa+B.getN())
                        {
                                if ( truncate )
                                {
                                        // opening in plain output mode discards previous file content
                                        std::ofstream truncstr(filename.c_str(), std::ios::binary);
                                        truncstr.flush();
                                }
                        }
Beispiel #9
0
			/**
			 * constructor; the buffer starts out empty (current and end
			 * pointer both equal the buffer start)
			 *
			 * @param rsocket socket stored in the object
			 * @param bufsize size used for allocating the buffer B
			 **/
			SocketFastReaderBase(
				::libmaus::network::SocketBase * rsocket, 
				uint64_t const bufsize
			)
			: socket(rsocket), B(bufsize), 
			  pa(B.get()), pc(pa), pe(pc), 
			  c(0)
			{
			}
			/**
			 * constructor (std::ifstream based variant); allocates a single buffer of
			 * rnumbufs * rbufsize and seeks the stream to offset
			 *
			 * NOTE(review): the open state of the stream is not checked here
			 *
			 * @param filename name of input file
			 * @param rnumbufs number of buffers
			 * @param rbufsize size of a single buffer
			 * @param offset start offset passed to seekg
			 **/
			AsynchronousBufferReader(
				std::string const & filename, 
				uint64_t const rnumbufs, 
				uint64_t const rbufsize,
				uint64_t const offset
			)
			: 
				std::ifstream(filename.c_str()), 
				bufsize(rnumbufs * rbufsize), 
				abuffer(bufsize), 
				buffer(abuffer.get()), 
				av(true)
			{
				// position the underlying stream at the requested start offset
				std::ifstream::seekg(offset,std::ios::beg);
			}
Beispiel #11
0
			/**
			 * get the next triple from the input, refilling the buffer if it is exhausted
			 *
			 * @param triple reference for storing the next triple
			 * @return true if a triple was stored, false if no more data could be read
			 **/
			bool getNextTriple(TripleEdge & triple)
			{
				if ( ! curbufleft )
				{
					// buffer exhausted, read the next block of triples
					char * const rawbuf = reinterpret_cast<char *>(B.get());
					istr.read ( rawbuf, B.getN() * sizeof(TripleEdge) );

					std::streamsize const got = istr.gcount();

					if ( got == 0 )
						return false;

					// only complete triples are expected in the input
					assert ( got % sizeof(TripleEdge) == 0 );

					curbufleft = got / sizeof(TripleEdge);
					assert ( curbufleft );
					curtrip = B.get();
				}

				// hand out the current triple and advance
				triple = *curtrip;
				++curtrip;
				--curbufleft;

				return true;
			}
Beispiel #12
0
			static void merge(std::vector<std::string> const & inputfilenames, std::string const & outputfilename)
			{
				CheckOverlapResultMergeInput in(inputfilenames);
				std::ofstream ostr(outputfilename.c_str(),std::ios::binary);
				
				CheckOverlapResult::shared_ptr_type ptr;
				
				while ( (ptr=in.get()) )
					ptr->serialise(ostr);
					
				ostr.flush();
				assert ( ostr );
				ostr.close();
			}
			/**
			 * constructor reading from an existing input stream; no stream
			 * is owned by the object (Pistr is left empty)
			 *
			 * @param ristr input stream
			 * @param rbufsize size of the input buffer in words
			 * @param rtotalwords maximum number of words to be read
			 * @param rcheckmod value for the checkmod flag
			 **/
			SynchronousGenericInput(
				std::istream & ristr, 
				uint64_t const rbufsize, 
				uint64_t const rtotalwords = std::numeric_limits<uint64_t>::max(),
				bool const rcheckmod = true
			)
			: 
				bufsize(rbufsize), 
				buffer(bufsize,false), 
				// the buffer starts out empty
				pa(buffer.get()), 
				pc(pa), 
				pe(pa),
				Pistr(),
				istr(ristr),
				totalwords(rtotalwords),
				totalwordsread(0),
				checkmod(rcheckmod)
			{
			}
			/**
			 * constructor
			 *
			 * NOTE(review): POSIX truncate() fails for a file which does not exist, so
			 * with truncate set the file presumably has to exist beforehand - confirm
			 *
			 * @param rfilename name of output file
			 * @param bufsize size used for allocating the buffer B
			 * @param truncate if true, the file is truncated to length zero
			 * @throws ::libmaus::exception::LibMausException if truncating the file fails
			 **/
			SynchronousOutputBuffer8Posix(std::string const & rfilename, uint64_t const bufsize, bool truncate = true)
			: 
				filename(rfilename), 
				B(bufsize), 
				pa(B.get()), 
				pc(pa), 
				pe(pa+B.getN()), 
				ptr(0)
			{
				// the parameter truncate hides the system call, which thus needs the :: qualification
				if ( truncate && (::truncate(filename.c_str(),0) != 0) )
				{
					::libmaus::exception::LibMausException se;
					se.getStream() << "SynchronousOutputBuffer8Posix::SynchronousOutputBuffer8Posix(): truncate() failed: " << strerror(errno) << std::endl;
					se.finish();
					throw se;
				}
			}
                        /**
                         * constructor; opens the output file and seeks to the given offset
                         *
                         * @param rfilename name of output file
                         * @param bufsize size used for allocating the buffer B
                         * @param truncate if true, the file is created if necessary and truncated,
                         *        otherwise it is opened write only and has to exist
                         * @param offset file position passed to lseek (relative to the start of the file)
                         * @param rmetasync value for the metasync flag
                         * @throws ::libmaus::exception::LibMausException if opening or seeking fails
                         **/
                        SynchronousGenericOutputPosix(
                                std::string const & rfilename, 
                                uint64_t const bufsize, 
                                bool const truncate,
                                uint64_t const offset,
                                bool const rmetasync = true
                        )
                        : filename(rfilename), dirname(::libmaus::util::ArgInfo::getDirName(filename)), metasync(rmetasync),
                          B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()), 
                          fd ( -1 ),
                          totalwrittenbytes(0), totalwrittenwords(0)
                        {
                                // retry open() as long as it is interrupted by a signal
                                while ( (fd = ::open(filename.c_str(), truncate ? (O_WRONLY|O_TRUNC|O_CREAT) : O_WRONLY , 0755 )) < 0 )
                                {
                                        // save errno right away, any call below may overwrite it
                                        int const error = errno;

                                        switch ( error )
                                        {
                                                case EINTR:
                                                {
                                                        std::cerr << "Restarting open() system call interupted by signal." << std::endl;
                                                        break;
                                                }
                                                default:
                                                {
                                                        ::libmaus::exception::LibMausException se;
                                                        se.getStream() << "Failed to open file "<< filename <<" in SynchronousGenericOutputPosix: " <<
                                                                strerror(error);
                                                        se.finish();
                                                        throw se;
                                                }
                                        }
                                }
                                if ( lseek ( fd, offset, SEEK_SET) == static_cast<off_t>(-1) )
                                {
                                        // save the lseek() error before close() has a chance to replace it
                                        int const error = errno;
                                        close(fd);
                                        ::libmaus::exception::LibMausException se;
                                        se.getStream() << "Failed to seek " << filename << " in SynchronousGenericOutputPosix: " <<
                                                strerror(error);
                                        se.finish();
                                        throw se;
                                }
                                
                                #if 0
                                std::cerr << "File " << filename << " opened for output in "
                                        << ::libmaus::util::Demangle::demangle<this_type>() << std::endl;
                                #endif
                        }
Beispiel #16
0
			uint64_t lookupValue(uint64_t const v) const
			{				
				if ( !index.size() )
					return 0;

				typedef IndexEntryValueGetAdapter<IndexEntry const *> adapter_type;
				adapter_type IEKGA(index.get());
				::libmaus::util::ConstIterator<adapter_type,uint64_t> const ita(&IEKGA);
				::libmaus::util::ConstIterator<adapter_type,uint64_t> ite(ita);
				ite += index.size();
				
				::libmaus::util::ConstIterator<adapter_type,uint64_t> R =
					::std::lower_bound(ita,ite,v);
					
				if ( R == ite )
					return index.size()-1;
					
				if ( v == *R )
					return R-ita;
				else
					return (R-ita)-1;
			}
Beispiel #17
0
			/**
			 * constructor
			 *
			 * @param rfilenames list of file names
			 * @param rlrusize size of the LRU structure
			 **/
			FileBunchLRU ( 
				std::vector < std::string > const & rfilenames, 
				uint64_t rlrusize = 1024
			)
			: 
				LRU(rlrusize), 
				lrusize(rlrusize), 
				filenames(rfilenames), 
				mapping(filenames.size()), 
				rmapping(lrusize), 
				files(lrusize)
			{
				// every entry of mapping starts out as lrusize
				// (presumably a marker for "no slot assigned" - confirm)
				std::fill ( 
					mapping.get(), 
					mapping.get() + mapping.getN(), 
					lrusize 
				);
			}
Beispiel #18
0
			/**
			 * compute a longest common substring of a and b (judging by the traversal
			 * below) by simulating a bottom up traversal of the generalised suffix
			 * tree of a and b on top of suffix array, LCP array and PSV/NSV arrays
			 *
			 * @param a first string
			 * @param b second string
			 * @return pair (maxlcp,maxpos) where maxlcp is the largest depth of a node
			 *         having leafs from both strings below it and maxpos is the suffix
			 *         array value at the left end of that node, i.e. a position in the
			 *         concatenation built below (NOTE(review): this position may fall
			 *         into the part of the concatenation holding b - confirm how
			 *         callers interpret it)
			 **/
			static std::pair<uint32_t,uint32_t> lcs(std::string const & a, std::string const & b)
			{
				/* concatenate a and b into string c */
				// layout of c: a (symbols shifted by 2), terminator 0, b (symbols shifted by 2), terminator 1
				// NOTE(review): shifting by 2 presumably assumes symbol values small enough not to wrap in a char
				std::string c(a.size()+b.size()+2,' ');
				for ( uint64_t i = 0; i < a.size(); ++i )
					c[i] = a[i]+2;
				c[a.size()] = 0;
				for ( uint64_t i = 0; i < b.size(); ++i )
					c[a.size()+1+i] = b[i]+2;
				c[c.size()-1] = 1;
				
				// allocate suffix sorting
				::libmaus::autoarray::AutoArray<int32_t> SA(c.size(),false);
				
				// perform suffix sorting
				typedef ::libmaus::suffixsort::DivSufSort<32,uint8_t *,uint8_t const *,int32_t *,int32_t const *,8> sort_type;
				// note: saidx_t is not used in the remainder of this function
				typedef sort_type::saidx_t saidx_t;
				sort_type::divsufsort(reinterpret_cast<uint8_t const *>(c.c_str()), SA.get(), c.size());

				// compute LCP array
				::libmaus::autoarray::AutoArray<int32_t> LCP = ::libmaus::suffixsort::SkewSuffixSort<uint8_t,int32_t>::lcpByPlcp(
					reinterpret_cast<uint8_t const *>(c.c_str()), c.size(), SA.get());

				// compute psv and nsv arrays for simulating parent operation on suffix tree
				::libmaus::autoarray::AutoArray<int32_t> const prev = ::libmaus::sv::PSV::psv(LCP.get(),LCP.size());
				::libmaus::autoarray::AutoArray<int32_t> const next = ::libmaus::sv::NSV::nsv(LCP.get(),LCP.size());
				
				// debug output: one line per rank with LCP, psv and nsv values followed by the suffix
				#if defined(LCS_DEBUG)
				for ( uint64_t i = 0; i < c.size(); ++i )
				{
					std::cerr << i << "\t" << LCP[i] << "\t" << prev[i] << "\t" << next[i] << "\t";
					for ( std::string::const_iterator ita = c.begin()+SA[i]; ita != c.end(); ++ita )
						if ( isalnum(*ita) )
							std::cerr << *ita;
						else
							std::cerr << "<" << static_cast<int>(*ita) << ">" ;
					std::cerr << std::endl;
				}
				
				std::cerr << "---" << std::endl;
				#endif

				int32_t const n = c.size();
				// queue all suffix tree leafs
				// the fourth argument is 1 for suffixes starting within the first a.size()+1
				// positions of c (a and its terminator) and 2 for all others
				// (presumably the QNode arguments are left,right,depth,symmask,fill - confirm against QNode)
				std::deque < QNode > Q;
				for ( int32_t i = 0; i < n; ++i )
					Q.push_back ( QNode(i,i,0, (SA[i]< static_cast<int32_t>(a.size()+1)) ? 1:2, 1 ) );

				// construct hash for tree nodes we have seen so far
				typedef ::libmaus::util::unordered_set < QNode , HashQNode >::type hash_type;
				typedef hash_type::iterator hash_iterator_type;
				typedef hash_type::const_iterator hash_const_iterator_type;
				hash_type H(n);
				
				// we simulate a bottom up traversal of the generalised suffix tree for a and b
				while ( Q.size() )
				{
					// get node and compute parent
					QNode const I = Q.front(); Q.pop_front();
					QNode P = parent(I,LCP.get(),prev.get(),next.get(),n);

					// have we seen this node before?
					hash_iterator_type it = H.find(P);

					// no, insert it
					if ( it == H.end() )
					{
						it = H.insert(P).first;
					}
					// yes, update symbol mask and extend visited interval
					// (symmask and fill are modified through the set iterator, so they
					// presumably do not take part in hashing/comparison - confirm against QNode)
					else
					{
						it->symmask |= I.symmask;
						it->fill += (I.right-I.left+1);
					}
					
					// if this is not the root and the node is full (we have seen all its children), 
					// then put it in the queue
					// note: the local copy P is queued, not the element stored in H
					if ( P.right-P.left + 1 < n && it->isFull() )
						Q.push_back(P);
				}
				
				// maximum lcp value
				int32_t maxlcp = 0;
				uint32_t maxpos = 0;
				
				// consider all finished nodes
				for ( hash_const_iterator_type it = H.begin(); it != H.end(); ++it )
				{
					#if defined(LCS_DEBUG)
					std::cerr << *it << std::endl;
					#endif
					
					// we need to have nodes from both strings a and b under this
					// node (sym mask has bits for 1 and 2 set) and the lcp value must be 
					// larger than what we already have
					if ( it->symmask == 3 && it->depth > maxlcp )
					{
						maxlcp = it->depth;
						maxpos = SA[it->left];
					}
				}
				
				// (0,0) is returned if no node with leafs from both strings and positive depth exists
				return std::pair<uint32_t,uint32_t>(maxlcp,maxpos);
			}
			/**
			 * send the buffered data (if any) as a message with the stored tag
			 * to the destination socket and mark the buffer as empty
			 **/
			void writeBuffer()
			{
				// do not send a message for an empty buffer
				if ( pc != pa )
				{
					dst->writeMessage ( tag , B.get() , pc-pa );
				}

				// the buffer is empty again
				pc = pa;
			}
Beispiel #20
0
			/**
			 * constructor; allocates an array of rs elements, all set to zero
			 *
			 * @param rs number of elements
			 **/
			SlowCumFreq(uint64_t rs)
			: 
				s(rs), 
				A(s,false)
			{
				// set all s entries of A to zero explicitly
				std::fill(
					A.get(),
					A.get()+s,
					0
				);
			}