Пример #1
0
			/**
			 * send the contents of this object to a set of receivers
			 *
			 * The serialised FI object is written to each socket in secondarysockets.
			 * Afterwards the designators, shortpointers, longpointers and text arrays
			 * are handed, in this order, to UDPSocket::sendArrayBroadcast.
			 * Progress messages are printed on std::cerr.
			 *
			 * @param interface network interface passed on to sendArrayBroadcast
			 * @param broadcastport port passed on to sendArrayBroadcast
			 * @param secondarysockets sockets of the receivers
			 * @param packsize packet size passed on to sendArrayBroadcast
			 **/
			void broadcastSend(
				::libmaus2::network::Interface const & interface,
				unsigned short const broadcastport,
				::libmaus2::autoarray::AutoArray < ::libmaus2::network::ClientSocket::unique_ptr_type > & secondarysockets,
				unsigned int const packsize = 508
			) const
			{
				// the interval description goes to each receiver over its own socket
				std::cerr << "Writing FI...";
				for ( uint64_t socketid = 0; socketid < secondarysockets.size(); ++socketid )
				{
					secondarysockets[socketid]->writeString(FI.serialise());
				}
				std::cerr << "done.";

				// the arrays follow one after the other via sendArrayBroadcast
				std::cerr << "Broadcasting designators...";
				::libmaus2::network::UDPSocket::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					designators.get(),designators.size(),packsize
				);
				std::cerr << "done.";

				std::cerr << "Broadcasting shortpointers...";
				::libmaus2::network::UDPSocket::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					shortpointers.get(),shortpointers.size(),packsize
				);
				std::cerr << "done.";

				std::cerr << "Broadcasting longpointers...";
				::libmaus2::network::UDPSocket::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					longpointers.get(),longpointers.size(),packsize
				);
				std::cerr << "done.";

				std::cerr << "Broadcasting text...";
				::libmaus2::network::UDPSocket::sendArrayBroadcast(
					interface,broadcastport,secondarysockets,
					text.get(),text.size(),packsize
				);
				std::cerr << "done.";
			}
Пример #2
0
			/**
			 * write the terminator with number num
			 *
			 * The number is split into expo digits to base "base". Each digit is
			 * incremented by one and stored in termbuf, filling the buffer from its
			 * end towards its start. The buffer is then passed to put() from front
			 * to back, one element at a time.
			 *
			 * @param num terminator number
			 **/
			void putTerm(uint64_t num)
			{
				// start one past the last element and fill termbuf backwards
				uint8_t * digit = termbuf.get() + termbuf.getN();
				unsigned int stored = 0;
				while ( stored < expo )
				{
					--digit;
					*digit = (num % base) + 1;
					num /= base;
					++stored;
				}

				// after expo digits we need to have arrived at the front of termbuf
				assert ( digit == termbuf.get() );

				unsigned int emitted = 0;
				while ( emitted < expo )
				{
					put( *digit );
					++digit;
					++emitted;
				}
			}
Пример #3
0
			/**
			 * constructor
			 *
			 * Opens the file read only, splits bufferspace into numbuffers slices of
			 * bufsize elements each and calls enqueRead() until high reaches numbuffers.
			 *
			 * @param rfilename file name
			 * @param rnumbuffers number of buffers
			 * @param rbufsize size of each buffer
			 * @param roffset initial file offset
			 * @throws ::libmaus2::exception::LibMausException if the file cannot be opened
			 **/
			AsynchronousBufferReader ( std::string const & rfilename, uint64_t rnumbuffers = 16, uint64_t rbufsize = 32, uint64_t roffset = 0 )
			: filename(rfilename),
			  fd( open(filename.c_str(),O_RDONLY ) ),
			  numbuffers(rnumbuffers), bufsize(rbufsize),
			  bufferspace ( numbuffers * bufsize ),
			  buffers ( numbuffers ),
			  contexts(numbuffers), low(0), high(0), offset(roffset)
			{
				if ( fd < 0 )
				{
					::libmaus2::exception::LibMausException se;
					se.getStream() << "::libmaus2::aio::AsynchronousBufferReader: Failed to open file " << filename << ": " << strerror(errno);
					se.finish();
					throw se;
				}

				// the buffer count is passed in as a 64 bit value, so the index has to be
				// 64 bit wide as well; a narrower counter would wrap around and never
				// reach the bound for counts beyond the range of unsigned int
				for ( uint64_t i = 0; i < numbuffers; ++i )
					buffers[i] = bufferspace.get() + i*bufsize;

				// call enqueRead() until high has reached the number of buffers
				while ( high < numbuffers )
					enqueRead();
			}
Пример #4
0
			/**
			 * constructor
			 *
			 * @param rdst socket stored as dst
			 * @param rtag tag stored for this buffer
			 * @param bufsize size the buffer B is constructed with
			 **/
			SocketOutputBufferTemplate(
				::libmaus2::network::SocketBase * rdst,
				int const rtag,
				uint64_t const bufsize)
			:
				dst(rdst),
				tag(rtag),
				B(bufsize),
				// pa marks the start of B, pc starts out at pa, pe is one past the end of B
				pa(B.get()),
				pc(pa),
				pe(pa+B.getN())
			{
			}
Пример #5
0
                        /**
                         * constructor by output stream
                         *
                         * The internal buffer B is set up with bufsize and the
                         * pointers pa, pc and pe are placed at its start, its
                         * start and one past its end respectively.
                         *
                         * @param out output stream
                         * @param bufsize output buffer size
                         **/
                        SynchronousGenericOutput(std::ostream & out, uint64_t const bufsize)
                        :
                          B(bufsize),
                          pa(B.get()),
                          pc(pa),
                          pe(pa+B.getN()),
                          W(out),
                          datawrittentofile(0)
                        {
                        }
Пример #6
0
 /**
  * refill the buffer B from the input stream
  *
  * Requests n elements of data_type from in, stores the number of complete
  * elements obtained in f and resets c to zero.
  **/
 void fillBuffer()
 {
     char * const raw = reinterpret_cast<char *>(B.get());
     in.read( raw, n * sizeof(data_type) );

     // the stream must not have delivered a partial element
     assert ( (in.gcount() % sizeof(data_type)) == 0 );

     f = in.gcount() / sizeof(data_type);
     c = 0;
 }
Пример #7
0
			/**
			 * constructor
			 *
			 * Sets up a table in which exactly the entries for index -1 and
			 * for the symbol c are true.
			 *
			 * @param c symbol whose table entry is set to true
			 **/
			CharTermTable(uint8_t c)
			: atable(257), table(atable.get()+1)
			{
				// table points one element into atable, so index -1 is a valid slot
				table[-1] = true;

				// of the 256 symbol entries only the one for c is true
				for ( unsigned int sym = 0; sym < 256; ++sym )
					table[sym] = (sym == c);
			}
Пример #8
0
			/**
			 * constructor from a sequence of numbers
			 *
			 * Values smaller than 256 are stored in A8, all other values in A64.
			 * B holds one bit per value (set if the value is smaller than 256)
			 * and R is a rank dictionary on B used to compute the index of a
			 * value inside A8 or A64.
			 *
			 * @param a start of the sequence
			 * @param e end of the sequence
			 **/
			Array864(iterator a, iterator e)
			{
				n = e-a;

				// nothing else is set up for an empty sequence
				if ( ! n )
					return;

				// one bit per value, rounded up to full 64 bit words
				B = ::libmaus2::autoarray::AutoArray<data_type>((n+63)/64);
				writer_type bitwriter(B.get());

				for ( iterator it = a; it != e; ++it )
					bitwriter.writeBit( *it < 256 );

				bitwriter.flush();

				::libmaus2::rank::ERank222B::unique_ptr_type rankptr(
					new ::libmaus2::rank::ERank222B(B.get(), B.size()*64)
				);
				R = UNIQUE_PTR_MOVE(rankptr);

				// rank of the last position yields the number of values of either kind
				uint64_t const numsmall = R->rank1(n-1);
				uint64_t const numlarge = R->rank0(n-1);

				A8 = ::libmaus2::autoarray::AutoArray<uint8_t>(numsmall,false);
				A64 = ::libmaus2::autoarray::AutoArray<uint64_t>(numlarge,false);

				// distribute the values to the two arrays
				uint64_t pos = 0;
				for ( iterator it = a; it != e; ++it )
				{
					if ( *it < 256 )
						A8[ R->rank1(pos)-1 ] = *it;
					else
						A64[ R->rank0(pos)-1 ] = *it;

					++pos;
				}

				#if defined(ARRAY864DEBUG)
				#if defined(_OPENMP)
				#pragma omp parallel for
				#endif
				for ( int64_t i = 0; i < static_cast<int64_t>(n); ++i )
					assert ( (*this)[i] == a[i] );
				#endif
			}
Пример #9
0
			/**
			 * constructor by file name
			 *
			 * @param filename name of output file
			 * @param bufsize size of output buffer
			 * @param truncate true if file should be truncated false data should be appended
			 * @param offset write offset in bytes
			 **/
                        SynchronousGenericOutput(std::string const & filename, uint64_t const bufsize, bool const truncate = true, uint64_t const offset = 0, bool const /* metasync */ = true)
                        : B(bufsize,false), pa(B.get()), pc(pa), pe(pa+B.getN()), 
                          PW ( truncate ? new ofstream_type(filename) : 0),
                          PF ( truncate ? 0 : new std::fstream(filename.c_str(), std::ios::binary|std::ios::in|std::ios::out|std::ios::ate) ),
                          W  ( truncate ? (static_cast<std::ostream &>(*PW)) : (static_cast<std::ostream &>(*PF)) ),
                          datawrittentofile(0)
                        {
                        	W.seekp(offset,std::ios::beg);
                        }
Пример #10
0
			/**
			 * constructor
			 *
			 * @param rsocket socket stored in the reader
			 * @param bufsize size the buffer B is constructed with
			 **/
			SocketFastReaderBase(::libmaus2::network::SocketBase * rsocket, uint64_t const bufsize)
			: socket(rsocket), B(bufsize),
			  // pa, pc and pe all start out at the beginning of B
			  pa(B.get()), pc(pa), pe(pc),
			  c(0)
			{
			}
Пример #11
0
			/**
			 * constructor
			 *
			 * Opens filename via the InputStreamInstance base class, allocates
			 * a single buffer of rnumbufs * rbufsize elements and seeks the
			 * stream to offset, counted from the start of the file.
			 *
			 * @param filename file name
			 * @param rnumbufs number of buffers
			 * @param rbufsize size of each buffer
			 * @param offset initial file offset
			 **/
			AsynchronousBufferReader(
				std::string const & filename,
				uint64_t const rnumbufs,
				uint64_t const rbufsize,
				uint64_t const offset
			)
			:
				libmaus2::aio::InputStreamInstance(filename),
				bufsize(rnumbufs * rbufsize),
				abuffer(bufsize),
				buffer(abuffer.get()),
				av(true)
			{
				libmaus2::aio::InputStreamInstance::seekg(offset,std::ios::beg);
			}
Пример #12
0
			/**
			 * access operator
			 *
			 * @param i index of element to be accessed
			 * @return element at index i
			 * @throws ::libmaus2::exception::LibMausException if i is not smaller than n
			 **/
			uint64_t operator[](uint64_t const i) const
			{
				if ( !(i < n) )
				{
					::libmaus2::exception::LibMausException se;
					se.getStream() << "Access of element " << i << " >= " << n << " in Array864::operator[]";
					se.finish();
					throw se;
				}

				// a cleared bit in B means the value is stored in A64
				if ( ! ::libmaus2::bitio::getBit(B.get(),i) )
					return A64[R->rank0(i)-1];

				// otherwise it is stored in A8
				return A8[R->rank1(i)-1];
			}
Пример #13
0
			/**
			 * constructor
			 *
			 * @param rfilenames list of file names
			 * @param rlrusize size of the LRU structure
			 **/
			FileBunchLRU ( std::vector < std::string > const & rfilenames, uint64_t rlrusize = 1024)
			:
				LRU(rlrusize),
				lrusize(rlrusize),
				filenames ( rfilenames ),
				mapping(filenames.size()),
				rmapping(lrusize),
				files(lrusize)
			{
				// every entry of mapping starts out as lrusize
				// NOTE(review): presumably lrusize serves as the "not mapped" marker - confirm
				for ( uint64_t id = 0; id < mapping.getN(); ++id )
					mapping.get()[id] = lrusize;
			}
Пример #14
0
 /**
  * write the first f elements of the buffer B to out and reset f to zero
  **/
 void writeContents()
 {
     char const * const raw = reinterpret_cast<char const *>(B.get());
     out.write( raw, f * sizeof(data_type) );

     // the buffer is empty again
     f = 0;
 }
Пример #15
0
			/**
			 * send the buffered data (the elements between pa and pc), if any,
			 * to dst as a message carrying tag and reset pc to pa
			 **/
			void writeBuffer()
			{
				// no message is sent for an empty buffer
				if ( pc != pa )
				{
					dst->writeMessage ( tag , B.get() , pc-pa );
				}

				pc = pa;
			}
Пример #16
0
			/**
			 * compute a longest common substring of a and b
			 *
			 * The strings are concatenated into one text with separator symbols, a suffix
			 * array and LCP array are computed for this text and a bottom up traversal of
			 * the suffix tree is simulated on top of these arrays. Among the nodes having
			 * suffixes from both strings below them the one with the largest depth is reported.
			 *
			 * @param a first string
			 * @param b second string
			 * @return LCSResult constructed from the maximum depth found and one suffix position each
			 *         in a and b taken from the suffix array interval of the respective node
			 **/
			static LCSResult lcs(std::string const & a, std::string const & b)
			{
				/* concatenate a and b into string c */
				// all symbols are shifted by 2 so that 0 and 1 are free for use as terminators:
				// 0 is stored behind a and 1 behind b
				std::string c(a.size()+b.size()+2,' ');
				for ( uint64_t i = 0; i < a.size(); ++i )
					c[i] = a[i]+2;
				c[a.size()] = 0;
				for ( uint64_t i = 0; i < b.size(); ++i )
					c[a.size()+1+i] = b[i]+2;
				c[c.size()-1] = 1;

				// allocate suffix sorting
				::libmaus2::autoarray::AutoArray<int32_t> SA(c.size(),false);

				// perform suffix sorting
				typedef ::libmaus2::suffixsort::DivSufSort<32,uint8_t *,uint8_t const *,int32_t *,int32_t const *,alphabet_size+2> sort_type;
				sort_type::divsufsort(reinterpret_cast<uint8_t const *>(c.c_str()), SA.get(), c.size());

				// compute LCP array
				::libmaus2::autoarray::AutoArray<int32_t> LCP = ::libmaus2::suffixsort::SkewSuffixSort<uint8_t,int32_t>::lcpByPlcp(
					reinterpret_cast<uint8_t const *>(c.c_str()), c.size(), SA.get());

				// compute psv and nsv arrays for simulating parent operation on suffix tree
				::libmaus2::autoarray::AutoArray<int32_t> const prev = ::libmaus2::sv::PSV::psv(LCP.get(),LCP.size());
				::libmaus2::autoarray::AutoArray<int32_t> const next = ::libmaus2::sv::NSV::nsv(LCP.get(),LCP.size());

				#if defined(LCS_DEBUG)
				// print LCP, prev and next values together with the suffix for each rank
				for ( uint64_t i = 0; i < c.size(); ++i )
				{
					std::cerr << i << "\t" << LCP[i] << "\t" << prev[i] << "\t" << next[i] << "\t";
					for ( std::string::const_iterator ita = c.begin()+SA[i]; ita != c.end(); ++ita )
						if ( isalnum(*ita) )
							std::cerr << *ita;
						else
							std::cerr << "<" << static_cast<int>(*ita) << ">" ;
					std::cerr << std::endl;
				}

				std::cerr << "---" << std::endl;
				#endif

				int32_t const n = c.size();
				// queue all suffix tree leafs
				// the fourth argument is 1 for suffixes starting in a or at its terminator
				// and 2 for all others (suffixes starting in b or at its terminator)
				std::deque < QNode > Q;
				for ( int32_t i = 0; i < n; ++i )
					Q.push_back ( QNode(i,i,0, (SA[i]< static_cast<int32_t>(a.size()+1)) ? 1:2, 1 ) );

				// construct hash for tree nodes we have seen so far
				typedef ::libmaus2::util::unordered_set < QNode , HashQNode >::type hash_type;
				typedef hash_type::iterator hash_iterator_type;
				typedef hash_type::const_iterator hash_const_iterator_type;
				hash_type H(n);

				// we simulate a bottom up traversal of the generalised suffix tree for a and b
				while ( Q.size() )
				{
					// get node and compute parent
					QNode const I = Q.front(); Q.pop_front();
					QNode P = parent(I,LCP.get(),prev.get(),next.get(),n);

					// have we seen this node before?
					hash_iterator_type it = H.find(P);

					// no, insert it
					if ( it == H.end() )
					{
						it = H.insert(P).first;
					}
					// yes, update symbol mask and extend visited interval
					else
					{
						it->symmask |= I.symmask;
						it->fill += (I.right-I.left+1);
					}

					// if this is not the root and the node is full (we have seen all its children),
					// then put it in the queue
					// NOTE(review): the local copy P is queued, not the updated element *it - confirm
					// that parent() already provides the values needed further up
					if ( P.right-P.left + 1 < n && it->isFull() )
						Q.push_back(P);
				}

				// maximum lcp value
				int32_t maxlcp = 0;
				// suffix positions in a and b belonging to the maximum found
				uint32_t maxpos_a = 0;
				uint32_t maxpos_b = 0;

				// consider all finished nodes
				for ( hash_const_iterator_type it = H.begin(); it != H.end(); ++it )
				{
					#if defined(LCS_DEBUG)
					std::cerr << *it << std::endl;
					#endif

					// we need to have nodes from both strings a and b under this
					// node (sym mask has bits for 1 and 2 set) and the lcp value must be
					// larger than what we already have
					if (
						it->symmask == 3 && it->depth > maxlcp
					)
					{
						maxlcp = it->depth;

						// scan the suffix array interval of the node; the last suffix seen
						// for either string determines the reported position
						for ( int32_t q = it->left; q <= it->right; ++q )
						{
							if ( SA[q] < static_cast<int32_t>(a.size()) )
								maxpos_a = SA[q];
							else
								// translate the position in c to a position in b
								maxpos_b = SA[q] - (a.size()+1);
						}
					}
				}

				return LCSResult(maxlcp,maxpos_a,maxpos_b);
			}
Пример #17
0
			/**
			 * constructor
			 *
			 * Sets up the buffer B with bufsize elements, places pa and pc at
			 * its start and pe one past its end, and constructs W from the
			 * file name and the constant 16.
			 *
			 * @param filename output file name
			 * @param bufsize size of output buffer in elements
			 **/
			OutputBuffer(std::string const & filename, uint64_t const bufsize)
			:
				B(bufsize),
				pa(B.get()),
				pc(pa),
				pe(pa+B.getN()),
				W(filename,16)
			{
			}
Пример #18
0
			/**
			 * constructor from a set of files and an interval
			 *
			 * Computes a two level pointer structure for the reads in the interval
			 * (designator bits, 16 bit short pointers and 64 bit long pointers),
			 * loads the text of the interval and sets up the rank dictionary
			 * on the designator bits.
			 *
			 * @param filenames names of the input files
			 * @param rFI interval of reads to be loaded
			 * @param verbose if true, progress information is printed on std::cerr
			 * @throws libmaus2::exception::LibMausException if the text cannot be read completely
			 **/
			CompactReadContainer(
				std::vector<std::string> const & filenames,
				::libmaus2::fastx::FastInterval const & rFI,
				bool const verbose = false
			)
			: FI(rFI), numreads(FI.high-FI.low), designators( (numreads+63)/64 ), shortpointers(numreads,false), longpointers(), text(FI.fileoffsethigh-FI.fileoffset,false)
			{
				typedef ::libmaus2::fastx::CompactFastConcatDecoder reader_type;
				// typedef reader_type::pattern_type pattern_type;
				reader_type CFD(filenames,FI);

				// codepos is handed to CFD.skipPattern for each read, offsetbase is the
				// value of codepos at the time the most recent long pointer was stored
				uint64_t codepos = 0;
				uint64_t offsetbase = 0;

				// bool const verbose = true;

				// progress is printed for every bmod'th read, where bmod is obtained by
				// passing roughly one percent of the number of reads to nextTwoPow
				uint64_t const mod = std::max((numreads+50)/100,static_cast<uint64_t>(1));
				uint64_t const bmod = libmaus2::math::nextTwoPow(mod);
				uint64_t const bmask = bmod-1;

				if ( verbose )
				{
					if ( isatty(STDERR_FILENO) )
						std::cerr << "Computing designators/pointers...";
					else
						std::cerr << "Computing designators/pointers..." << std::endl;
				}

				// long pointers are collected in a vector first, the first one is 0
				std::vector < uint64_t > prelongpointers;
				prelongpointers.push_back(0);
				writer_type W(designators.get());
				for ( uint64_t i = 0; i < numreads; ++i )
				{
					// if the distance to the current base no longer fits into 16 bits, then
					// mark this read by a 1 bit and store a new long pointer
					if (
						(
							codepos-offsetbase
							>
							static_cast<uint64_t>(std::numeric_limits<uint16_t>::max())
						)
					)
					{
						W.writeBit(1);
						offsetbase = codepos;
						prelongpointers.push_back(offsetbase);
					}
					else
					{
						W.writeBit(0);
					}
					// the short pointer is relative to the most recent long pointer
					shortpointers[i] = codepos-offsetbase;

					// NOTE(review): presumably advances codepos past the next pattern - confirm
					CFD.skipPattern(codepos);

					if ( verbose && ((i & (bmask)) == 0) )
					{
						if ( isatty(STDERR_FILENO) )
							std::cerr << "(" << i/static_cast<double>(numreads)  << ")";
						else
							std::cerr << "Finished " << i/static_cast<double>(numreads)  << std::endl;
					}
				}
				W.flush();

				// copy the collected long pointers to their final array
				longpointers = ::libmaus2::autoarray::AutoArray< uint64_t >(prelongpointers.size(),false);
				std::copy(prelongpointers.begin(),prelongpointers.end(),longpointers.begin());

				if ( verbose )
					std::cerr << "Done." << std::endl;

				if ( verbose )
					std::cerr << "Loading text...";
				// read text.size() elements into text; FI.fileoffset is passed to the reader
				std::vector < libmaus2::aio::FileFragment > const frags =
					::libmaus2::fastx::CompactFastDecoder::getDataFragments(filenames);
				::libmaus2::aio::ReorderConcatGenericInput<uint8_t> RCGI(frags,64*1024,text.size(),FI.fileoffset);
				uint64_t const textread = RCGI.read(text.begin(),text.size());

				// a short read is treated as an error
				if ( textread != text.size() )
				{
					libmaus2::exception::LibMausException se;
					se.getStream() << "Failed to read text in CompactReadContainer." << std::endl;
					se.finish();
					throw se;
				}
				if ( verbose )
					std::cerr << "done." << std::endl;

				if ( verbose )
					std::cerr << "Setting up rank dictionary for designators...";
				setupRankDictionary();
				if ( verbose )
					std::cerr << "done." << std::endl;

				// disabled consistency check comparing the computed pointers with the
				// positions reported by a second decoder
				#if 0
				std::cerr << "Checking dict...";
				reader_type CFD2(filenames,FI);
				for ( uint64_t i = 0; i < numreads; ++i )
				{
					if ( CFD2.istr.getptr != longpointers [ designatorrank->rank1(i) ] + shortpointers[i] )
					{
						std::cerr << "Failure for i=" << i << std::endl;
						std::cerr << "Ptr is " << CFD2.istr.getptr << std::endl;
						std::cerr << "Expected " << longpointers [ designatorrank->rank1(i) ] + shortpointers[i] << std::endl;
						assert ( CFD2.istr.getptr == longpointers [ designatorrank->rank1(i) ] + shortpointers[i] );
					}
					::libmaus2::fastx::Pattern pattern;
					CFD2.getNextPatternUnlocked(pattern);
				}
				std::cerr << "done." << std::endl;
				#endif
			}
Пример #19
0
			/**
			 * compute an alignment of a and b by dynamic programming
			 *
			 * The matrix M is filled row by row (one row per symbol of b, one column
			 * per symbol of a, plus an initial row and column), where each cell stores
			 * the best score together with the step by which it was reached. Afterwards
			 * the steps are traced back from the last cell to the first cell and stored
			 * in the trace ending at te.
			 *
			 * NOTE(review): M, n1, ta and te are not declared here; presumably setup(n,m,k)
			 * allocates them and n1 equals n+1 - confirm.
			 *
			 * @param a start of first sequence
			 * @param n length of first sequence
			 * @param b start of second sequence
			 * @param m length of second sequence
			 * @param k passed on to setup(), not used otherwise in this function
			 * @param gain_match score added for a match
			 * @param penalty_subst score subtracted for a mismatch
			 * @param penalty_ins score subtracted for a step to the next row (STEP_INS)
			 * @param penalty_del score subtracted for a step to the next column (STEP_DEL)
			 * @return EditDistanceResult constructed from the number of insertions, deletions,
			 *         matches and mismatches on the traced back path
			 **/
			EditDistanceResult process(
				iterator_a a, 
				uint64_t const n,
				iterator_b b,
				uint64_t const m,
				uint64_t const k = 0,
				similarity_type const gain_match = 1,
				similarity_type const penalty_subst = 1,
				similarity_type const penalty_ins = 1,
				similarity_type const penalty_del = 1
			)
			{
				setup(n,m,k);
			
				// p is the write pointer for the row currently computed
				element_type * p = M.begin();

				// first row: score decreases by penalty_del per column
				int64_t firstpen = 0;
				for ( uint64_t i = 0; i < n1; ++i, firstpen -= penalty_del )
					*(p++) = element_type(firstpen,STEP_DEL);
					
				// q is the read pointer for the row above the current one
				element_type * q = M.begin();

				iterator_a const ae = a+n;
				iterator_b const be = b+m;				
				while ( b != be )
				{
					typename std::iterator_traits<iterator_b>::value_type const bchar = *(b++);
					
					// both pointers need to be at the start of a row
					assert ( (p-M.begin()) % n1 == 0 );
					assert ( (q-M.begin()) % n1 == 0 );
					
					// top
					*p = element_type(q->first-penalty_ins,STEP_INS);
					
					for ( iterator_a aa = a; aa != ae; ++aa )
					{
						// left
						similarity_type const left =  p->first - penalty_del;
						// diagonal match?
						bool const dmatch = (*aa == bchar);
						// diagonal
						similarity_type const diag =
							dmatch 
							?
							(q->first + gain_match)
							:
							(q->first - penalty_subst);
						// move pointer in row above
						q++;
						// top
						similarity_type const top = q->first - penalty_ins;
						// move pointer in current row
						p++;
					
						// choose the best of the three candidates, ties are broken
						// according to the configured priority
						switch ( edit_distance_priority )
						{
							// on ties prefer left over top over diagonal
							case del_ins_diag:
								if ( left >= top )
								{
									if ( left >= diag )
										// left
										*p = element_type(left,STEP_DEL);
									else							
										// diag
										*p = element_type(diag,dmatch ? STEP_MATCH : STEP_MISMATCH);
								}
								// top >= left
								else
								{
									if ( top >= diag )
										// top
										*p = element_type(top,STEP_INS);
									else
										// diag
										*p = element_type(diag,dmatch ? STEP_MATCH : STEP_MISMATCH);
								}
								break;
							// on ties prefer diagonal over left over top
							case diag_del_ins:
								if ( diag >= left )
								{
									if ( diag >= top )
										// diag
										*p = element_type(diag,dmatch ? STEP_MATCH : STEP_MISMATCH);
									else
										// top
										*p = element_type(top,STEP_INS);
								}
								else
								{
									if ( left >= top )
										// left
										*p = element_type(left,STEP_DEL);
									else
										// top
										*p = element_type(top,STEP_INS);
								}
								break;
						}	
					}	
					
					// step over the last cell of the row to reach the start of the next row
					p++;
					q++;				
				}
				
				// move b back to the start of the second sequence
				b -= m;

				// trace back starting from the cell in row m and column n
				uint64_t i = n;
				uint64_t j = m;
				element_type * pq = M.get() + j*n1 + i;

				// the trace is written back to front, ending at te
				ta = te;
				
				uint64_t numdel = 0;
				uint64_t numins = 0;
				uint64_t nummat = 0;
				uint64_t nummis = 0;

				// follow the stored steps until the first cell of the matrix is reached
				while ( pq != M.begin() )
				{
					*(--ta) = pq->second;
					
					switch ( pq->second )
					{
						// previous row
						case STEP_INS:
							pq -= n1;
							numins++;
							break;
						// previous column
						case STEP_DEL:
							pq -= 1;
							numdel++;
							break;
						// diagonal
						case STEP_MATCH:
							pq -= (n1+1);
							nummat++;
							break;
						// diagonal
						case STEP_MISMATCH:
							pq -= (n1+1);
							nummis++;
							break;
						// NOTE(review): any other step value leaves pq unchanged, so the loop
						// would not terminate - confirm no other values can be stored
						default:
							break;
					}
				}
				
				return EditDistanceResult(numins,numdel,nummat,nummis);
			}	
Пример #20
0
			void setupRankDictionary()
			{
				rank_ptr_type tdesignatorrank(new rank_type(designators.get(), designators.size()*64));
				designatorrank = UNIQUE_PTR_MOVE(tdesignatorrank);
			}