/**
 * send the contents of this container to a set of secondary sockets
 *
 * The serialised FI object is written to every socket in secondarysockets
 * via writeString. Afterwards the designators, shortpointers, longpointers
 * and text arrays are handed, in this order, to
 * ::libmaus2::network::UDPSocket::sendArrayBroadcast. Progress messages are
 * printed on std::cerr.
 *
 * @param interface network interface passed on to sendArrayBroadcast
 * @param broadcastport port passed on to sendArrayBroadcast
 * @param secondarysockets sockets receiving FI; also passed on to sendArrayBroadcast
 * @param packsize packet size passed on to sendArrayBroadcast
 **/
void broadcastSend(
	::libmaus2::network::Interface const & interface,
	unsigned short const broadcastport,
	::libmaus2::autoarray::AutoArray < ::libmaus2::network::ClientSocket::unique_ptr_type > & secondarysockets,
	unsigned int const packsize = 508
) const
{
	std::cerr << "Writing FI...";
	uint64_t sockid = 0;
	while ( sockid < secondarysockets.size() )
	{
		secondarysockets[sockid]->writeString(FI.serialise());
		++sockid;
	}
	std::cerr << "done.";

	std::cerr << "Broadcasting designators...";
	::libmaus2::network::UDPSocket::sendArrayBroadcast(
		interface, broadcastport, secondarysockets,
		designators.get(), designators.size(), packsize
	);
	std::cerr << "done.";

	std::cerr << "Broadcasting shortpointers...";
	::libmaus2::network::UDPSocket::sendArrayBroadcast(
		interface, broadcastport, secondarysockets,
		shortpointers.get(), shortpointers.size(), packsize
	);
	std::cerr << "done.";

	std::cerr << "Broadcasting longpointers...";
	::libmaus2::network::UDPSocket::sendArrayBroadcast(
		interface, broadcastport, secondarysockets,
		longpointers.get(), longpointers.size(), packsize
	);
	std::cerr << "done.";

	std::cerr << "Broadcasting text...";
	::libmaus2::network::UDPSocket::sendArrayBroadcast(
		interface, broadcastport, secondarysockets,
		text.get(), text.size(), packsize
	);
	std::cerr << "done.";
}
/**
 * put terminator number num
 *
 * num is decomposed into expo digits in base "base". The digits are stored
 * in termbuf back to front, each incremented by one, and then passed to
 * put() front to back (i.e. most significant digit first).
 *
 * @param num terminator number
 **/
void putTerm(uint64_t num)
{
	// start one past the last element of termbuf and fill towards the front
	uint8_t * cursor = termbuf.get() + termbuf.getN();

	for ( unsigned int digit = 0; digit < expo; ++digit )
	{
		--cursor;
		*cursor = (num % base) + 1;
		num /= base;
	}

	// exactly the whole buffer must have been filled
	assert ( cursor == termbuf.get() );

	for ( unsigned int digit = 0; digit < expo; ++digit )
	{
		uint8_t const sym = *cursor;
		++cursor;
		put(sym);
	}
}
/** * constructor * * @param rfilename file name * @param rnumbuffers number of buffers * @param rbufsize size of each buffer * @param roffset initial file offset **/ AsynchronousBufferReader ( std::string const & rfilename, uint64_t rnumbuffers = 16, uint64_t rbufsize = 32, uint64_t roffset = 0 ) : filename(rfilename), fd( open(filename.c_str(),O_RDONLY ) ), numbuffers(rnumbuffers), bufsize(rbufsize), bufferspace ( numbuffers * bufsize ), buffers ( numbuffers ), contexts(numbuffers), low(0), high(0), offset(roffset) { if ( fd < 0 ) { ::libmaus2::exception::LibMausException se; se.getStream() << "::libmaus2::aio::AsynchronousBufferReader: Failed to open file " << filename << ": " << strerror(errno); se.finish(); /* std::cerr << se.s << std::endl; kill ( getpid(), SIGSTOP ); */ throw se; } for ( unsigned int i = 0; i < numbuffers; ++i ) buffers[i] = bufferspace.get() + i*bufsize; while ( high < numbuffers ) enqueRead(); }
/**
 * constructor
 *
 * Allocates a buffer of bufsize elements and sets the buffer start (pa),
 * current (pc) and end (pe) pointers so that the buffer is empty.
 *
 * @param rdst destination socket
 * @param rtag tag stored in this object
 * @param bufsize number of elements in the buffer
 **/
SocketOutputBufferTemplate(
	::libmaus2::network::SocketBase * rdst,
	int const rtag,
	uint64_t const bufsize
)
: dst(rdst),
  tag(rtag),
  B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pa + B.getN())
{
}
/**
 * constructor by output stream
 *
 * Allocates a buffer of bufsize elements, marks it as empty (pc == pa),
 * binds W to the given stream and sets datawrittentofile to zero.
 *
 * @param out output stream
 * @param bufsize output buffer size
 **/
SynchronousGenericOutput(
	std::ostream & out,
	uint64_t const bufsize
)
: B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pa + B.getN()),
  W(out),
  datawrittentofile(0)
{
}
/**
 * refill buffer B from the input stream
 *
 * Requests n elements of data_type from in. Afterwards f holds the number
 * of elements obtained (byte count reported by gcount() divided by the
 * element size) and the index c is reset to zero.
 **/
void fillBuffer()
{
	char * const rawbuffer = reinterpret_cast<char *>(B.get());
	in.read(rawbuffer, n * sizeof(data_type));

	// only complete elements are expected
	assert ( (in.gcount() % sizeof(data_type)) == 0 );

	f = in.gcount() / sizeof(data_type);
	c = 0;
}
/**
 * constructor
 *
 * Allocates 257 table entries; table points one element past the start of
 * the allocation so that indices -1,0,...,255 are valid. The entries for
 * index -1 and for c are set to true, all others to false.
 *
 * @param c symbol whose table entry is set to true
 **/
CharTermTable(uint8_t c)
: atable(257),
  table(atable.get() + 1)
{
	for ( int sym = -1; sym < 256; ++sym )
	{
		bool const marked = (sym == -1) || (sym == static_cast<int>(c));
		table[sym] = marked;
	}
}
Array864(iterator a, iterator e) { n = e-a; if ( n ) { B = ::libmaus2::autoarray::AutoArray<data_type>((n+63)/64); writer_type W(B.get()); for ( iterator i = a; i != e; ++i ) W.writeBit( *i < 256 ); W.flush(); ::libmaus2::rank::ERank222B::unique_ptr_type tR(new ::libmaus2::rank::ERank222B(B.get(), B.size()*64)); R = UNIQUE_PTR_MOVE(tR); uint64_t const n8 = R->rank1(n-1); uint64_t const n64 = R->rank0(n-1); A8 = ::libmaus2::autoarray::AutoArray<uint8_t>(n8,false); A64 = ::libmaus2::autoarray::AutoArray<uint64_t>(n64,false); uint64_t j = 0; for ( iterator i = a; i != e; ++i,++j ) if ( *i < 256 ) A8[ R->rank1(j)-1 ] = *i; else A64[ R->rank0(j)-1 ] = *i; #if 0 j = 0; for ( iterator i = a; i != e; ++i, ++j ) assert ( (*this)[j] == *i ); #endif #if defined(ARRAY864DEBUG) #if defined(_OPENMP) #pragma omp parallel for #endif for ( int64_t i = 0; i < static_cast<int64_t>(n); ++i ) assert ( (*this)[i] == a[i] ); #endif } }
/** * constructor by file name * * @param filename name of output file * @param bufsize size of output buffer * @param truncate true if file should be truncated false data should be appended * @param offset write offset in bytes **/ SynchronousGenericOutput(std::string const & filename, uint64_t const bufsize, bool const truncate = true, uint64_t const offset = 0, bool const /* metasync */ = true) : B(bufsize,false), pa(B.get()), pc(pa), pe(pa+B.getN()), PW ( truncate ? new ofstream_type(filename) : 0), PF ( truncate ? 0 : new std::fstream(filename.c_str(), std::ios::binary|std::ios::in|std::ios::out|std::ios::ate) ), W ( truncate ? (static_cast<std::ostream &>(*PW)) : (static_cast<std::ostream &>(*PF)) ), datawrittentofile(0) { W.seekp(offset,std::ios::beg); }
/**
 * constructor
 *
 * Allocates a buffer of bufsize elements. The current pointer (pc) and the
 * end pointer (pe) both start at the beginning of the buffer (pa), and the
 * counter c is set to zero.
 *
 * @param rsocket socket stored in this object
 * @param bufsize number of elements in the buffer
 **/
SocketFastReaderBase(
	::libmaus2::network::SocketBase * rsocket,
	uint64_t const bufsize
)
: socket(rsocket),
  B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pc),
  c(0)
{
}
/**
 * constructor
 *
 * Opens filename via the InputStreamInstance base class, allocates a
 * single buffer of rnumbufs * rbufsize elements, sets av to true and seeks
 * the stream to offset (counted from the start of the file).
 *
 * @param filename file name
 * @param rnumbufs number of buffers
 * @param rbufsize size of each buffer
 * @param offset initial file offset
 **/
AsynchronousBufferReader(
	std::string const & filename,
	uint64_t const rnumbufs,
	uint64_t const rbufsize,
	uint64_t const offset
)
: libmaus2::aio::InputStreamInstance(filename),
  bufsize(rnumbufs * rbufsize),
  abuffer(bufsize),
  buffer(abuffer.get()),
  av(true)
{
	libmaus2::aio::InputStreamInstance::seekg(offset, std::ios::beg);
}
/**
 * access operator
 *
 * The bit at position i of B selects the array holding the element: A8 if
 * the bit is set, A64 otherwise. The position inside that array is the
 * respective rank of i minus one.
 *
 * @param i index of element to be accessed
 * @return element at index i
 * @throws ::libmaus2::exception::LibMausException if i is not smaller than n
 **/
uint64_t operator[](uint64_t const i) const
{
	if ( i < n )
	{
		if ( ! ::libmaus2::bitio::getBit(B.get(),i) )
			return A64[R->rank0(i)-1];

		return A8[R->rank1(i)-1];
	}

	// index out of range
	::libmaus2::exception::LibMausException se;
	se.getStream() << "Access of element " << i << " >= " << n << " in Array864::operator[]";
	se.finish();
	throw se;
}
/**
 * constructor
 *
 * Stores the list of file names and sets up the LRU base with rlrusize
 * entries. mapping has one entry per file name, rmapping and files have
 * lrusize entries each. Every entry of mapping is initialised to lrusize.
 *
 * @param rfilenames list of file names
 * @param rlrusize size of the LRU
 **/
FileBunchLRU (
	std::vector < std::string > const & rfilenames,
	uint64_t rlrusize = 1024
)
: LRU(rlrusize),
  lrusize(rlrusize),
  filenames ( rfilenames ),
  mapping(filenames.size()),
  rmapping(lrusize),
  files(lrusize)
{
	for ( uint64_t fileid = 0; fileid < mapping.getN(); ++fileid )
		mapping[fileid] = lrusize;
}
void writeContents() { // std::cerr << "writing buffer of " << f << " words." << std::endl; out.write( reinterpret_cast<char const *>(B.get()), f * sizeof(data_type) ); f = 0; }
/**
 * send the buffered elements as a message and mark the buffer as empty
 *
 * If the buffer is not empty, the elements between pa and pc are passed to
 * dst->writeMessage together with tag. In any case pc is reset to pa.
 **/
void writeBuffer()
{
	if ( pc != pa )
	{
		dst->writeMessage(tag, B.get(), pc-pa);
	}

	pc = pa;
}
/**
 * search for the deepest node of the generalised suffix tree of a and b
 * which has suffixes of both strings below it
 *
 * Both strings are copied into one string c (every symbol shifted up by 2,
 * symbol 0 terminating a and symbol 1 terminating b). c is suffix sorted,
 * its LCP array and the PSV/NSV arrays of the LCP array are computed, and
 * these are used to simulate a bottom up traversal of the suffix tree.
 *
 * @param a first string
 * @param b second string
 * @return LCSResult constructed from the largest depth found among nodes
 *         with symbol mask 3, a suffix position in a and a suffix position
 *         in b (both taken from the suffix array interval of that node);
 *         all three values are 0 if no such node with depth > 0 exists
 **/
static LCSResult lcs(std::string const & a, std::string const & b)
{
	/* concatenate a and b into string c */
	std::string c(a.size()+b.size()+2,' ');
	// symbols are shifted by 2 so that 0 and 1 are free for the terminators
	for ( uint64_t i = 0; i < a.size(); ++i ) c[i] = a[i]+2;
	c[a.size()] = 0;
	for ( uint64_t i = 0; i < b.size(); ++i ) c[a.size()+1+i] = b[i]+2;
	c[c.size()-1] = 1;

	// allocate suffix sorting
	::libmaus2::autoarray::AutoArray<int32_t> SA(c.size(),false);

	// perform suffix sorting
	typedef ::libmaus2::suffixsort::DivSufSort<32,uint8_t *,uint8_t const *,int32_t *,int32_t const *,alphabet_size+2> sort_type;
	sort_type::divsufsort(reinterpret_cast<uint8_t const *>(c.c_str()), SA.get(), c.size());

	// compute LCP array
	::libmaus2::autoarray::AutoArray<int32_t> LCP = ::libmaus2::suffixsort::SkewSuffixSort<uint8_t,int32_t>::lcpByPlcp( reinterpret_cast<uint8_t const *>(c.c_str()), c.size(), SA.get());

	// compute psv and nsv arrays for simulating parent operation on suffix tree
	::libmaus2::autoarray::AutoArray<int32_t> const prev = ::libmaus2::sv::PSV::psv(LCP.get(),LCP.size());
	::libmaus2::autoarray::AutoArray<int32_t> const next = ::libmaus2::sv::NSV::nsv(LCP.get(),LCP.size());

	#if defined(LCS_DEBUG)
	// debug output: rank, LCP, PSV, NSV and the suffix itself
	for ( uint64_t i = 0; i < c.size(); ++i )
	{
		std::cerr << i << "\t" << LCP[i] << "\t" << prev[i] << "\t" << next[i] << "\t";
		for ( std::string::const_iterator ita = c.begin()+SA[i]; ita != c.end(); ++ita )
			if ( isalnum(*ita) )
				std::cerr << *ita;
			else
				std::cerr << "<" << static_cast<int>(*ita) << ">" ;
		std::cerr << std::endl;
	}
	std::cerr << "---" << std::endl;
	#endif

	int32_t const n = c.size();

	// queue all suffix tree leafs
	// (symbol mask 1 for suffixes starting at a position <= a.size(), 2 for all others)
	std::deque < QNode > Q;
	for ( int32_t i = 0; i < n; ++i )
		Q.push_back ( QNode(i,i,0, (SA[i]< static_cast<int32_t>(a.size()+1)) ? 1:2, 1 ) );

	// construct hash for tree nodes we have seen so far
	typedef ::libmaus2::util::unordered_set < QNode , HashQNode >::type hash_type;
	typedef hash_type::iterator hash_iterator_type;
	typedef hash_type::const_iterator hash_const_iterator_type;
	hash_type H(n);

	// we simulate a bottom up traversal of the generalised suffix tree for a and b
	while ( Q.size() )
	{
		// get node and compute parent
		QNode const I = Q.front(); Q.pop_front();
		QNode P = parent(I,LCP.get(),prev.get(),next.get(),n);

		// have we seen this node before?
		hash_iterator_type it = H.find(P);

		// no, insert it
		if ( it == H.end() )
		{
			it = H.insert(P).first;
		}
		// yes, update symbol mask and extend visited interval
		else
		{
			it->symmask |= I.symmask;
			it->fill += (I.right-I.left+1);
		}

		// if this is not the root and the node is full (we have seen all its children),
		// then put it in the queue
		if ( P.right-P.left + 1 < n && it->isFull() )
			Q.push_back(P);
	}

	// maximum lcp value
	int32_t maxlcp = 0;
	// suffix positions (relative to a and b respectively) reported for the best node
	uint32_t maxpos_a = 0;
	uint32_t maxpos_b = 0;

	// consider all finished nodes
	for ( hash_const_iterator_type it = H.begin(); it != H.end(); ++it )
	{
		#if defined(LCS_DEBUG)
		std::cerr << *it << std::endl;
		#endif

		// we need to have nodes from both strings a and b under this
		// node (sym mask has bits for 1 and 2 set) and the lcp value must be
		// larger than what we already have
		if ( it->symmask == 3 && it->depth > maxlcp )
		{
			maxlcp = it->depth;

			// scan the suffix array interval of the node; the last suffix seen
			// from each string determines the reported position
			for ( int32_t q = it->left; q <= it->right; ++q )
			{
				if ( SA[q] < static_cast<int32_t>(a.size()) )
					maxpos_a = SA[q];
				else
					// positions in b follow a and its terminator inside c
					maxpos_b = SA[q] - (a.size()+1);
			}
		}
	}

	return LCSResult(maxlcp,maxpos_a,maxpos_b);
}
/**
 * constructor
 *
 * Allocates a buffer of bufsize elements, marks it as empty (pc == pa) and
 * constructs the writer W from the file name and the constant 16.
 * NOTE(review): the meaning of the constant 16 depends on the constructor
 * of W, which is not visible here - confirm.
 *
 * @param filename output file name
 * @param bufsize size of output buffer in elements
 **/
OutputBuffer(
	std::string const & filename,
	uint64_t const bufsize
)
: B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pa + B.getN()),
  W(filename, 16)
{
}
/**
 * constructor from a set of files and a read interval
 *
 * Scans the reads of the interval with a CompactFastConcatDecoder and
 * records for each read its code position as a 16 bit offset
 * (shortpointers) relative to a base position. Whenever the offset would
 * exceed the 16 bit range a new base position is started: a 1 bit is
 * written to designators and the base is appended to longpointers;
 * otherwise a 0 bit is written. Afterwards the text of the interval is
 * loaded into memory and the rank dictionary for designators is set up.
 *
 * @param filenames names of the input files
 * @param rFI interval of reads to be loaded
 * @param verbose if true, progress messages are printed on std::cerr
 * @throws libmaus2::exception::LibMausException if fewer than text.size()
 *         bytes of text can be read
 **/
CompactReadContainer(
	std::vector<std::string> const & filenames,
	::libmaus2::fastx::FastInterval const & rFI,
	bool const verbose = false
)
: FI(rFI), numreads(FI.high-FI.low), designators( (numreads+63)/64 ), shortpointers(numreads,false), longpointers(), text(FI.fileoffsethigh-FI.fileoffset,false)
{
	typedef ::libmaus2::fastx::CompactFastConcatDecoder reader_type;
	// typedef reader_type::pattern_type pattern_type;
	reader_type CFD(filenames,FI);

	// current code position (updated by skipPattern) and base of the current 16 bit window
	uint64_t codepos = 0;
	uint64_t offsetbase = 0;

	// bool const verbose = true;
	// progress is reported whenever the read index is a multiple of bmod,
	// the value nextTwoPow returns for (roughly) one percent of the reads
	uint64_t const mod = std::max((numreads+50)/100,static_cast<uint64_t>(1));
	uint64_t const bmod = libmaus2::math::nextTwoPow(mod);
	uint64_t const bmask = bmod-1;

	if ( verbose )
	{
		if ( isatty(STDERR_FILENO) )
			std::cerr << "Computing designators/pointers...";
		else
			std::cerr << "Computing designators/pointers..." << std::endl;
	}

	// base positions; the first window starts at position 0
	std::vector < uint64_t > prelongpointers;
	prelongpointers.push_back(0);

	writer_type W(designators.get());
	for ( uint64_t i = 0; i < numreads; ++i )
	{
		// offset no longer fits into 16 bits: start a new window at the current position
		if ( ( codepos-offsetbase > static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) ) )
		{
			W.writeBit(1);
			offsetbase = codepos;
			prelongpointers.push_back(offsetbase);
		}
		else
		{
			W.writeBit(0);
		}

		// position of read i relative to the base of its window
		shortpointers[i] = codepos-offsetbase;

		CFD.skipPattern(codepos);

		if ( verbose && ((i & (bmask)) == 0) )
		{
			if ( isatty(STDERR_FILENO) )
				std::cerr << "(" << i/static_cast<double>(numreads) << ")";
			else
				std::cerr << "Finished " << i/static_cast<double>(numreads) << std::endl;
		}
	}
	W.flush();

	// copy the collected base positions to their final array
	longpointers = ::libmaus2::autoarray::AutoArray< uint64_t >(prelongpointers.size(),false);
	std::copy(prelongpointers.begin(),prelongpointers.end(),longpointers.begin());

	if ( verbose )
		std::cerr << "Done." << std::endl;

	if ( verbose )
		std::cerr << "Loading text...";

	// read text.size() bytes starting at FI.fileoffset from the data fragments of the files
	std::vector < libmaus2::aio::FileFragment > const frags = ::libmaus2::fastx::CompactFastDecoder::getDataFragments(filenames);
	::libmaus2::aio::ReorderConcatGenericInput<uint8_t> RCGI(frags,64*1024,text.size(),FI.fileoffset);
	uint64_t const textread = RCGI.read(text.begin(),text.size());

	// a short read is treated as an error
	if ( textread != text.size() )
	{
		libmaus2::exception::LibMausException se;
		se.getStream() << "Failed to read text in CompactReadContainer." << std::endl;
		se.finish();
		throw se;
	}

	if ( verbose )
		std::cerr << "done." << std::endl;

	if ( verbose )
		std::cerr << "Setting up rank dictionary for designators...";

	setupRankDictionary();

	if ( verbose )
		std::cerr << "done." << std::endl;

	#if 0
	// disabled consistency check: compare the decoder position of each read
	// with the position reconstructed from longpointers and shortpointers
	std::cerr << "Checking dict...";
	reader_type CFD2(filenames,FI);
	for ( uint64_t i = 0; i < numreads; ++i )
	{
		if ( CFD2.istr.getptr != longpointers [ designatorrank->rank1(i) ] + shortpointers[i] )
		{
			std::cerr << "Failure for i=" << i << std::endl;
			std::cerr << "Ptr is " << CFD2.istr.getptr << std::endl;
			std::cerr << "Expected " << longpointers [ designatorrank->rank1(i) ] + shortpointers[i] << std::endl;
			assert ( CFD2.istr.getptr == longpointers [ designatorrank->rank1(i) ] + shortpointers[i] );
		}

		::libmaus2::fastx::Pattern pattern;
		CFD2.getNextPatternUnlocked(pattern);
	}
	std::cerr << "done." << std::endl;
	#endif
}
/**
 * align the sequences a and b by dynamic programming and trace back the
 * chosen path
 *
 * The matrix M is filled row by row (one row per symbol of b, rows of n1
 * elements) with pairs of score and step type; scores are similarities,
 * i.e. matches add gain_match and all other steps subtract their penalty.
 * Ties between equally good steps are resolved according to
 * edit_distance_priority. The trace back writes the step types backwards
 * into the trace buffer ending at te and leaves ta at the first step.
 *
 * @param a start iterator of first sequence
 * @param n length of first sequence
 * @param b start iterator of second sequence
 * @param m length of second sequence
 * @param k passed on to setup() only
 * @param gain_match score added for a match
 * @param penalty_subst penalty subtracted for a mismatch
 * @param penalty_ins penalty subtracted for an insertion (step to previous row)
 * @param penalty_del penalty subtracted for a deletion (step to previous column)
 * @return EditDistanceResult constructed from the numbers of insertions,
 *         deletions, matches and mismatches on the traced path
 **/
EditDistanceResult process(
	iterator_a a,
	uint64_t const n,
	iterator_b b,
	uint64_t const m,
	uint64_t const k = 0,
	similarity_type const gain_match = 1,
	similarity_type const penalty_subst = 1,
	similarity_type const penalty_ins = 1,
	similarity_type const penalty_del = 1
)
{
	setup(n,m,k);

	// first row: every cell is reached from its left neighbour
	element_type * p = M.begin();
	int64_t firstpen = 0;
	for ( uint64_t i = 0; i < n1; ++i, firstpen -= penalty_del )
		*(p++) = element_type(firstpen,STEP_DEL);

	// p walks the current row, q the row above it
	element_type * q = M.begin();

	iterator_a const ae = a+n;
	iterator_b const be = b+m;

	while ( b != be )
	{
		typename std::iterator_traits<iterator_b>::value_type const bchar = *(b++);

		// both pointers are at the start of a row
		assert ( (p-M.begin()) % n1 == 0 );
		assert ( (q-M.begin()) % n1 == 0 );

		// top
		*p = element_type(q->first-penalty_ins,STEP_INS);

		for ( iterator_a aa = a; aa != ae; ++aa )
		{
			// left
			similarity_type const left = p->first - penalty_del;
			// diagonal match?
			bool const dmatch = (*aa == bchar);
			// diagonal
			similarity_type const diag =
				dmatch
				?
				(q->first + gain_match)
				:
				(q->first - penalty_subst);
			// move pointer in row above
			q++;
			// top
			similarity_type const top = q->first - penalty_ins;
			// move pointer in current row
			p++;

			switch ( edit_distance_priority )
			{
				// on ties prefer left over top over diagonal
				case del_ins_diag:
					if ( left >= top )
					{
						if ( left >= diag )
							// left
							*p = element_type(left,STEP_DEL);
						else
							// diag
							*p = element_type(diag,dmatch ? STEP_MATCH : STEP_MISMATCH);
					}
					// top >= left
					else
					{
						if ( top >= diag )
							// top
							*p = element_type(top,STEP_INS);
						else
							// diag
							*p = element_type(diag,dmatch ? STEP_MATCH : STEP_MISMATCH);
					}
					break;
				// on ties prefer diagonal over left over top
				case diag_del_ins:
					if ( diag >= left )
					{
						if ( diag >= top )
							// diag
							*p = element_type(diag,dmatch ? STEP_MATCH : STEP_MISMATCH);
						else
							// top
							*p = element_type(top,STEP_INS);
					}
					else
					{
						if ( left >= top )
							// left
							*p = element_type(left,STEP_DEL);
						else
							// top
							*p = element_type(top,STEP_INS);
					}
					break;
			}
		}

		// step from the last cell of the rows to the first cell of the next rows
		p++;
		q++;
	}

	// restore b to the start of the second sequence
	b -= m;

	// trace back from the cell in row m, column n
	uint64_t i = n;
	uint64_t j = m;
	element_type * pq = M.get() + j*n1 + i;

	// the trace is written back to front, starting at the end of the trace buffer
	ta = te;

	uint64_t numdel = 0;
	uint64_t numins = 0;
	uint64_t nummat = 0;
	uint64_t nummis = 0;

	// NOTE(review): a step type other than the four handled below leaves pq
	// unchanged, so this loop would not terminate - confirm that M never
	// contains such a value
	while ( pq != M.begin() )
	{
		*(--ta) = pq->second;

		switch ( pq->second )
		{
			// previous row
			case STEP_INS:
				pq -= n1;
				numins++;
				break;
			// previous column
			case STEP_DEL:
				pq -= 1;
				numdel++;
				break;
			// diagonal
			case STEP_MATCH:
				pq -= (n1+1);
				nummat++;
				break;
			// diagonal
			case STEP_MISMATCH:
				pq -= (n1+1);
				nummis++;
				break;
			default:
				break;
		}
	}

	return EditDistanceResult(numins,numdel,nummat,nummis);
}
void setupRankDictionary() { rank_ptr_type tdesignatorrank(new rank_type(designators.get(), designators.size()*64)); designatorrank = UNIQUE_PTR_MOVE(tdesignatorrank); }