/**
 * Write num as a sequence of expo digits in base "base", most significant
 * digit first, by calling put() once per digit. Every digit is stored as
 * (digit value + 1).
 *
 * The digits are staged in termbuf, which is filled from its end towards
 * its start; the assertion checks that termbuf holds exactly expo entries.
 * Digits of num above the expo lowest ones are dropped.
 *
 * @param num number to be written
 **/
void putTerm(uint64_t num)
{
    uint8_t * const bufstart = termbuf.get();
    uint8_t * cursor = bufstart + termbuf.getN();

    // produce the digits from least to most significant, filling the buffer backwards
    for ( unsigned int digit = 0; digit < expo; ++digit )
    {
        --cursor;
        *cursor = (num % base) + 1;
        num /= base;
    }

    // the staging buffer has to be filled completely
    assert ( cursor == bufstart );

    // emit the staged digits front to back
    for ( unsigned int digit = 0; digit < expo; ++digit )
    {
        put(*cursor);
        ++cursor;
    }
}
SynchronousGenericInput( std::string const & filename, uint64_t const rbufsize, uint64_t const roffset = 0, uint64_t const rtotalwords = std::numeric_limits<uint64_t>::max() ) : bufsize(rbufsize), buffer(bufsize,false), pa(buffer.get()), pc(pa), pe(pa), Pistr(new ifstream_type(filename.c_str(),std::ios::binary)), istr(*Pistr), totalwords ( std::min ( ::libmaus::util::GetFileSize::getFileSize(filename) / sizeof(input_type) - roffset, rtotalwords) ), totalwordsread(0), checkmod(true) { if ( ! Pistr->is_open() ) { ::libmaus::exception::LibMausException se; se.getStream() << "Unable to open file " << filename << ": " << strerror(errno); se.finish(); throw se; } Pistr->seekg(roffset * sizeof(input_type), std::ios::beg); if ( ! istr ) { ::libmaus::exception::LibMausException se; se.getStream() << "Unable to seek file " << filename << ": " << strerror(errno); se.finish(); throw se; } }
bool fillBuffer() { assert ( totalwordsread <= totalwords ); uint64_t const remwords = totalwords-totalwordsread; uint64_t const toreadwords = std::min(remwords,bufsize); istr.read ( reinterpret_cast<char *>(buffer.get()), toreadwords * sizeof(input_type)); uint64_t const bytesread = istr.gcount(); if ( checkmod && (bytesread % sizeof(input_type) != 0) ) { ::libmaus::exception::LibMausException se; se.getStream() << "SynchronousGenericInput::fillBuffer: Number of bytes " << bytesread << " read is not a multiple of entity type." << std::endl; se.finish(); throw se; } uint64_t const wordsread = bytesread / sizeof(input_type); if ( wordsread == 0 ) { if ( totalwordsread != totalwords ) { std::cerr << "SynchronousGenericInput<>::getNext(): WARNING: read 0 words but there should be " << remwords << " left." << std::endl; } return false; } pc = pa; pe = pa + wordsread; return true; }
/**
 * Construct an output buffer attached to a socket.
 *
 * The buffer starts out empty: the current pointer pc equals the buffer
 * start pa, and pe marks the end of the buffer B.
 *
 * @param rdst socket stored as destination dst
 * @param rtag value stored as tag
 * @param bufsize size the buffer B is constructed with
 **/
SocketOutputBufferTemplate(
    ::libmaus::network::SocketBase * rdst,
    int const rtag,
    uint64_t const bufsize
)
: dst(rdst),
  tag(rtag),
  B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pa+B.getN())
{
}
AsynchronousBufferReader ( std::string const & rfilename, uint64_t rnumbuffers = 16, uint64_t rbufsize = 32, uint64_t roffset = 0 ) : filename(rfilename), fd( open(filename.c_str(),O_RDONLY ) ), numbuffers(rnumbuffers), bufsize(rbufsize), bufferspace ( numbuffers * bufsize ), buffers ( numbuffers ), contexts(numbuffers), low(0), high(0), offset(roffset) { if ( fd < 0 ) { ::libmaus::exception::LibMausException se; se.getStream() << "::libmaus::aio::AsynchronousBufferReader: Failed to open file " << filename << ": " << strerror(errno); se.finish(); /* std::cerr << se.s << std::endl; kill ( getpid(), SIGSTOP ); */ throw se; } for ( unsigned int i = 0; i < numbuffers; ++i ) buffers[i] = bufferspace.get() + i*bufsize; while ( high < numbuffers ) enqueRead(); }
/**
 * Set up an empty output buffer writing through an asynchronous writer.
 *
 * The current pointer pc starts at the buffer start pa; pe marks the end
 * of the buffer B.
 *
 * @param rW writer the output is passed to
 * @param bufsize size of the buffer
 * @param rmetaid meta information for each written block
 **/
MetaOutputBuffer8(
    ::libmaus::aio::AsynchronousWriter & rW,
    uint64_t const bufsize,
    uint64_t const rmetaid
)
: B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pa+B.getN()),
  W(rW),
  metaid(rmetaid)
{
}
/**
 * Build a lookup table with valid indices -1,0,...,255.
 *
 * table points one entry behind the start of atable, which makes the
 * index -1 addressable. The entries for -1 and for c are set to true,
 * all other entries are set to false.
 *
 * @param c symbol whose table entry is set to true
 **/
CharTermTable(uint8_t c)
: atable(257),
  table(atable.get()+1)
{
    // the extra slot in front of the byte range is always marked
    table[-1] = true;

    // exactly one of the 256 byte values is marked
    for ( unsigned int sym = 0; sym < 256; ++sym )
    {
        table[sym] = (sym == c);
    }
}
SynchronousOutputBuffer8(std::string const & rfilename, uint64_t const bufsize, bool truncate = true) : filename(rfilename), B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()) { if ( truncate ) { std::ofstream ostr(filename.c_str(), std::ios::binary); ostr.flush(); } }
/**
 * Construct a buffered reader on top of a socket.
 *
 * The buffer starts out without any data (pc and pe both equal the buffer
 * start pa) and the member c is set to zero.
 *
 * @param rsocket socket stored in the member socket
 * @param bufsize size the buffer B is constructed with
 **/
SocketFastReaderBase(
    ::libmaus::network::SocketBase * rsocket,
    uint64_t const bufsize
)
: socket(rsocket),
  B(bufsize),
  pa(B.get()),
  pc(pa),
  pe(pc),
  c(0)
{
}
/**
 * Construct a reader based on a plain std::ifstream.
 *
 * A single buffer of rnumbufs * rbufsize entries is allocated, av is set
 * to true and the stream is positioned at the given offset.
 *
 * The file is opened in binary mode: the stream is positioned by offset,
 * which is only reliable without text mode translation on platforms that
 * distinguish text and binary streams.
 *
 * @param filename name of the file to be read
 * @param rnumbufs number of buffers (only used to compute the total buffer size)
 * @param rbufsize size of a single buffer
 * @param offset start position handed to seekg, relative to the beginning of the file
 **/
AsynchronousBufferReader(
    std::string const & filename,
    uint64_t const rnumbufs,
    uint64_t const rbufsize,
    uint64_t const offset
)
: std::ifstream(filename.c_str(), std::ios::binary),
  bufsize(rnumbufs * rbufsize),
  abuffer(bufsize),
  buffer(abuffer.get()),
  av(true)
{
    std::ifstream::seekg(offset,std::ios::beg);
}
/**
 * Fetch the next TripleEdge from the input.
 *
 * When the buffered triples are used up, the buffer B is refilled from
 * istr. The assertions check that a refill delivered a whole number of
 * triples and at least one of them.
 *
 * @param triple reference the next triple is copied to
 * @return true if a triple was stored in triple, false if a refill read no bytes
 **/
bool getNextTriple(TripleEdge & triple)
{
    if ( curbufleft == 0 )
    {
        // buffer exhausted, try to load the next batch
        char * const rawbuffer = reinterpret_cast<char *>(B.get());
        istr.read ( rawbuffer, B.getN() * sizeof(TripleEdge) );

        if ( istr.gcount() == 0 )
            return false;

        assert ( istr.gcount() % sizeof(TripleEdge) == 0 );

        curbufleft = istr.gcount() / sizeof(TripleEdge);
        assert ( curbufleft );

        curtrip = B.get();
    }

    // hand out the current triple and advance
    triple = *curtrip;
    ++curtrip;
    --curbufleft;

    return true;
}
/**
 * Merge the results stored in a list of input files into one output file.
 *
 * Objects are taken from a CheckOverlapResultMergeInput until it returns
 * a null pointer; each object is serialised to the output file in the
 * order delivered.
 *
 * @param inputfilenames names of the input files
 * @param outputfilename name of the output file (opened in binary mode)
 **/
static void merge(std::vector<std::string> const & inputfilenames, std::string const & outputfilename)
{
    CheckOverlapResultMergeInput in(inputfilenames);
    std::ofstream ostr(outputfilename.c_str(),std::ios::binary);

    // copy objects until the merged input is exhausted
    for ( CheckOverlapResult::shared_ptr_type ptr = in.get(); ptr; ptr = in.get() )
    {
        ptr->serialise(ostr);
    }

    ostr.flush();
    // the stream state is only checked in builds with assertions enabled
    assert ( ostr );
    ostr.close();
}
/**
 * Construct an input object reading from an already existing stream.
 *
 * The stream is only referenced (Pistr stays default constructed); the
 * buffer starts out empty and the count of elements read starts at zero.
 *
 * @param ristr input stream
 * @param rbufsize size the internal buffer is constructed with
 * @param rtotalwords value stored as totalwords
 * @param rcheckmod value stored as checkmod
 **/
SynchronousGenericInput(
    std::istream & ristr,
    uint64_t const rbufsize,
    uint64_t const rtotalwords = std::numeric_limits<uint64_t>::max(),
    bool const rcheckmod = true
)
: bufsize(rbufsize),
  buffer(bufsize,false),
  pa(buffer.get()),
  pc(pa),
  pe(pa),
  Pistr(),
  istr(ristr),
  totalwords ( rtotalwords ),
  totalwordsread(0),
  checkmod(rcheckmod)
{
}
SynchronousOutputBuffer8Posix(std::string const & rfilename, uint64_t const bufsize, bool truncate = true) : filename(rfilename), B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()), ptr(0) { if ( truncate ) { int const tres = ::truncate(filename.c_str(),0); if ( tres ) { ::libmaus::exception::LibMausException se; se.getStream() << "SynchronousOutputBuffer8Posix::SynchronousOutputBuffer8Posix(): truncate() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } } }
SynchronousGenericOutputPosix( std::string const & rfilename, uint64_t const bufsize, bool const truncate, uint64_t const offset, bool const rmetasync = true ) : filename(rfilename), dirname(::libmaus::util::ArgInfo::getDirName(filename)), metasync(rmetasync), B(bufsize), pa(B.get()), pc(pa), pe(pa+B.getN()), fd ( -1 ), totalwrittenbytes(0), totalwrittenwords(0) { while ( (fd = ::open(filename.c_str(), truncate ? (O_WRONLY|O_TRUNC|O_CREAT) : O_WRONLY , 0755 )) < 0 ) { switch ( errno ) { case EINTR: { std::cerr << "Restarting open() system call interupted by signal." << std::endl; break; } default: { ::libmaus::exception::LibMausException se; se.getStream() << "Failed to open file "<< filename <<" in SynchronousGenericOutputPosix: " << strerror(errno); se.finish(); throw se; } } } if ( lseek ( fd, offset, SEEK_SET) == static_cast<off_t>(-1) ) { close(fd); ::libmaus::exception::LibMausException se; se.getStream() << "Failed to seek " << filename << " in SynchronousGenericOutputPosix: " << strerror(errno); se.finish(); throw se; } #if 0 std::cerr << "File " << filename << " opened for output in " << ::libmaus::util::Demangle::demangle<this_type>() << std::endl; #endif }
uint64_t lookupValue(uint64_t const v) const { if ( !index.size() ) return 0; typedef IndexEntryValueGetAdapter<IndexEntry const *> adapter_type; adapter_type IEKGA(index.get()); ::libmaus::util::ConstIterator<adapter_type,uint64_t> const ita(&IEKGA); ::libmaus::util::ConstIterator<adapter_type,uint64_t> ite(ita); ite += index.size(); ::libmaus::util::ConstIterator<adapter_type,uint64_t> R = ::std::lower_bound(ita,ite,v); if ( R == ite ) return index.size()-1; if ( v == *R ) return R-ita; else return (R-ita)-1; }
/**
 * Construct an LRU managed bunch of files.
 *
 * mapping has one entry per file name, rmapping and files have one entry
 * per LRU slot. Every entry of mapping is initialised with lrusize, a value
 * which is not a valid slot number (valid slots are 0,...,lrusize-1).
 *
 * @param rfilenames names of the files
 * @param rlrusize number of LRU slots
 **/
FileBunchLRU (
    std::vector < std::string > const & rfilenames,
    uint64_t rlrusize = 1024
)
: LRU(rlrusize),
  lrusize(rlrusize),
  filenames ( rfilenames ),
  mapping(filenames.size()),
  rmapping(lrusize),
  files(lrusize)
{
    // no file is assigned to a slot yet
    std::fill_n ( mapping.get(), mapping.getN(), lrusize );
}
static std::pair<uint32_t,uint32_t> lcs(std::string const & a, std::string const & b) { /* concatenate a and b into string c */ std::string c(a.size()+b.size()+2,' '); for ( uint64_t i = 0; i < a.size(); ++i ) c[i] = a[i]+2; c[a.size()] = 0; for ( uint64_t i = 0; i < b.size(); ++i ) c[a.size()+1+i] = b[i]+2; c[c.size()-1] = 1; // allocate suffix sorting ::libmaus::autoarray::AutoArray<int32_t> SA(c.size(),false); // perform suffix sorting typedef ::libmaus::suffixsort::DivSufSort<32,uint8_t *,uint8_t const *,int32_t *,int32_t const *,8> sort_type; typedef sort_type::saidx_t saidx_t; sort_type::divsufsort(reinterpret_cast<uint8_t const *>(c.c_str()), SA.get(), c.size()); // compute LCP array ::libmaus::autoarray::AutoArray<int32_t> LCP = ::libmaus::suffixsort::SkewSuffixSort<uint8_t,int32_t>::lcpByPlcp( reinterpret_cast<uint8_t const *>(c.c_str()), c.size(), SA.get()); // compute psv and nsv arrays for simulating parent operation on suffix tree ::libmaus::autoarray::AutoArray<int32_t> const prev = ::libmaus::sv::PSV::psv(LCP.get(),LCP.size()); ::libmaus::autoarray::AutoArray<int32_t> const next = ::libmaus::sv::NSV::nsv(LCP.get(),LCP.size()); #if defined(LCS_DEBUG) for ( uint64_t i = 0; i < c.size(); ++i ) { std::cerr << i << "\t" << LCP[i] << "\t" << prev[i] << "\t" << next[i] << "\t"; for ( std::string::const_iterator ita = c.begin()+SA[i]; ita != c.end(); ++ita ) if ( isalnum(*ita) ) std::cerr << *ita; else std::cerr << "<" << static_cast<int>(*ita) << ">" ; std::cerr << std::endl; } std::cerr << "---" << std::endl; #endif int32_t const n = c.size(); // queue all suffix tree leafs std::deque < QNode > Q; for ( int32_t i = 0; i < n; ++i ) Q.push_back ( QNode(i,i,0, (SA[i]< static_cast<int32_t>(a.size()+1)) ? 
1:2, 1 ) ); // construct hash for tree nodes we have seen so far typedef ::libmaus::util::unordered_set < QNode , HashQNode >::type hash_type; typedef hash_type::iterator hash_iterator_type; typedef hash_type::const_iterator hash_const_iterator_type; hash_type H(n); // we simulate a bottom up traversal of the generalised suffix tree for a and b while ( Q.size() ) { // get node and compute parent QNode const I = Q.front(); Q.pop_front(); QNode P = parent(I,LCP.get(),prev.get(),next.get(),n); // have we seen this node before? hash_iterator_type it = H.find(P); // no, insert it if ( it == H.end() ) { it = H.insert(P).first; } // yes, update symbol mask and extend visited interval else { it->symmask |= I.symmask; it->fill += (I.right-I.left+1); } // if this is not the root and the node is full (we have seen all its children), // then put it in the queue if ( P.right-P.left + 1 < n && it->isFull() ) Q.push_back(P); } // maximum lcp value int32_t maxlcp = 0; uint32_t maxpos = 0; // consider all finished nodes for ( hash_const_iterator_type it = H.begin(); it != H.end(); ++it ) { #if defined(LCS_DEBUG) std::cerr << *it << std::endl; #endif // we need to have nodes from both strings a and b under this // node (sym mask has bits for 1 and 2 set) and the lcp value must be // larger than what we already have if ( it->symmask == 3 && it->depth > maxlcp ) { maxlcp = it->depth; maxpos = SA[it->left]; } } return std::pair<uint32_t,uint32_t>(maxlcp,maxpos); }
/**
 * Send the buffered data, if there is any, as one message carrying the
 * tag to dst and mark the buffer as empty afterwards.
 **/
void writeBuffer()
{
    bool const havedata = (pc != pa);

    if ( havedata )
        dst->writeMessage ( tag , B.get() , pc-pa );

    // restart filling at the front of the buffer
    pc = pa;
}
/**
 * Construct a table of rs entries, all set to zero.
 *
 * @param rs number of entries
 **/
SlowCumFreq(uint64_t rs)
: s(rs),
  A(s,false)
{
    // A was constructed with false as second argument, zero all entries explicitly
    std::fill_n(A.get(),s,0);
}