/**
 * Compress the contents of `input` and write the result to `output`.
 *
 * Stages, in the order they are invoked:
 *   1. RLE                 (input buffer  -> scratch buffer, yields h.tempSize)
 *   2. BWT                 (scratch buffer -> input buffer, yields h.first)
 *   3. MTF                 (on the input buffer)
 *   4. optimizeIncrements  (fills h.increments)
 *   5. writeHeader         (emits `h` to `output`)
 *   6. StructuredModel::encode through an ArithmeticEncoder bound to `output`
 *
 * @param input   source of the raw bytes (size and buffer are queried from it)
 * @param output  destination stream for the header and the encoded payload
 */
void compress(InputBuffer& input, OutputStream& output)
{
    header hdr;
    hdr.fileSize = input.getSize();

    // Two buffers are handed back and forth between the transform stages.
    uint8* primary = input.getBuffer();
    uint8* scratch = new uint8[hdr.fileSize];

    // Transform chain; after BWT the working data is passed as `primary`.
    RLE(primary, scratch, hdr.fileSize, &hdr.tempSize);
    BWT(scratch, primary, hdr.tempSize, &hdr.first);
    MTF(primary, hdr.tempSize);

    optimizeIncrements(primary, hdr.tempSize, hdr.increments);

#if 0
    // Disabled diagnostics: per-model cost estimate and overall bits per byte.
    float model_stats[8];
    getModelStatistics(primary, hdr.tempSize, hdr.increments, model_stats);
    float total = 0;
    for (uint32 m = 0; m < 8; m++)
    {
        printf("model #%d: %12.2f bits (increment: %3d)\n", m, model_stats[m], hdr.increments[m]);
        total += model_stats[m];
    }
    printf("%f bpb\n", total / hdr.fileSize);
#endif

    // The header goes out first, followed by the entropy-coded payload.
    writeHeader(output, hdr);

    ArithmeticEncoder encoder(output);
    StructuredModel model(hdr.increments);
    model.encode(encoder, primary, hdr.tempSize);

    // NOTE(review): the buffer obtained from input.getBuffer() is released
    // here with delete[]; confirm that InputBuffer does not free it as well.
    delete[] primary;
    delete[] scratch;
}
/**
 * Compute the BWT of a compact array, keeping the suffix array itself in a
 * bit-packed SignedCompactArray.
 *
 * The suffix array is allocated with bitsPerNum(n) + 1 bits per entry and
 * filled by DivSufSort. Output position i then receives the symbol
 * C[SA[i]-1], or C[n-1] when SA[i] is zero.
 *
 * @param C        input text
 * @param verbose  when true, progress and elapsed time are written to std::cerr
 * @return newly allocated CompactArray with C.size() entries of C.getB() bits
 */
::libmaus2::bitio::CompactArray::unique_ptr_type bwtDivSufSortCompact(::libmaus2::bitio::CompactArray const & C, bool const verbose = false)
{
	typedef ::libmaus2::bitio::CompactArray::const_iterator text_const_iterator;
	typedef ::libmaus2::bitio::CompactArray::iterator text_iterator;
	typedef ::libmaus2::bitio::SignedCompactArray::const_iterator sa_const_iterator;
	typedef ::libmaus2::bitio::SignedCompactArray::iterator sa_iterator;

	uint64_t const bitwidth = 64;
	typedef ::libmaus2::suffixsort::DivSufSort<bitwidth,text_iterator,text_const_iterator,sa_iterator,sa_const_iterator> sort_type;

	uint64_t const textLength = C.size();
	uint64_t const symbolBits = C.getB();

	// One bit more than needed to represent textLength (entries are signed).
	::libmaus2::bitio::SignedCompactArray suffixArray(textLength, ::libmaus2::math::bitsPerNum(textLength) + 1 );

	if ( verbose )
		std::cerr << "Running divsufsort...";

	::libmaus2::timing::RealTimeClock sortClock;
	sortClock.start();
	sort_type::divsufsort ( C.begin(), suffixArray.begin(), textLength );

	if ( verbose )
		std::cerr << "done, time " << sortClock.getElapsedSeconds() << std::endl;

	::libmaus2::bitio::CompactArray::unique_ptr_type transformed(new ::libmaus2::bitio::CompactArray(textLength,symbolBits));

	// Symbol preceding each sorted suffix; the suffix starting at 0 wraps
	// around to the last symbol of the text.
	for ( uint64_t rank = 0; rank < textLength; ++rank )
		transformed -> set (
			rank,
			suffixArray.get(rank) ? C.get(suffixArray.get(rank)-1) : C.get(textLength-1)
		);

	return UNIQUE_PTR_MOVE(transformed);
}
/**
 * Construct the index.
 *
 * The BWT is either loaded from `loadFromFile` or computed from `text`, and
 * optionally written to `saveToFile`. From it the constructor builds the
 * cumulative symbol-count table C[], the code table and the
 * THuffAlphabetRank structure, releases the BWT buffer and finally calls
 * maketables().
 *
 * @param text          input text; only used when `loadFromFile` is 0
 * @param n             number of BWT symbols that are counted and indexed
 * @param samplerate    sampling rate stored in this->samplerate
 * @param loadFromFile  if non-zero, the BWT is read via LoadFromFile()
 * @param saveToFile    if non-zero, the BWT is written via SaveToFile()
 */
CSA::CSA(uchar *text, ulong n, unsigned samplerate, const char *loadFromFile, const char *saveToFile)
{
    this->n = n;
    this->samplerate = samplerate;

    // Obtain the BWT either from disk or by transforming the text.
    uchar *bwt;
    if (loadFromFile != 0)
        bwt = LoadFromFile(loadFromFile);
    else
        bwt = BWT(text);
    if (saveToFile != 0)
        SaveToFile(saveToFile, bwt);

    // Count the occurrences of every byte value in the BWT.
    // (No smallest/largest-symbol scan is done here: its results were never
    // read, and its unsigned downward loop could not terminate for n == 0.)
    ulong i;
    for (i = 0; i < 256; i++)
        C[i] = 0;
    for (i = 0; i < n; ++i)
        C[(int)bwt[i]]++;

    // Turn the counts into an exclusive prefix sum:
    // C[c] = number of BWT symbols strictly smaller than c.
    ulong prev = C[0], temp;
    C[0] = 0;
    for (i = 1; i < 256; i++) {
        temp = C[i];
        C[i] = C[i-1] + prev;
        prev = temp;
    }

    this->codetable = node::makecodetable(bwt,n);
    alphabetrank = new THuffAlphabetRank(bwt,n, this->codetable,0);
    //if (alphabetrank->Test(bwt,n)) printf("alphabetrank ok\n");

    // The BWT buffer is released here, before maketables() runs.
    delete [] bwt;

    // Make tables
    maketables();

    // to avoid running out of unsigned, the sizes are computed in specific order (large/small)*small
    // |class CSA| +256*|TCodeEntry|+|C[]|+|suffixes[]+positions[]|+...
    //printf("FMindex takes %d B\n",
    //    6*W/8+256*3*W/8+256*W/8+ (2*n/(samplerate*8))*W+sampled->SpaceRequirementInBits()/8+alphabetrank->SpaceRequirementInBits()/8+W/8);
}
/**
 * Shared implementation of bwtDivSufSort, parameterised on the integer type
 * used for suffix array entries.
 *
 * Allocates an AutoArray<sa_elem_type> of C.size() entries, fills it with
 * DivSufSort and then writes C[SA[i]-1] (or C[n-1] when SA[i] is zero) to
 * output position i.
 *
 * @tparam sa_elem_type  signed integer type of the suffix array entries
 * @param C              input text
 * @param verbose        when true, progress and elapsed time go to std::cerr
 * @return newly allocated CompactArray with C.size() entries of C.getB() bits
 */
template<typename sa_elem_type>
static ::libmaus2::bitio::CompactArray::unique_ptr_type bwtDivSufSortWithSaType(::libmaus2::bitio::CompactArray const & C, bool const verbose)
{
	typedef ::libmaus2::bitio::CompactArray::const_iterator text_const_iterator;
	typedef ::libmaus2::bitio::CompactArray::iterator text_iterator;
	typedef sa_elem_type const * sa_const_iterator;
	typedef sa_elem_type * sa_iterator;

	uint64_t const bitwidth = 64;
	typedef ::libmaus2::suffixsort::DivSufSort<bitwidth,text_iterator,text_const_iterator,sa_iterator,sa_const_iterator> sort_type;

	uint64_t const n = C.size();
	uint64_t const b = C.getB();

	::libmaus2::autoarray::AutoArray< sa_elem_type > SA(n,false);

	if ( verbose )
		std::cerr << "Running divsufsort...";

	::libmaus2::timing::RealTimeClock drtc;
	drtc.start();
	sort_type::divsufsort ( text_const_iterator(&C), SA.get(), n );

	if ( verbose )
		std::cerr << "done, time " << drtc.getElapsedSeconds() << std::endl;

	::libmaus2::bitio::CompactArray::unique_ptr_type BWT(new ::libmaus2::bitio::CompactArray(n,b));

	// Symbol preceding each sorted suffix; the suffix starting at 0 wraps
	// around to the last symbol of the text.
	for ( uint64_t i = 0; i < n; ++i )
		if ( SA.get(i) )
			BWT -> set ( i, C.get(SA.get(i)-1) );
		else
			BWT -> set ( i, C.get(n-1) );

	return UNIQUE_PTR_MOVE(BWT);
}

/**
 * Compute the BWT of a compact array using DivSufSort with a plain integer
 * suffix array.
 *
 * Texts with fewer than 2^31 symbols use 32-bit suffix array entries; longer
 * texts use 64-bit entries.
 *
 * @param C        input text
 * @param verbose  when true, progress and elapsed time are written to std::cerr
 * @return newly allocated CompactArray with C.size() entries of C.getB() bits
 */
::libmaus2::bitio::CompactArray::unique_ptr_type bwtDivSufSort(::libmaus2::bitio::CompactArray const & C, bool const verbose = false)
{
	if ( C.n < (1ull << 31) )
		return bwtDivSufSortWithSaType<int32_t>(C,verbose);
	else
		return bwtDivSufSortWithSaType<int64_t>(C,verbose);
}