예제 #1
0
/**
 * Compresses the contents of `input` and writes the result to `output`.
 *
 * The stages run in this order: RLE, BWT, MTF, increment optimisation,
 * header emission, and finally model-driven arithmetic coding.
 *
 * @param input   Source of the raw bytes (queried via getSize()/getBuffer()).
 * @param output  Sink that receives the header followed by the encoded data.
 */
void compress(InputBuffer& input, OutputStream& output) {

	header h;
	h.fileSize = input.getSize();

	// Two working buffers: the one handed out by `input` and a freshly
	// allocated scratch area of the same size.
	uint8* primary = input.getBuffer();
	uint8* scratch = new uint8[h.fileSize];

	// Transform chain. Judging by the argument order, RLE goes
	// primary -> scratch and BWT goes scratch -> primary, while MTF works
	// on primary alone -- TODO confirm against the helper signatures.
	RLE(primary, scratch, h.fileSize, &h.tempSize);
	BWT(scratch, primary, h.tempSize, &h.first);
	MTF(primary, h.tempSize);

	// Fills in h.increments, which is later handed to the model.
	optimizeIncrements(primary, h.tempSize, h.increments);

#if 0
	// Disabled diagnostics: per-model bit cost and overall bits per byte.
	float model_stats[8];
	getModelStatistics(primary, h.tempSize, h.increments,  model_stats);

	float sum = 0;
	for (uint32 i = 0; i < 8; i++) {
		printf("model #%d: %12.2f bits (increment: %3d)\n", i, model_stats[i], h.increments[i]);
		sum += model_stats[i];
	}
	printf("%f bpb\n", sum / h.fileSize);
#endif

	// The header is written before the encoder is attached to the stream.
	writeHeader(output, h);

	ArithmeticEncoder encoder(output);
	StructuredModel   model(h.increments);
	model.encode(encoder, primary, h.tempSize);

	// NOTE(review): `primary` came from input.getBuffer(); releasing it here
	// presumes InputBuffer hands over ownership -- confirm with InputBuffer.
	delete[] primary;
	delete[] scratch;
}
예제 #2
0
파일: testOS.cpp 프로젝트: dkj/libmaus2
/**
 * Builds the Burrows-Wheeler transform of the text in C, keeping the suffix
 * array in a SignedCompactArray rather than a plain integer array.
 *
 * @param C        text to transform
 * @param verbose  when true, progress and the sorting time are printed to std::cerr
 * @return newly allocated CompactArray with C.size() entries of C.getB() bits each
 */
::libmaus2::bitio::CompactArray::unique_ptr_type bwtDivSufSortCompact(::libmaus2::bitio::CompactArray const & C, bool const verbose = false)
{
	typedef ::libmaus2::bitio::CompactArray text_array_type;
	typedef ::libmaus2::bitio::SignedCompactArray suffix_array_type;

	typedef text_array_type::const_iterator text_const_iterator;
	typedef text_array_type::iterator text_iterator;
	typedef suffix_array_type::const_iterator sa_const_iterator;
	typedef suffix_array_type::iterator sa_iterator;

	uint64_t const sorterBitWidth = 64;
	typedef ::libmaus2::suffixsort::DivSufSort<sorterBitWidth,text_iterator,text_const_iterator,sa_iterator,sa_const_iterator> sorter_type;

	uint64_t const textLength = C.size();
	uint64_t const symbolBits = C.getB();

	// one bit more than needed to represent textLength per suffix array entry
	suffix_array_type SA(textLength, ::libmaus2::math::bitsPerNum(textLength) + 1 );

	if ( verbose )
	{
		std::cerr << "Running divsufsort...";
	}

	::libmaus2::timing::RealTimeClock sortClock;
	sortClock.start();
	sorter_type::divsufsort ( C.begin(), SA.begin(), textLength );

	if ( verbose )
	{
		std::cerr << "done, time " << sortClock.getElapsedSeconds() << std::endl;
	}

	text_array_type::unique_ptr_type result(new text_array_type(textLength,symbolBits));

	for ( uint64_t rank = 0; rank < textLength; ++rank )
	{
		if ( SA.get(rank) == 0 )
		{
			// the suffix starting at position 0 has no predecessor: take the last symbol
			result -> set ( rank, C.get(textLength-1) );
		}
		else
		{
			// otherwise take the symbol directly preceding the suffix
			result -> set ( rank, C.get(SA.get(rank)-1) );
		}
	}

	return UNIQUE_PTR_MOVE(result);
}
예제 #3
0
파일: CSA.cpp 프로젝트: anuragkh/ds-lib
/**
 * Constructs the index from a text or from a previously stored BWT.
 *
 * @param text          input text; only used (passed to BWT()) when loadFromFile is null
 * @param n             number of symbols counted from the BWT buffer
 * @param samplerate    stored in this->samplerate before maketables() is called
 * @param loadFromFile  if non-null, the BWT is obtained via LoadFromFile() instead of BWT(text)
 * @param saveToFile    if non-null, the BWT is handed to SaveToFile() under this name
 *
 * The BWT buffer is released with delete[] once the code table and the
 * alphabet rank structure have been built from it.
 */
CSA::CSA(uchar *text, ulong n, unsigned samplerate, const char *loadFromFile, const char *saveToFile) {
    this->n = n;
    this->samplerate = samplerate;

    uchar *bwt;
    if (loadFromFile != 0)
        bwt = LoadFromFile(loadFromFile);
    else
        bwt = BWT(text);
    if (saveToFile != 0)
        SaveToFile(saveToFile, bwt);

    // Count the occurrences of every byte value in the BWT.
    // (The former scans for the smallest/largest occurring symbol were
    // removed: their results were never used, and the downward scan over an
    // unsigned index ran past C[0] when no symbol had a positive count.)
    ulong i;
    for (i=0;i<256;i++)
        C[i]=0;
    for (i=0;i<n;++i)
        C[bwt[i]]++;

    // Turn the counts into exclusive prefix sums:
    // afterwards C[c] is the number of symbols smaller than c.
    ulong prev=C[0], temp;
    C[0]=0;
    for (i=1;i<256;i++) {
        temp = C[i];
        C[i]=C[i-1]+prev;
        prev = temp;
    }
    this->codetable = node::makecodetable(bwt,n);
    alphabetrank = new THuffAlphabetRank(bwt,n, this->codetable,0);
    //if (alphabetrank->Test(bwt,n)) printf("alphabetrank ok\n");
    delete [] bwt;

    // Make tables
    maketables();
    // to avoid running out of unsigned, the sizes are computed in specific order (large/small)*small
    // |class CSA| +256*|TCodeEntry|+|C[]|+|suffixes[]+positions[]|+...
    //printf("FMindex takes %d B\n",
    //    6*W/8+256*3*W/8+256*W/8+ (2*n/(samplerate*8))*W+sampled->SpaceRequirementInBits()/8+alphabetrank->SpaceRequirementInBits()/8+W/8);
}
예제 #4
0
파일: testOS.cpp 프로젝트: dkj/libmaus2
/**
 * Shared implementation of bwtDivSufSort: sorts the suffixes of C into an
 * array of sa_value_type and derives the Burrows-Wheeler transform from it.
 *
 * @tparam sa_value_type  integer type of the suffix array entries (int32_t or int64_t)
 * @param C               text to transform
 * @param verbose         when true, progress and the sorting time are printed to std::cerr
 * @return newly allocated CompactArray with C.size() entries of C.getB() bits each
 */
template<typename sa_value_type>
static ::libmaus2::bitio::CompactArray::unique_ptr_type bwtDivSufSortWithSAType(::libmaus2::bitio::CompactArray const & C, bool const verbose)
{
	typedef ::libmaus2::bitio::CompactArray::const_iterator text_const_iterator;
	typedef ::libmaus2::bitio::CompactArray::iterator text_iterator;
	typedef sa_value_type const * sa_const_iterator;
	typedef sa_value_type * sa_iterator;

	uint64_t const bitwidth = 64;
	typedef ::libmaus2::suffixsort::DivSufSort<bitwidth,text_iterator,text_const_iterator,sa_iterator,sa_const_iterator> sort_type;

	uint64_t const n = C.size();
	uint64_t const b = C.getB();
	::libmaus2::autoarray::AutoArray< sa_value_type > SA(n,false);

	if ( verbose )
		std::cerr << "Running divsufsort...";
	::libmaus2::timing::RealTimeClock drtc; drtc.start();
	sort_type::divsufsort ( text_const_iterator(&C), SA.get(), n );
	if ( verbose )
		std::cerr << "done, time " << drtc.getElapsedSeconds() << std::endl;

	::libmaus2::bitio::CompactArray::unique_ptr_type BWT(new ::libmaus2::bitio::CompactArray(n,b));

	// BWT[i] is the symbol preceding the i-th smallest suffix; the suffix
	// starting at position 0 takes the last symbol of the text instead.
	for ( uint64_t i = 0; i < n; ++i )
		if ( SA.get(i) )
			BWT -> set ( i, C.get(SA.get(i)-1) );
		else
			BWT -> set ( i, C.get(n-1) );

	return UNIQUE_PTR_MOVE(BWT);
}

/**
 * Computes the Burrows-Wheeler transform of the text in C via divsufsort.
 *
 * Texts shorter than 2^31 symbols use a suffix array of int32_t entries
 * (every position fits), longer texts use int64_t entries.
 *
 * @param C        text to transform
 * @param verbose  when true, progress and the sorting time are printed to std::cerr
 * @return newly allocated CompactArray holding the transform
 */
::libmaus2::bitio::CompactArray::unique_ptr_type bwtDivSufSort(::libmaus2::bitio::CompactArray const & C, bool const verbose = false)
{
	if ( C.size() < (1ull << 31) )
		return bwtDivSufSortWithSAType<int32_t>(C,verbose);
	else
		return bwtDivSufSortWithSAType<int64_t>(C,verbose);
}