Beispiel #1
0
    /**
     * Builds the per-symbol codeword table and rebuilds the decoding tree.
     *
     * For every symbol s in [0, min(255, huff_table.max)] the symbol is encoded
     * with encodeHuff() into a scratch bit buffer; the resulting bits are then
     * re-read to (a) assemble a right-aligned codeword and (b) create the path
     * of BinaryNode children (0 -> leftChild, 1 -> rightChild) that ends in a
     * node whose `position` is s.
     *
     * This implementation is meant for codewords of at most 32 bits: only the
     * first word of the scratch buffer is inspected.
     *
     * @return A heap array (allocated with new[]; the caller releases it with
     *         delete[]) holding at least max(256, huff_table.max + 1) entries.
     *         Entries that received no code are zero-initialised.
     *
     * NOTE(review): `tree` is overwritten without releasing a previously held
     * tree -- confirm whether the owner frees it elsewhere.
     */
    Codeword*
    Huffman::obtainCodewords()
    {
    	// Only byte symbols are assigned a codeword (the original loop bound).
    	const uint byteSymbols = 256;
    	const uint tableSymbols = huff_table.max + 1;

    	// Never query symbols that the Huffman table does not contain, and never
    	// write past the end of the returned array: the previous version always
    	// looped over 0..255 while allocating only huff_table.max+1 entries.
    	const uint assigned = (tableSymbols < byteSymbols) ? tableSymbols : byteSymbols;
    	const uint capacity = (tableSymbols > byteSymbols) ? tableSymbols : byteSymbols;

    	// Codewords are right-aligned to be compliant with the decoding table.
    	// The trailing () value-initialises every entry.
    	Codeword* codewords = new Codeword[capacity]();

    	// The Huffman tree is rebuilt for the new code assignment
    	tree = new BinaryNode();

    	// Scratch buffer that encodeHuff() writes each code into, from bit 0.
    	uint stream[256] = {0};

    	for (uint symbol = 0; symbol < assigned; symbol++)
    	{
    		BinaryNode *current = tree;
    		Codeword &entry = codewords[symbol];

    		// encodeHuff() returns the bit position after the code; as encoding
    		// starts at position 0 this is the code length in bits.
    		entry.bits = encodeHuff(huff_table, symbol, stream, (size_t)0);
    		entry.codeword = 0;

    		for (uint j = 0; j < entry.bits; j++)
    		{
    			const bool bit = (stream[0] >> j) & 1;
    			entry.codeword = (entry.codeword << 1) | bit;

    			// Walk (creating nodes on demand) the branch selected by the bit.
    			if (!bit)
    			{
    				if (current->leftChild == NULL) current->leftChild = new BinaryNode();
    				current = current->leftChild;
    			}
    			else
    			{
    				if (current->rightChild == NULL) current->rightChild = new BinaryNode();
    				current = current->rightChild;
    			}
    		}

    		// The node reached at the end of the path identifies the symbol.
    		current->position = symbol;

    		// Clear the inspected word before encoding the next symbol.
    		stream[0] = 0;
    	}

    	return codewords;
    }
Beispiel #2
0
/*
 * Compresses the Psi array using sampling, run-length coding of runs of +1
 * differences and Huffman coding of the remaining differences.
 *
 * Layout produced (as written by the code below):
 *   - every position i with i % T == 0 is a sample: Psi[i] is stored verbatim
 *     with pslen bits in cPsi, and the bit offset of that sample inside cPsi
 *     is stored with bplen bits in bposS;
 *   - between samples, a maximal run of differences equal to 1 is coded as the
 *     Hacc symbol 1 followed by the Hlen symbol (run length - 1);
 *   - a difference k with 1 < k < HUFF is coded as the Hacc symbol k;
 *   - any other difference is coded as the Hacc symbol 0 (an "exception")
 *     followed by Psi[i] verbatim with pslen bits.
 *
 * Parameters:
 *   Psi     - input array. Both passes visit i = 0..psiSize inclusive, so it
 *             must hold at least psiSize+1 entries.
 *   psiSize - value stored as `links`; last index visited.
 *   T       - sampling period (`samplen`). The frequency array for run lengths
 *             has T-1 entries and its Huffman code is built over T-2, so
 *             presumably T >= 2 is required -- confirm against callers.
 *   HUFF    - number of symbols of the difference alphabet (size of `acc`).
 *
 * Returns the filled GonzaloCompressedPsi. cPsi and bposS are malloc'ed here
 * and handed over inside the returned structure. Allocation failures are not
 * detected.
 */
GonzaloCompressedPsi gonzaloCompressPsi(uint *Psi, uint psiSize, uint T, uint HUFF) {

	GonzaloCompressedPsi compressedPsi;

	uint i;
	uint oi = 0;		// index at which the current run of +1 differences started
	int ok,k;			// previous / current difference (ok is 0 right after a sample)
	uint _cptr;			// write position, in bits, inside _cPsi

	uint *_cPsi;
	uint *_bposS;

	uint links = psiSize;
	uint samplen = T;
	uint _bplen;
	uint pslen;
	uint totexc;

	uint *acc,*lacc;
	THuff Hacc, Hlen;

	uint totalSize;

	// Build the two Huffman codes: one for the difference values and one for
	// the run lengths. acc and lacc are auxiliary frequency vectors that are
	// released at the end.
	acc = (uint *)malloc (HUFF*sizeof(uint));
	lacc = (uint *)malloc ((samplen-1)*sizeof(uint));
	for (k=0;k<HUFF;k++) acc[k]=0;
	for (k=0;k<samplen-1;k++) lacc[k]=0;

	// First pass: gather symbol frequencies.
	ok = 0;
	k = Psi[0];
	for (i=0;i<=links;i++) {
		if ((k == 1) && (i % samplen)) {
			// Inside a run; remember where it started.
			if (ok != 1) oi = i;
		}
		else {
			if (ok == 1) {
				// A run just ended: one run marker plus its length.
				acc[1]++;
				lacc[i-oi-1]++;
			}
			if (i % samplen) {
				if ((k < 1) || (k >= HUFF)) acc[0]++;	// exception
				else acc[k]++;
			}
		}
		ok = (i % samplen) ? k : 0;
		// The difference computed on the last iteration was never used, yet it
		// read Psi[links+1]; skip it to stay inside the array.
		if (i < links) k = Psi[i+1]-Psi[i];
	}

	// Close a run that reaches the end of the sequence.
	if (ok == 1) {
		acc[1]++;
		lacc[i-oi-1]++;
	}

	Hacc = createHuff (acc,HUFF-1, UNSORTED);
	Hlen = createHuff (lacc,samplen-2, UNSORTED);
	totexc = acc[0];
	pslen = bits(psiSize+1);
	_bplen = bits(Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen);
	_bposS = (uint *)malloc ((((1+links/samplen)*_bplen+W-1)/W)*sizeof(uint));
	_cPsi  = (uint *)malloc (((Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen+W-1)/W)*sizeof(uint));

	// Second pass: same traversal, now emitting the codes.
	_cptr = 0;
	ok = 0;
	k = Psi[0];

	for (i=0;i<=links;i++) {

		if ((k == 1) && (i % samplen)) {
			if (ok != 1) oi = i;
		}
		else {
			if (ok == 1) {
				_cptr = encodeHuff (Hacc,1,_cPsi,_cptr);
				_cptr = encodeHuff (Hlen,i-oi-1,_cPsi,_cptr);
			}
			if (i % samplen) {
				if ((k > 1) && (k < HUFF)) {
					_cptr = encodeHuff (Hacc,k,_cPsi,_cptr);
				}
				else {
					// Exception: escape symbol followed by the absolute value.
					_cptr = encodeHuff (Hacc,0,_cPsi,_cptr);
					bitwrite (_cPsi,_cptr,pslen,Psi[i]);
					_cptr += pslen;
				}
			}
			else {
				// Sample: record where it starts, then store it verbatim.
				bitwrite (_bposS,(i/samplen)*_bplen,_bplen,_cptr);
				bitwrite (_cPsi,_cptr,pslen,Psi[i]);
				_cptr += pslen;
			}
		}
		ok = (i % samplen) ? k : 0;
		if (i < links) k = Psi[i+1]-Psi[i];	// see the first pass
	}

	if (ok == 1) {
		_cptr = encodeHuff (Hacc,1,_cPsi,_cptr);
		_cptr = encodeHuff (Hlen,i-oi-1,_cPsi,_cptr);
	}

	// Compute the total space used
	totalSize = (((1+links/samplen)*_bplen+W-1)/W)*sizeof(uint) +
		((Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen+W-1)/W)*sizeof(uint) +
		5*sizeof(int) + sizeHuff(Hacc) + sizeHuff(Hlen);
	printf("\n\tCompressed Psi size = %u bytes\n", totalSize);

	// Required before decoding
	prepareToDecode(&Hacc);
	prepareToDecode(&Hlen);

	// Fill in the result structure and return the compressed Psi
	compressedPsi.links = psiSize;
	compressedPsi.totexc = totexc;
	compressedPsi.cPsi = _cPsi;
	compressedPsi.samplen = samplen;
	compressedPsi.bposS = _bposS;
	compressedPsi.bplen = _bplen;
	compressedPsi.pslen = pslen;
	compressedPsi.Hacc = Hacc;
	compressedPsi.Hlen = Hlen;
	compressedPsi.totalMem = totalSize;

	free(acc);
	free(lacc);

	return compressedPsi;
}
Beispiel #3
0
 /**
  * Encodes one symbol with this object's Huffman table.
  *
  * Forwards to encodeHuff() using `huff_table`.
  *
  * @param symb   symbol to encode
  * @param stream bit buffer handed to encodeHuff()
  * @param pos    position handed to encodeHuff()
  * @return the value returned by encodeHuff()
  */
 uint64_t Huffman::encode(uint symb, uint * stream, uint64_t pos){
     const uint64_t nextPos = encodeHuff(huff_table, symb, stream, pos);
     return nextPos;
 }
Beispiel #4
0
/*
 * Compresses the Psi array with sampling + run-length + Huffman coding of the
 * differences Psi[i] - Psi[i-1].
 *
 * Every position i with i % T == 0 is a sample: Psi[i] is stored verbatim in
 * `samples` (sampleSize bits each) and the current bit offset inside `stream`
 * is stored in `samplePointers` (pointerSize bits each). Every other position
 * contributes to `stream` through a single Huffman code over nS symbols:
 *
 *   d                          1 < d < runLengthStart : literal difference d
 *   runLengthStart + r         run of r consecutive differences equal to 1
 *   negStart + b - 1           negative difference; followed by |d| in b raw bits
 *   bigStart + b - 1           any other difference; followed by d in b raw bits
 *
 * where b = bits(|d|), runLengthStart = nS-64-T, negStart = nS-64 and
 * bigStart = nS-32.
 *
 * Parameters:
 *   Psi     - input array with psiSize entries.
 *   psiSize - number of entries of Psi.
 *   T       - sampling period; used as a divisor, so it must be >= 1.
 *   nS      - size of the Huffman alphabet. The interval starts are computed
 *             with unsigned arithmetic, so presumably nS >= 64 + T is required
 *             -- confirm against callers.
 *
 * Returns the filled HuffmanCompressedPsi. samples, samplePointers and stream
 * are allocated here and handed over inside the returned structure. Allocation
 * failures are not detected.
 */
HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, size_t psiSize, unsigned int T, unsigned int nS) {

	HuffmanCompressedPsi cPsi;

	uint absolute_value;
	size_t index;
	size_t ptr, samplesPtr, samplePointersPtr;
	unsigned int runLength, binaryLength;

	ssize_t *diffs;
	unsigned int *huffmanDst;

	// Members of the compressed function (assigned to cPsi at the end)
	THuff diffsHT;
	size_t numberOfSamples;
	unsigned int *samples;

	unsigned int sampleSize;
	size_t *samplePointers;

	unsigned int pointerSize;
	unsigned int *stream;
	size_t streamSize;

	// Number of words of each output buffer
	size_t samplesWords, pointersWords, streamWords;

	// Start of each interval inside the frequency vector
	unsigned int runLengthStart = nS - 64 - T; 	// Runs
	unsigned int negStart = nS - 64;			// Negative values
	unsigned int bigStart = nS - 32;			// Large values (>= runLengthStart)

	// For statistics
	size_t totalSize;

	// Frequency of every symbol of the alphabet, initially zero
	huffmanDst = (unsigned int *)calloc(nS ? nS : 1, sizeof(unsigned int));

	// Differences between consecutive Psi values. At least one entry is
	// allocated because diffs[0] is written even when psiSize is 0.
	diffs = (ssize_t *)malloc(sizeof(ssize_t)*(psiSize ? psiSize : 1));

	diffs[0] = 0;
	for(index=1; index<psiSize; index++)
		diffs[index] = ((ssize_t)Psi[index]) - ((ssize_t)Psi[index-1]);

	// First pass: compute the frequency distribution
	runLength = 0;
	for(index=0; index<psiSize; index++) {

		if(index%T) {

			if(diffs[index] == ((ssize_t)1)) {
				runLength++;
			} else {	// Not inside a run
				if(runLength) {
					huffmanDst[runLength+runLengthStart]++;
					runLength = 0;
				}
				if(diffs[index]>((ssize_t)1) && diffs[index]<runLengthStart) {
					huffmanDst[diffs[index]]++;
				} else if(diffs[index] < ((ssize_t)0)) {	// Negative value
					absolute_value = (uint) (-diffs[index]);
					binaryLength = bits(absolute_value);
					huffmanDst[binaryLength+negStart-1]++;
				} else {								// Large value
					absolute_value = (uint) (diffs[index]);
					binaryLength = bits(absolute_value);
					huffmanDst[binaryLength+bigStart-1]++;
				}
			}

		} else { // A sample breaks the current run
			if(runLength) {
				huffmanDst[runLength+runLengthStart]++;
				runLength = 0;
			}
		}

	}

	if(runLength) huffmanDst[runLength+runLengthStart]++;

	// Build the Huffman code
	diffsHT = createHuff(huffmanDst,nS-1,UNSORTED);

	// Total size in bits of the Huffman + RLE sequence: the Huffman codes plus
	// the raw bits that follow every negative / large escape symbol
	streamSize = diffsHT.total;
	for(index=negStart;index<bigStart;index++)
		streamSize += ((size_t)huffmanDst[index])*(index-negStart+1);	// Negative
	for(index=bigStart;index<nS;index++)
		streamSize += ((size_t)huffmanDst[index])*(index-bigStart+1);	// Large

	// Number of samples and bit width of each sample and of each pointer
	numberOfSamples = (psiSize+T-1)/T;
	sampleSize = bits(psiSize);
	pointerSize = bits(streamSize);

	// Allocate the sequence, the samples and the pointers.
	// The buffers are zero-filled (no uninitialised padding bits) and hold at
	// least one word. The previous code stored a zero in element [words-1] to
	// silence valgrind, which wrote before the start of the buffer whenever a
	// buffer was empty (e.g. streamSize == 0 when T == 1 or psiSize <= 1).
	samplesWords = (numberOfSamples*sampleSize+W-1)/W;
	pointersWords = ulong_len(pointerSize,numberOfSamples);
	streamWords = (streamSize+W-1)/W;

	samples = (unsigned int *)calloc(samplesWords ? samplesWords : 1, sizeof(unsigned int));
	samplePointers = (size_t *)calloc(pointersWords ? pointersWords : 1, sizeof(size_t));
	stream = (unsigned int *)calloc(streamWords ? streamWords : 1, sizeof(unsigned int));

	// Second pass: compress sequentially, keeping a write pointer per buffer
	ptr = 0;
	samplesPtr = 0;
	samplePointersPtr = 0;
	runLength = 0;
	for(index=0; index<psiSize; index++) {

		if(index%T) {

			if(diffs[index] == ((ssize_t)1)) {
				runLength++;
			} else {	// Not inside a run
				if(runLength) {
					ptr = encodeHuff(diffsHT,runLength+runLengthStart,stream,ptr);
					runLength = 0;
				}
				if(diffs[index]>((ssize_t)1) && diffs[index]<runLengthStart) {
					ptr = encodeHuff(diffsHT,(uint)diffs[index],stream,ptr);
				} else if(diffs[index] < ((ssize_t)0)) {	// Negative value
					absolute_value = (uint) (-diffs[index]);
					binaryLength = bits(absolute_value);
					ptr = encodeHuff(diffsHT,binaryLength+negStart-1,stream,ptr);
					bitwrite(stream,ptr,binaryLength,absolute_value);
					ptr += binaryLength;
				} else {								// Large value
					absolute_value = (uint) diffs[index];
					binaryLength = bits(absolute_value);
					ptr = encodeHuff(diffsHT,binaryLength+bigStart-1,stream,ptr);
					bitwrite(stream,ptr,binaryLength,absolute_value);
					ptr += binaryLength;
				}
			}

		} else { // A sample breaks the current run
			if(runLength) {
				ptr = encodeHuff(diffsHT,runLength+runLengthStart,stream,ptr);
				runLength = 0;
			}
			// Store the sample and the stream offset at which decoding resumes
			bitwrite(samples,samplesPtr,sampleSize, Psi[index]);
			samplesPtr += sampleSize;
			bitwrite64(samplePointers,samplePointersPtr,pointerSize,ptr);
			samplePointersPtr += pointerSize;
		}

	}

	if(runLength) {
		ptr = encodeHuff(diffsHT,runLength+runLengthStart,stream,ptr);
	}

	// Report the space used
	totalSize = sizeof(HuffmanCompressedPsi) +
		sizeof(int)*((numberOfSamples*sampleSize+W-1)/W) +
		sizeof(size_t)*((numberOfSamples*pointerSize+WW-1)/WW) +
		sizeof(int)*((streamSize+W-1)/W) + sizeHuff(diffsHT);

	printf("\n\t Compressed Psi size = %zu bytes, with %u different symbols.", totalSize, nS);

	// Required before decoding
	prepareToDecode(&diffsHT);

	// Fill in cPsi and return it
	cPsi.T = T;
	cPsi.diffsHT = diffsHT;
	cPsi.nS = nS;
	cPsi.numberOfSamples = numberOfSamples;
	cPsi.samples = samples;
	cPsi.sampleSize = sampleSize;
	cPsi.samplePointers = samplePointers;
	cPsi.pointerSize = pointerSize;
	cPsi.stream = stream;
	cPsi.streamSize = streamSize;
	cPsi.totalMem = totalSize;

	// Release the auxiliary buffers that are no longer needed
	free(diffs);
	free(huffmanDst);

	// Return the structure that holds the compressed Psi
	return cPsi;
}