Ejemplo n.º 1
0
/*
 * gonzaloCompressPsi: builds a sampled, Huffman + run-length coded
 * representation of the Psi array.
 *
 * Positions i = 0 .. psiSize (inclusive) are visited, so Psi[psiSize] is read:
 * the caller must supply at least psiSize+1 entries.
 *
 * For every position the "gap" k = Psi[i] - Psi[i-1] is considered:
 *   - i % T == 0 (a sample): Psi[i] is written verbatim with pslen bits into
 *     cPsi and the bit offset where it starts is recorded in bposS;
 *   - a maximal run of gaps equal to 1 between two samples: symbol 1 of Hacc
 *     followed by (run length - 1) coded with Hlen;
 *   - 1 < k < HUFF: symbol k of Hacc;
 *   - any other gap: escape symbol 0 of Hacc followed by Psi[i] on pslen bits.
 *
 * Parameters:
 *   Psi     - values to compress (see the size requirement above)
 *   psiSize - index of the last position visited; stored as 'links'
 *   T       - sampling period; used as a divisor (must be non-zero) and to
 *             size the run-length frequency table with T-1 entries
 *   HUFF    - number of symbols of the gap alphabet (size of the acc table)
 *
 * Returns the filled GonzaloCompressedPsi. The cPsi and bposS buffers are
 * allocated here with malloc and handed over inside the returned structure.
 * Allocation results are not checked.
 */
GonzaloCompressedPsi gonzaloCompressPsi(uint *Psi, uint psiSize, uint T, uint HUFF) {

	GonzaloCompressedPsi compressedPsi;

	uint i;
	uint oi = 0;		// position where the current run of 1-gaps started
	int ok,k;			// previous gap (0 right after a sample) and current gap
	uint _cptr;			// write cursor, in bits, inside _cPsi

	uint *_cPsi;
	uint *_bposS;

	uint links = psiSize;
	uint samplen = T;
	uint _bplen;
	uint pslen;
	uint totexc;

	uint *acc,*lacc;
	THuff Hacc, Hlen;

	uint totalSize;

	// Frequency tables used to build the two Huffman trees: acc counts the
	// gap symbols, lacc counts (run length - 1). Both are freed at the end.
	acc = (uint *)malloc (HUFF*sizeof(uint));
	lacc = (uint *)malloc ((samplen-1)*sizeof(uint));
	for (i=0;i<HUFF;i++) acc[i]=0;
	for (i=0;i<samplen-1;i++) lacc[i]=0;

	// First pass: gather symbol frequencies.
	ok = 0;
	k = Psi[0];
	for (i=0;i<=links;i++) {
		if ((k == 1) && (i % samplen)) {
			// Inside a run of 1-gaps; remember where it started.
			if (ok != 1) oi = i;
		}
		else {
			if (ok == 1) {
				// A run just ended at position i (length i-oi).
				acc[1]++;
				lacc[i-oi-1]++;
			}
			if (i % samplen) {
				if ((k < 1) || (k >= HUFF)) acc[0]++;	// escape
				else acc[k]++;
			}
		}
		ok = (i % samplen) ? k : 0;
		// The gap for the next position is only needed while there is a
		// next position; this avoids reading Psi[links+1].
		if (i < links) k = Psi[i+1]-Psi[i];
	}

	// Flush a run that reaches the end of the sequence (i == links+1 here).
	if (ok == 1) {
		acc[1]++;
		lacc[i-oi-1]++;
	}

	Hacc = createHuff (acc,HUFF-1, UNSORTED);
	Hlen = createHuff (lacc,samplen-2, UNSORTED);
	totexc = acc[0];				// number of escaped values
	pslen = bits(psiSize+1);		// width of a verbatim Psi value
	// Width of a sample pointer: enough bits to address the whole stream.
	_bplen = bits(Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen);
	_bposS = (uint *)malloc ((((1+links/samplen)*_bplen+W-1)/W)*sizeof(uint));
	_cPsi  = (uint *)malloc (((Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen+W-1)/W)*sizeof(uint));

	// Second pass: same traversal, now emitting the codes.
	_cptr = 0;
	ok = 0;
	k = Psi[0];

	for (i=0;i<=links;i++) {
		if ((k == 1) && (i % samplen)) {
			if (ok != 1) oi = i;
		}
		else {
			if (ok == 1) {
				_cptr = encodeHuff (Hacc,1,_cPsi,_cptr);
				_cptr = encodeHuff (Hlen,i-oi-1,_cPsi,_cptr);
			}
			if (i % samplen) {
				if ((k > 1) && (k < HUFF)) _cptr = encodeHuff (Hacc,k,_cPsi,_cptr);
				else {
					// Escape: symbol 0 followed by the absolute value.
					_cptr = encodeHuff (Hacc,0,_cPsi,_cptr);
					bitwrite (_cPsi,_cptr,pslen,Psi[i]);
					_cptr += pslen;
				}
			}
			else {
				// Sample: record where it starts, then store it verbatim.
				bitwrite (_bposS,(i/samplen)*_bplen,_bplen,_cptr);
				bitwrite (_cPsi,_cptr,pslen,Psi[i]);
				_cptr += pslen;
			}
		}
		ok = (i % samplen) ? k : 0;
		if (i < links) k = Psi[i+1]-Psi[i];
	}

	if (ok == 1) {
		_cptr = encodeHuff (Hacc,1,_cPsi,_cptr);
		_cptr = encodeHuff (Hlen,i-oi-1,_cPsi,_cptr);
	}

	// Total space: sample pointers + coded stream + 5 scalar fields + both trees.
	totalSize = (((1+links/samplen)*_bplen+W-1)/W)*sizeof(uint) +
		((Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen+W-1)/W)*sizeof(uint) +
		5*sizeof(int) + sizeHuff(Hacc) + sizeHuff(Hlen);
	printf("\n\tCompressed Psi size = %u bytes\n", totalSize);

	// Required before the trees can be used for decoding.
	prepareToDecode(&Hacc);
	prepareToDecode(&Hlen);

	// Fill in the result structure.
	compressedPsi.links = psiSize;
	compressedPsi.totexc = totexc;
	compressedPsi.cPsi = _cPsi;
	compressedPsi.samplen = samplen;
	compressedPsi.bposS = _bposS;
	compressedPsi.bplen = _bplen;
	compressedPsi.pslen = pslen;
	compressedPsi.Hacc = Hacc;
	compressedPsi.Hlen = Hlen;
	compressedPsi.totalMem = totalSize;

	free(acc);
	free(lacc);

	return compressedPsi;
}
/*
 * createFT: builds a multi-level, variable-length chunked representation of
 * 'list' (listLength values).
 *
 * Each value is split into chunks whose widths (in bits) are given per level
 * by the array returned from optimizationk(); levels beyond the last returned
 * width reuse that last width. All chunks of a level are stored contiguously
 * in rep->levels, and a bitmap (rep->bS) marks, for every level except the
 * last one, the chunks that are the final chunk of their value.
 *
 * Parameters:
 *   list       - values to encode
 *   listLength - number of values in 'list'
 *
 * Returns a newly allocated FTRep. Allocation results are not checked.
 *
 * NOTE(review): with an empty list nLevels becomes 0 and
 * levelsIndex[nLevels-1] is read out of bounds; callers presumably never pass
 * an empty list - confirm.
 */
FTRep* createFT(uint *list,uint listLength){
	FTRep * rep = (FTRep *) malloc(sizeof(struct sFTRep));
	uint *levelSizeAux;		// number of values that own a chunk at each level
	uint *cont;				// per-level write cursor inside rep->levels, in bits
	uint *contB;			// per-level write cursor, in chunks (bitmap positions)

	ushort* kvalues;		// chunk width in bits for each level
	uint nkvalues;

	rep->listLength = listLength;
	register uint i;
	int j, k;
	uint value, newvalue;
	uint bits_BS_len;

	kvalues = optimizationk(list,listLength,&nkvalues);

	uint kval;
	uint oldval = 0;
	uint newval = 0;
	uint multval = 1;

	// Count how many cumulative bases fit in a uint: keep adding
	// 2^(k0+...+ki) until the running sum stops growing (unsigned wrap-around
	// or a zero increment). The shifts use an unsigned 1 so that a width of
	// 31 bits is well defined.
	i=0;
	do{
		oldval=newval;
		if(i>=nkvalues)
			kval = 1u<<(kvalues[nkvalues-1]);
		else
			kval = 1u<<(kvalues[i]);
		multval*=kval;
		newval = oldval+multval;

		i++;
	}
	while(oldval<newval);

	rep->tamtablebase = i;
	rep->tablebase = (uint *) malloc(sizeof(uint)*rep->tamtablebase);
	levelSizeAux = (uint *) malloc(sizeof(uint)*rep->tamtablebase);
	cont = (uint *) malloc(sizeof(uint)*rep->tamtablebase);
	contB = (uint *) malloc(sizeof(uint)*rep->tamtablebase);

	// tablebase[i] is the smallest value that needs a chunk at level i
	// (tablebase[0] == 0).
	oldval = 0;
	newval = 0;
	multval = 1;
	for(i=0;i<rep->tamtablebase;i++){
		oldval=newval;
		if(i>=nkvalues)
			kval = 1u<<(kvalues[nkvalues-1]);
		else
			kval = 1u<<(kvalues[i]);
		multval*=kval;
		newval = oldval+multval;
		rep->tablebase[i]=oldval;
	}

	for(i=0;i<rep->tamtablebase;i++)
		levelSizeAux[i]=0;

	// Count, for every level, how many values reach it.
	for (i=0;i<listLength;i++){
		value = list[i];
		for(j=0;j<rep->tamtablebase;j++)
			if(value>=rep->tablebase[j])
				levelSizeAux[j]++;
	}

	// The number of levels actually used is the index of the first empty one.
	j=0;
	while((j<rep->tamtablebase)&&(levelSizeAux[j]!=0)){
		j++;
	}
	rep->nLevels = j;

	rep->levelsIndex = (uint *) malloc(sizeof(uint)*(rep->nLevels+1));

	rep->base = (uint *)malloc(sizeof(uint)*rep->nLevels);
	rep->base_bits = (ushort *)malloc(sizeof(ushort)*rep->nLevels);

	// Per-level chunk width (base_bits) and radix (base = 2^base_bits).
	for(i=0;i<rep->nLevels;i++){
		if(i>=nkvalues){
			rep->base[i]=1u<<(kvalues[nkvalues-1]);
			rep->base_bits[i]=kvalues[nkvalues-1];
		}
		else{
			rep->base[i]=1u<<(kvalues[i]);
			rep->base_bits[i]=kvalues[i];
		}
	}

	// Total number of bits needed to store every chunk of every level.
	uint tamLevels = 0;
	for(i=0;i<rep->nLevels;i++)
		tamLevels+=rep->base_bits[i]*levelSizeAux[i];

	rep->iniLevel = (uint *)malloc(sizeof(uint)*rep->nLevels);
	rep->tamCode=tamLevels;

	// levelsIndex[j]: chunk index where level j starts (prefix sums);
	// iniLevel[j]: bit offset where level j starts inside rep->levels.
	uint indexLevel=0;
	rep->levelsIndex[0]=0;
	for(j=0;j<rep->nLevels;j++){
		rep->levelsIndex[j+1]=rep->levelsIndex[j] + levelSizeAux[j];
		rep->iniLevel[j] = indexLevel;
		cont[j]=rep->iniLevel[j];
		indexLevel+=levelSizeAux[j]*rep->base_bits[j];
		contB[j]=rep->levelsIndex[j];
	}

	rep->levels = (uint *) malloc(sizeof(uint)*(tamLevels/W+1));

	// The bitmap covers every chunk of all levels but the last, plus one
	// extra trailing bit that is set after the main loop.
	bits_BS_len = rep->levelsIndex[rep->nLevels-1]+1;

	uint * bits_BS = (uint *) malloc(sizeof(uint)*(bits_BS_len/W+1));
	for(i=0; i<((bits_BS_len)/W+1);i++)
		bits_BS[i]=0;

	for(i=0;i<listLength;i++){
		value = list[i];
		j=rep->nLevels-1;

		// Find the highest level j this value reaches; tablebase[0] is 0, so
		// the search stops at j == 0 at the latest.
		while(j>=0){
			if(value >= rep->tablebase[j]){

				newvalue = value- rep->tablebase[j];

				// Emit the low-order chunks for levels 0 .. j-1.
				for(k=0;k<j;k++){
					bitwrite(rep->levels,cont[k],rep->base_bits[k],newvalue%rep->base[k]);
					cont[k]+=rep->base_bits[k];
					contB[k]++;

					newvalue = newvalue/rep->base[k];
				}

				// Emit the final chunk at level j.
				bitwrite(rep->levels,cont[j],rep->base_bits[j],newvalue%rep->base[j]);
				cont[j]+=rep->base_bits[j];
				contB[j]++;

				// Mark it as the last chunk of its value (not needed at the
				// last level, which has no bitmap entries).
				if(j<rep->nLevels-1){
					bitset(bits_BS,contB[j]-1);
				}

				break;
			}
			j--;
		}
	}

	bitset(bits_BS,bits_BS_len-1);

	// bits_BS is not freed here; presumably the rank structure keeps it
	// (third argument) - TODO confirm against createBitRankW32Int.
	rep->bS = createBitRankW32Int(bits_BS, bits_BS_len , 1, 20);

	// Number of set bits before the start of each level. For j == 0 the
	// argument levelsIndex[0]-1 wraps around to the largest uint; rank() is
	// expected to cope with that - TODO confirm.
	rep->rankLevels = (uint *) malloc(sizeof(uint)*rep->nLevels);
	for(j=0;j<rep->nLevels;j++)
		rep->rankLevels[j]= rank(rep->bS, rep->levelsIndex[j]-1);

	free(cont);
	free(contB);
	free(levelSizeAux);
	free(kvalues);
	return rep;
}
Ejemplo n.º 3
0
/*
 * huffmanCompressPsi: builds a sampled, Huffman + run-length coded
 * representation of the first psiSize entries of Psi.
 *
 * Every T-th position (index % T == 0) is a sample: Psi[index] is stored
 * verbatim in 'samples' and the current bit offset of the coded stream is
 * stored in 'samplePointers'. For the remaining positions the difference
 * d = Psi[index] - Psi[index-1] is coded with a single Huffman tree over nS
 * symbols laid out as follows:
 *   - d                      for 1 < d < runLenghtStart (small differences);
 *   - runLenghtStart + r     for a run of r consecutive differences equal to 1;
 *   - negStart + b - 1       for d < 0, followed by |d| on b = bits(|d|) raw bits;
 *   - bigStart + b - 1       for every other d, followed by d on b = bits(d) raw bits;
 * with runLenghtStart = nS-64-T, negStart = nS-64 and bigStart = nS-32.
 *
 * Parameters:
 *   Psi     - values to compress (psiSize entries are read)
 *   psiSize - number of entries of Psi
 *   T       - sampling period (used as a divisor, must be non-zero)
 *   nS      - number of Huffman symbols; the layout above needs nS >= 64 + T
 *
 * Returns the filled HuffmanCompressedPsi. The samples, samplePointers and
 * stream buffers are allocated here with malloc and handed over inside the
 * returned structure. Allocation results are not checked.
 */
HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, size_t psiSize, unsigned int T, unsigned int nS) {

	HuffmanCompressedPsi cPsi;

	uint absolute_value;
	size_t index;
	size_t ptr, samplesPtr, samplePointersPtr;
	unsigned int runLenght, binaryLenght;

	ssize_t *diffs;
	unsigned int *huffmanDst;

	// Pieces of the compressed structure, assigned to cPsi at the end.
	THuff diffsHT;
	size_t numberOfSamples;
	unsigned int *samples;

	unsigned int sampleSize;
	size_t *samplePointers;

	unsigned int pointerSize;
	unsigned int *stream;
	size_t streamSize;

	// Lengths, in words, of the three output buffers.
	size_t samplesLen, pointersLen, streamLen;

	// Boundaries of the symbol classes inside the frequency vector.
	unsigned int runLenghtStart = nS - 64 - T; 	// start of the run symbols
	unsigned int negStart = nS - 64;			// start of the negative symbols
	unsigned int bigStart = nS - 32;			// start of the large symbols

	// For statistics.
	size_t totalSize;

	// Frequency of every symbol.
	huffmanDst = (unsigned int *)malloc(sizeof(int)*nS);
	for(index=0;index<nS;index++) huffmanDst[index]=0;

	// Differences between consecutive Psi values.
	diffs = (ssize_t *)malloc(sizeof(ssize_t)*psiSize);

	// With psiSize == 0 the allocation has no room for diffs[0].
	if(psiSize > 0) diffs[0] = 0;
	for(index=1; index<psiSize; index++)
		diffs[index] = ((ssize_t)Psi[index]) - ((ssize_t)Psi[index-1]);

	// First pass: gather symbol frequencies.
	runLenght = 0;
	for(index=0; index<psiSize; index++) {

		if(index%T) {

			if(diffs[index]== ((ssize_t) 1) ) {
				runLenght++;
			} else {	// not inside a run
				if(runLenght) {
					huffmanDst[runLenght+runLenghtStart]++;
					runLenght = 0;
				}
				if(diffs[index]>((ssize_t)1) && diffs[index]<runLenghtStart)
					huffmanDst[diffs[index]]++;
				else
					if(diffs[index]< ((ssize_t) 0) ) {	// negative value
						absolute_value = (uint) (-diffs[index]);
						binaryLenght = bits(absolute_value);
						huffmanDst[binaryLenght+negStart-1]++;
					} else {				// large value (>= runLenghtStart)
						absolute_value = (uint)(diffs[index]);
						binaryLenght = bits(absolute_value);
						huffmanDst[binaryLenght+bigStart-1]++;
					}
			}

		} else { // a sample breaks the current run
			if(runLenght) {
				huffmanDst[runLenght+runLenghtStart]++;
				runLenght = 0;
			}
		}

	}

	if(runLenght) huffmanDst[runLenght+runLenghtStart]++;

	// Build the Huffman tree.
	diffsHT = createHuff(huffmanDst,nS-1,UNSORTED);

	// Size in bits of the coded stream: Huffman codes plus the raw bits that
	// follow every negative / large symbol.
	streamSize = diffsHT.total;
	for(index=negStart;index<bigStart;index++)
		streamSize += ((size_t)huffmanDst[index])*(index-negStart+1);	// negatives
	for(index=bigStart;index<nS;index++)
		streamSize += ((size_t)huffmanDst[index])*(index-bigStart+1);	// large values

	// Number of samples and width of each sample and each sample pointer.
	numberOfSamples = (psiSize+T-1)/T;
	sampleSize = bits(psiSize);
	pointerSize = bits(streamSize);

	// Allocate the three output buffers. The last word of each one is zeroed
	// only to avoid valgrind warnings; the write is skipped for an empty
	// buffer (e.g. streamSize == 0 when every position is a sample), where
	// index len-1 would fall before the start of the allocation.
	samplesLen = (numberOfSamples*sampleSize+W-1)/W;
	samples = (unsigned int *)malloc(sizeof(uint)*samplesLen);
	if(samplesLen > 0) samples[samplesLen-1] = 0;

	pointersLen = ulong_len(pointerSize,numberOfSamples);
	samplePointers = (size_t *)malloc(sizeof(size_t)*pointersLen);
	if(pointersLen > 0) samplePointers[pointersLen-1] = 0;

	streamLen = (streamSize+W-1)/W;
	stream = (unsigned int *)malloc(sizeof(int)*streamLen);
	if(streamLen > 0) stream[streamLen-1] = 0;

	// Second pass: same traversal, now emitting the codes sequentially.
	ptr = 0;
	samplesPtr = 0;
	samplePointersPtr = 0;
	runLenght = 0;
	for(index=0; index<psiSize; index++) {

		if(index%T) {

			if(diffs[index]==((ssize_t)1)) {
				runLenght++;
			} else {	// not inside a run
				if(runLenght) {
					ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
					runLenght = 0;
				}
				if(diffs[index]>((ssize_t)1) && diffs[index]<runLenghtStart) {
					ptr = encodeHuff(diffsHT,(uint)diffs[index],stream,ptr);
				}
				else
					if(diffs[index]< ((ssize_t)0) ) {	// negative value
						absolute_value = (uint) (-diffs[index]);
						binaryLenght = bits(absolute_value);
						ptr = encodeHuff(diffsHT,binaryLenght+negStart-1,stream,ptr);
						bitwrite(stream,ptr,binaryLenght,absolute_value);
						ptr += binaryLenght;
					} else {				// large value (>= runLenghtStart)
						absolute_value = (uint) diffs[index];
						binaryLenght = bits(absolute_value);
						ptr = encodeHuff(diffsHT,binaryLenght+bigStart-1,stream,ptr);
						bitwrite(stream,ptr,binaryLenght,absolute_value);
						ptr += binaryLenght;
					}
			}

		} else { // a sample breaks the current run
			if(runLenght) {
				ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
				runLenght = 0;
			}
			// Store the sample and the stream offset that follows it.
			bitwrite(samples,samplesPtr,sampleSize, Psi[index]);
			samplesPtr += sampleSize;
			bitwrite64(samplePointers,samplePointersPtr,pointerSize,ptr);
			samplePointersPtr += pointerSize;
		}

	}

	if(runLenght) {
		ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
	}

	// Report the space used.
	totalSize = sizeof(HuffmanCompressedPsi) +
		sizeof(int)*((numberOfSamples*sampleSize+W-1)/W) +
		sizeof(size_t)*((numberOfSamples*pointerSize+WW-1)/WW) +
		sizeof(int)*((streamSize+W-1)/W) + sizeHuff(diffsHT);

	printf("\n\t Compressed Psi size = %zu bytes, with %u different symbols.", totalSize, nS);

	// Required before the tree can be used for decoding.
	prepareToDecode(&diffsHT);

	// Fill in the result structure.
	cPsi.T = T;
	cPsi.diffsHT = diffsHT;
	cPsi.nS = nS;
	cPsi.numberOfSamples = numberOfSamples;
	cPsi.samples = samples;
	cPsi.sampleSize = sampleSize;
	cPsi.samplePointers = samplePointers;
	cPsi.pointerSize = pointerSize;
	cPsi.stream = stream;
	cPsi.streamSize = streamSize;
	cPsi.totalMem = totalSize;

	// Release the temporaries that are no longer needed.
	free(diffs);
	free(huffmanDst);

	// Return the structure that holds the compressed Psi.
	return cPsi;
}