/**
 * Builds the codeword table for the byte symbols 0..255 and rebuilds the
 * member Huffman tree so that it mirrors the same code assignment.
 *
 * This implementation is optimized for managing codewords up to 32 bits:
 * only the first word of the scratch stream is inspected after encoding.
 *
 * @return An array allocated with new[] holding at least 256 entries (and at
 *         least huff_table.max+1 entries). The function does not keep the
 *         pointer; the caller is expected to release it with delete[].
 */
Codeword* Huffman::obtainCodewords()
{
    // One codeword is generated per byte value.
    const uint symbolCount = 256;

    // The loop below writes entries 0..255 unconditionally, so the table has
    // to hold at least 256 entries even when huff_table.max is smaller;
    // sizing it with huff_table.max+1 alone lets those writes run past the
    // end of the allocation.
    uint tableSize = huff_table.max + 1;
    if (tableSize < symbolCount)
        tableSize = symbolCount;

    // Codewords are right-aligned to be compliant with the decoding table.
    // Value-initialization keeps any entry above 255 in a defined state.
    Codeword* codewords = new Codeword[tableSize]();

    // The Huffman tree is rebuilt for the new code assignment.
    // NOTE(review): a tree previously held by the member is not released
    // here; confirm who owns it.
    tree = new BinaryNode();

    // Scratch bit stream that receives one encoded symbol at a time.
    uint stream[256] = {0};

    // An earlier version iterated up to huff_table.max; the table is now
    // always built for the 256 byte values.
    // NOTE(review): symbols above huff_table.max are still handed to
    // encodeHuff, as before; confirm that encodeHuff tolerates them.
    for (uint symbol = 0; symbol < symbolCount; symbol++)
    {
        BinaryNode *current = tree;

        // Encoding at bit offset 0 yields the code length in bits.
        codewords[symbol].bits = encodeHuff(huff_table, symbol, stream, (size_t)0);
        codewords[symbol].codeword = 0;

        for (uint j = 0; j < codewords[symbol].bits; j++)
        {
            // Bit j of the stream is appended as the next (lower) bit of the
            // right-aligned codeword.
            bool bit = (stream[0] >> j) & 1;
            codewords[symbol].codeword = codewords[symbol].codeword << 1;
            codewords[symbol].codeword = (codewords[symbol].codeword | bit);

            // Walk (and create on demand) the tree path: 0 = left, 1 = right.
            if (bit == 0)
            {
                if (current->leftChild == NULL)
                    current->leftChild = new BinaryNode();
                current = current->leftChild;
            }
            else
            {
                if (current->rightChild == NULL)
                    current->rightChild = new BinaryNode();
                current = current->rightChild;
            }
        }

        // The node reached at the end of the path records the symbol.
        current->position = symbol;

        // Clear the word that was inspected before encoding the next symbol.
        stream[0] = 0;
    }

    return codewords;
}
/**
 * Compresses Psi by sampling one absolute value every T positions and
 * Huffman-coding the gaps in between, with runs of +1 gaps encoded as
 * (symbol 1, run length).
 *
 * Two Huffman tables are built: Hacc for the gap values and Hlen for the run
 * lengths. Gaps that are < 1 or >= HUFF are encoded as symbol 0 followed by
 * the absolute value Psi[i] written in pslen bits.
 *
 * @param Psi      Input array. The scan loops visit i = 0..psiSize inclusive,
 *                 so Psi must hold at least psiSize+1 entries.
 * @param psiSize  Value stored as `links`; last index visited by the loops.
 * @param T        Sampling period. It is used as a modulus (must be nonzero)
 *                 and sizes the run-length frequency vector as T-1 entries.
 * @param HUFF     Number of entries of the gap frequency vector; gaps in
 *                 [2, HUFF) are Huffman-coded directly.
 * @return The filled GonzaloCompressedPsi. The cPsi and bposS buffers are
 *         allocated with malloc and handed over inside the structure.
 */
GonzaloCompressedPsi gonzaloCompressPsi(uint *Psi, uint psiSize, uint T, uint HUFF)
{
    GonzaloCompressedPsi compressedPsi;

    uint i;
    uint oi = 0;        // index at which the current run of +1 gaps started
    int ok, k;          // previous gap (0 at sample positions) / current gap
    uint _cptr;         // write position, in bits, inside _cPsi
    uint *_cPsi;
    uint *_bposS;
    uint links = psiSize;
    uint samplen = T;
    uint _bplen;
    uint pslen;
    uint totexc;
    uint *acc, *lacc;
    THuff Hacc, Hlen;
    uint totalSize;

    // Build the Huffman trees: one for the direct values and one for the run
    // lengths. acc and lacc are auxiliary frequency vectors, freed at the end.
    acc = (uint *)malloc (HUFF*sizeof(uint));
    lacc = (uint *)malloc ((samplen-1)*sizeof(uint));
    for (uint s = 0; s < HUFF; s++) acc[s] = 0;
    for (uint s = 0; s < samplen-1; s++) lacc[s] = 0;

    // First pass: gather symbol frequencies.
    ok = 0;
    k = Psi[0];
    for (i = 0; i <= links; i++)
    {
        if ((k == 1) && (i % samplen))
        {
            // Inside a run of +1 gaps; remember where it began.
            if (ok != 1) oi = i;
        }
        else
        {
            if (ok == 1)
            {
                // A run has just ended: count the run marker and its length.
                acc[1]++;
                lacc[i-oi-1]++;
            }
            if (i % samplen)
            {
                if ((k < 1) || (k >= HUFF)) acc[0]++;
                else acc[k]++;
            }
        }
        ok = (i % samplen) ? k : 0;
        // The gap computed on the last iteration is never used, so skip it
        // instead of reading Psi[links+1].
        if (i < links) k = Psi[i+1]-Psi[i];
    }
    if (ok == 1)
    {
        acc[1]++;
        lacc[i-oi-1]++;
    }

    Hacc = createHuff (acc,HUFF-1, UNSORTED);
    Hlen = createHuff (lacc,samplen-2, UNSORTED);
    totexc = acc[0];
    pslen = bits(psiSize+1);
    _bplen = bits(Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen);

    // Buffer sizes in bytes, reused for the allocations and for the total.
    const size_t bposBytes = (((1+links/samplen)*_bplen+W-1)/W)*sizeof(uint);
    const size_t cPsiBytes = ((Hacc.total+Hlen.total+(1+links/samplen+totexc)*pslen+W-1)/W)*sizeof(uint);
    _bposS = (uint *)malloc (bposBytes);
    _cPsi = (uint *)malloc (cPsiBytes);

    // Second pass: same scan as above, now emitting the encoded stream.
    _cptr = 0;
    ok = 0;
    k = Psi[0];
    for (i = 0; i <= links; i++)
    {
        if ((k == 1) && (i % samplen))
        {
            if (ok != 1) oi = i;
        }
        else
        {
            if (ok == 1)
            {
                _cptr = encodeHuff (Hacc,1,_cPsi,_cptr);
                _cptr = encodeHuff(Hlen,i-oi-1,_cPsi,_cptr);
            }
            if (i % samplen)
            {
                if ((k > 1) && (k < HUFF))
                {
                    _cptr = encodeHuff (Hacc,k,_cPsi,_cptr);
                }
                else
                {
                    // Exception: escape symbol followed by the absolute value.
                    _cptr = encodeHuff (Hacc,0,_cPsi,_cptr);
                    bitwrite (_cPsi,_cptr,pslen,Psi[i]);
                    _cptr += pslen;
                }
            }
            else
            {
                // Sample position: record where it starts in the stream and
                // store the absolute value.
                bitwrite (_bposS,(i/samplen)*_bplen,_bplen,_cptr);
                bitwrite (_cPsi,_cptr,pslen,Psi[i]);
                _cptr += pslen;
            }
        }
        ok = (i % samplen) ? k : 0;
        // See the first pass: avoid the unused read of Psi[links+1].
        if (i < links) k = Psi[i+1]-Psi[i];
    }
    if (ok == 1)
    {
        _cptr = encodeHuff (Hacc,1,_cPsi,_cptr);
        _cptr = encodeHuff(Hlen,i-oi-1,_cPsi,_cptr);
    }

    // Compute the total space.
    totalSize = bposBytes + cPsiBytes + 5*sizeof(int) + sizeHuff(Hacc) + sizeHuff(Hlen);
    // totalSize is unsigned, so it is printed with %u.
    printf("\n\tCompressed Psi size = %u bytes\n", totalSize);

    // Required before decoding.
    prepareToDecode(&Hacc);
    prepareToDecode(&Hlen);

    // Assign the values and return the compressed psi.
    compressedPsi.links = psiSize;
    compressedPsi.totexc = totexc;
    compressedPsi.cPsi = _cPsi;
    compressedPsi.samplen = samplen;
    compressedPsi.bposS = _bposS;
    compressedPsi.bplen = _bplen;
    compressedPsi.pslen = pslen;
    compressedPsi.Hacc = Hacc;
    compressedPsi.Hlen = Hlen;
    compressedPsi.totalMem = totalSize;

    free(acc);
    free(lacc);

    return compressedPsi;
}
/**
 * Encodes one symbol with this coder's Huffman table.
 *
 * Forwards huff_table, symb, stream and pos to encodeHuff and returns its
 * result unchanged (elsewhere in this file that result is used as the
 * updated bit position in the stream).
 */
uint64_t Huffman::encode(uint symb, uint * stream, uint64_t pos)
{
    const uint64_t nextPos = encodeHuff(huff_table, symb, stream, pos);
    return nextPos;
}
/**
 * Compresses Psi with absolute samples every T positions and a single
 * Huffman + run-length coded stream for the differences in between.
 *
 * Symbol layout of the frequency vector (nS entries):
 *   - [2, runLenghtStart)                      : the difference itself;
 *   - runLenghtStart + r                       : a run of r consecutive +1
 *                                                differences;
 *   - [negStart, bigStart)  = negStart + b - 1 : negative difference whose
 *                                                absolute value needs b bits;
 *   - [bigStart, nS)        = bigStart + b - 1 : any other difference, b bits.
 * For the last two classes the absolute value is written in b bits right
 * after the Huffman symbol.
 *
 * @param Psi      Input array of psiSize entries.
 * @param psiSize  Number of entries of Psi.
 * @param T        Sampling period; used as a modulus, so it must be nonzero.
 * @param nS       Number of Huffman symbols. nS - 64 - T is evaluated in
 *                 unsigned arithmetic, so nS is expected to be >= 64 + T.
 * @return The filled HuffmanCompressedPsi. The samples, samplePointers and
 *         stream buffers are allocated with malloc and handed over inside
 *         the structure.
 */
HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, size_t psiSize, unsigned int T, unsigned int nS)
{
    HuffmanCompressedPsi cPsi;

    uint absolute_value;
    size_t index;
    size_t ptr, samplesPtr, samplePointersPtr;
    unsigned int runLenght, binaryLenght;

    ssize_t *diffs;
    unsigned int *huffmanDst;

    // Members of the compressed function (assigned to cPsi at the end).
    // They could also be stored directly.
    THuff diffsHT;
    size_t numberOfSamples;
    unsigned int *samples;
    unsigned int sampleSize;
    size_t *samplePointers;
    unsigned int pointerSize;
    unsigned int *stream;
    size_t streamSize;

    // Boundaries of the intervals inside the frequency vector.
    unsigned int runLenghtStart = nS - 64 - T;  // start of the runs
    unsigned int negStart = nS - 64;            // start of the negatives
    unsigned int bigStart = nS - 32;            // start of the large values (>= runLenghtStart)

    // For statistics.
    size_t totalSize;

    // Allocate the frequency distribution of the Psi differences.
    huffmanDst = (unsigned int *)malloc(sizeof(int)*nS);
    for(index=0;index<nS;index++) huffmanDst[index]=0;

    // Initialize the differences. The first slot only exists when the input
    // is non-empty.
    diffs = (ssize_t *)malloc(sizeof(ssize_t)*psiSize);
    if(psiSize > 0) diffs[0] = 0;
    for(index=1; index<psiSize; index++)
        diffs[index] = ((ssize_t)Psi[index]) - ((ssize_t)Psi[index-1]);

    // Compute the frequency distribution.
    runLenght = 0;
    for(index=0; index<psiSize; index++)
    {
        if(index%T)
        {
            if(diffs[index]== ((ssize_t) 1) )
            {
                runLenght++;
            }
            else
            {
                // Not inside a run: flush any pending run first.
                if(runLenght)
                {
                    huffmanDst[runLenght+runLenghtStart]++;
                    runLenght = 0;
                }
                if(diffs[index]>((ssize_t)1) && diffs[index]<runLenghtStart)
                {
                    huffmanDst[diffs[index]]++;
                }
                else if(diffs[index]< ((ssize_t) 0) )
                {
                    // Negative value.
                    absolute_value = (uint) (-diffs[index]);
                    binaryLenght = bits(absolute_value);
                    huffmanDst[binaryLenght+negStart-1]++;
                }
                else
                {
                    // Large value (>= runLenghtStart).
                    absolute_value = (uint)(diffs[index]);
                    binaryLenght = bits(absolute_value);
                    huffmanDst[binaryLenght+bigStart-1]++;
                }
            }
        }
        else
        {
            // A sample position breaks the current run.
            if(runLenght)
            {
                huffmanDst[runLenght+runLenghtStart]++;
                runLenght = 0;
            }
        }
    }
    if(runLenght) huffmanDst[runLenght+runLenghtStart]++;

    // Build the Huffman tree.
    diffsHT = createHuff(huffmanDst,nS-1,UNSORTED);

    // Total size in bits of the Huffman + RLE sequence: the Huffman codes
    // plus the explicit bits that follow negative and large symbols.
    streamSize = diffsHT.total;
    for(index=negStart;index<bigStart;index++)
        streamSize += ((size_t)huffmanDst[index])*(index-negStart+1);   // negatives
    for(index=bigStart;index<nS;index++)
        streamSize += ((size_t)huffmanDst[index])*(index-bigStart+1);   // large values

    // Number of samples and the width of each sample and each pointer.
    numberOfSamples = (psiSize+T-1)/T;
    sampleSize = bits(psiSize);
    pointerSize = bits(streamSize);

    // Allocate the sequence, the samples and the pointers. Only the last word
    // of each buffer is cleared (to avoid valgrind warnings); the guards keep
    // that write in bounds when a buffer has zero words.
    const size_t sampleWords = (numberOfSamples*sampleSize+W-1)/W;
    const size_t pointerWords = ulong_len(pointerSize,numberOfSamples);
    const size_t streamWords = (streamSize+W-1)/W;

    samples = (unsigned int *)malloc(sizeof(uint)*sampleWords);
    if(sampleWords > 0) samples[sampleWords-1] = 0;

    samplePointers = (size_t *)malloc(sizeof(size_t)*pointerWords);
    if(pointerWords > 0) samplePointers[pointerWords-1] = 0;

    stream = (unsigned int *)malloc(sizeof(int)*streamWords);
    if(streamWords > 0) stream[streamWords-1] = 0;

    // Compress sequentially, carrying a bit pointer from the beginning.
    ptr = 0;
    samplesPtr = 0;
    samplePointersPtr = 0;
    runLenght = 0;
    for(index=0; index<psiSize; index++)
    {
        if(index%T)
        {
            if(diffs[index]==((ssize_t)1))
            {
                runLenght++;
            }
            else
            {
                // Not inside a run: flush any pending run first.
                if(runLenght)
                {
                    ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
                    runLenght = 0;
                }
                if(diffs[index]>((ssize_t)1) && diffs[index]<runLenghtStart)
                {
                    ptr = encodeHuff(diffsHT,(uint)diffs[index],stream,ptr);
                }
                else if(diffs[index]< ((ssize_t)0) )
                {
                    // Negative value.
                    absolute_value = (uint) (-diffs[index]);
                    binaryLenght = bits(absolute_value);
                    ptr = encodeHuff(diffsHT,binaryLenght+negStart-1,stream,ptr);
                    bitwrite(stream,ptr,binaryLenght,absolute_value);
                    ptr += binaryLenght;
                }
                else
                {
                    // Large value (>= runLenghtStart).
                    absolute_value = (uint) diffs[index];
                    binaryLenght = bits(absolute_value);
                    ptr = encodeHuff(diffsHT,binaryLenght+bigStart-1,stream,ptr);
                    bitwrite(stream,ptr,binaryLenght,absolute_value);
                    ptr += binaryLenght;
                }
            }
        }
        else
        {
            // A sample position breaks the current run.
            if(runLenght)
            {
                ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
                runLenght = 0;
            }
            // Store the absolute value and the stream position of the sample.
            bitwrite(samples,samplesPtr,sampleSize, Psi[index]);
            samplesPtr += sampleSize;
            bitwrite64(samplePointers,samplePointersPtr,pointerSize,ptr);
            samplePointersPtr += pointerSize;
        }
    }
    if(runLenght)
    {
        ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
    }

    // Report the space used.
    totalSize = sizeof(HuffmanCompressedPsi)
              + sizeof(int)*sampleWords
              + sizeof(size_t)*((numberOfSamples*pointerSize+WW-1)/WW)
              + sizeof(int)*streamWords
              + sizeHuff(diffsHT);
    // nS is unsigned, so it is printed with %u.
    printf("\n\t Compressed Psi size = %zu bytes, with %u different symbols.", totalSize, nS);

    // Required before decoding.
    prepareToDecode(&diffsHT);

    // Assign the values to cPsi and return it.
    cPsi.T = T;
    cPsi.diffsHT = diffsHT;
    cPsi.nS = nS;
    cPsi.numberOfSamples = numberOfSamples;
    cPsi.samples = samples;
    cPsi.sampleSize = sampleSize;
    cPsi.samplePointers = samplePointers;
    cPsi.pointerSize = pointerSize;
    cPsi.stream = stream;
    cPsi.streamSize = streamSize;
    cPsi.totalMem = totalSize;

    // Free the resources that are no longer needed.
    free(diffs);
    free(huffmanDst);

    // Return the data structure that holds the compressed psi.
    return cPsi;
}