/*
 * Compresses Psi into a GonzaloCompressedPsi.
 *
 * Layout produced (as written by the second pass below):
 *   - every position i with i % T == 0 is a sample: its absolute value
 *     Psi[i] is stored in pslen bits inside cPsi, and the bit offset of that
 *     sample inside cPsi is stored in bposS using bplen bits;
 *   - between samples, a maximal sequence of gaps equal to 1 is stored as
 *     Huffman symbol 1 (tree Hacc) followed by its length minus one
 *     (tree Hlen);
 *   - a gap g with 1 < g < HUFF is stored as Huffman symbol g (tree Hacc);
 *   - any other gap is an "exception": Huffman symbol 0 followed by the
 *     absolute value Psi[i] in pslen bits.
 *
 * Parameters:
 *   Psi     - input array; positions 0..psiSize (inclusive) are read, so the
 *             caller must provide psiSize + 1 readable entries.
 *   psiSize - index of the last position to encode.
 *   T       - sampling period; used as divisor/modulus, so it must be != 0.
 *   HUFF    - number of symbols of the gap alphabet (gaps >= HUFF become
 *             exceptions).
 *
 * Returns the compressed structure by value. The buffers cPsi and bposS are
 * allocated here with malloc and handed to the caller inside the result.
 * Allocation results are not checked.
 */
GonzaloCompressedPsi gonzaloCompressPsi(uint *Psi, uint psiSize, uint T, uint HUFF) {
    GonzaloCompressedPsi compressedPsi;
    register uint i;
    uint idx;
    uint oi = 0;            /* position where the current run of 1-gaps began */
    int ok, k;              /* previous gap (0 right after a sample) / current gap */
    register uint _cptr;    /* write cursor, in bits, inside _cPsi */
    uint *_cPsi;
    uint *_bposS;
    uint links = psiSize;
    uint samplen = T;
    uint _bplen;
    uint pslen;
    uint totexc;
    uint *acc, *lacc;
    THuff Hacc, Hlen;
    uint totalSize;
    size_t bposBytes, cPsiBytes;

    /*
     * Build the two Huffman trees: one for the direct gap values and one for
     * the run lengths. acc and lacc are the auxiliary frequency vectors and
     * are released before returning.
     */
    acc = (uint *)malloc(HUFF * sizeof(uint));
    lacc = (uint *)malloc((samplen - 1) * sizeof(uint));
    for (idx = 0; idx < HUFF; idx++)
        acc[idx] = 0;
    for (idx = 0; idx < samplen - 1; idx++)
        lacc[idx] = 0;

    /* First pass: gather symbol frequencies. */
    ok = 0;
    k = Psi[0];
    for (i = 0; i <= links; i++) {
        if ((k == 1) && (i % samplen)) {
            /* Inside a run of 1-gaps: remember where it started. */
            if (ok != 1)
                oi = i;
        } else {
            if (ok == 1) {
                /* A run has just ended. */
                acc[1]++;
                lacc[i - oi - 1]++;
            }
            if (i % samplen) {
                if ((k < 1) || (k >= HUFF))
                    acc[0]++;       /* exception */
                else
                    acc[k]++;
            }
        }
        ok = (i % samplen) ? k : 0;
        /*
         * The gap computed on the last iteration would never be consumed;
         * skipping it avoids reading Psi[links + 1].
         */
        if (i < links)
            k = Psi[i + 1] - Psi[i];
    }
    if (ok == 1) {
        /* Flush a run that reaches the end of the array. */
        acc[1]++;
        lacc[i - oi - 1]++;
    }

    Hacc = createHuff(acc, HUFF - 1, UNSORTED);
    Hlen = createHuff(lacc, samplen - 2, UNSORTED);
    totexc = acc[0];
    pslen = bits(psiSize + 1);
    _bplen = bits(Hacc.total + Hlen.total + (1 + links / samplen + totexc) * pslen);

    bposBytes = (((1 + links / samplen) * _bplen + W - 1) / W) * sizeof(uint);
    cPsiBytes = ((Hacc.total + Hlen.total + (1 + links / samplen + totexc) * pslen + W - 1) / W) * sizeof(uint);
    _bposS = (uint *)malloc(bposBytes);
    _cPsi = (uint *)malloc(cPsiBytes);

    /* Second pass: emit the codes, mirroring the logic of the first pass. */
    _cptr = 0;
    ok = 0;
    k = Psi[0];
    for (i = 0; i <= links; i++) {
        if ((k == 1) && (i % samplen)) {
            if (ok != 1)
                oi = i;
        } else {
            if (ok == 1) {
                _cptr = encodeHuff(Hacc, 1, _cPsi, _cptr);
                _cptr = encodeHuff(Hlen, i - oi - 1, _cPsi, _cptr);
            }
            if (i % samplen) {
                if ((k > 1) && (k < HUFF)) {
                    _cptr = encodeHuff(Hacc, k, _cPsi, _cptr);
                } else {
                    /* Exception: escape symbol plus the absolute value. */
                    _cptr = encodeHuff(Hacc, 0, _cPsi, _cptr);
                    bitwrite(_cPsi, _cptr, pslen, Psi[i]);
                    _cptr += pslen;
                }
            } else {
                /* Sample: record its bit offset, then its absolute value. */
                bitwrite(_bposS, (i / samplen) * _bplen, _bplen, _cptr);
                bitwrite(_cPsi, _cptr, pslen, Psi[i]);
                _cptr += pslen;
            }
        }
        ok = (i % samplen) ? k : 0;
        if (i < links)
            k = Psi[i + 1] - Psi[i];
    }
    if (ok == 1) {
        _cptr = encodeHuff(Hacc, 1, _cPsi, _cptr);
        _cptr = encodeHuff(Hlen, i - oi - 1, _cPsi, _cptr);
    }

    /* Total space used by the compressed representation. */
    totalSize = bposBytes + cPsiBytes + 5 * sizeof(int) + sizeHuff(Hacc) + sizeHuff(Hlen);
    printf("\n\tCompressed Psi size = %u bytes\n", totalSize);

    /* Required before the trees can be used for decoding. */
    prepareToDecode(&Hacc);
    prepareToDecode(&Hlen);

    /* Fill in the result and return the compressed Psi. */
    compressedPsi.links = psiSize;
    compressedPsi.totexc = totexc;
    compressedPsi.cPsi = _cPsi;
    compressedPsi.samplen = samplen;
    compressedPsi.bposS = _bposS;
    compressedPsi.bplen = _bplen;
    compressedPsi.pslen = pslen;
    compressedPsi.Hacc = Hacc;
    compressedPsi.Hlen = Hlen;
    compressedPsi.totalMem = totalSize;

    free(acc);
    free(lacc);

    return compressedPsi;
}
/*
 * Builds an FTRep for the given list of unsigned integers.
 *
 * Each value is split into chunks; level j uses base_bits[j] bits per chunk,
 * where the per-level bit widths come from optimizationk() (the last width is
 * reused for any deeper level). tablebase[j] is the cumulative sum of the
 * running products of the level bases, and a value occupies level j whenever
 * value >= tablebase[j].
 *
 * All chunks are packed into rep->levels, level after level. A bitmap with
 * one bit per chunk of levels 0..nLevels-2 (plus one final bit) has a bit set
 * at the position of each value's last chunk when that chunk is not in the
 * deepest level; the bitmap is wrapped by createBitRankW32Int and stored in
 * rep->bS.
 *
 * Parameters:
 *   list       - values to encode (read only here).
 *   listLength - number of entries in list.
 *
 * Returns a newly malloc'ed FTRep. Allocation results are not checked.
 * NOTE(review): an empty list yields nLevels == 0, and levelsIndex[nLevels-1]
 * is then read out of range; callers presumably never pass listLength == 0 —
 * confirm.
 */
FTRep* createFT(uint *list,uint listLength){
    FTRep * rep = (FTRep *) malloc(sizeof(struct sFTRep));
    uint *levelSizeAux;     /* number of chunks stored in each level */
    uint *cont;             /* per level: next bit position inside rep->levels */
    uint *contB;            /* per level: next chunk position inside the bitmap */
    ushort* kvalues;
    uint nkvalues;
    register uint i;
    int j, k;
    uint value, newvalue;
    uint bits_BS_len;
    ushort kbits;
    uint kval;
    uint oldval, newval, multval;
    uint tamLevels;
    uint indexLevel;
    uint * bits_BS;

    rep->listLength = listLength;
    kvalues = optimizationk(list,listLength,&nkvalues);

    /*
     * Count how many table entries are needed: keep accumulating the running
     * product of the bases until the unsigned sum stops growing (wraps).
     * The shift is done on 1u: shifting a signed 1 into the sign bit is
     * undefined behaviour.
     */
    oldval = 0;
    newval = 0;
    multval = 1;
    i = 0;
    do {
        oldval = newval;
        kbits = (i >= nkvalues) ? kvalues[nkvalues-1] : kvalues[i];
        kval = 1u << kbits;
        multval *= kval;
        newval = oldval + multval;
        i++;
    } while (oldval < newval);

    rep->tamtablebase = i;
    rep->tablebase = (uint *) malloc(sizeof(uint)*rep->tamtablebase);
    levelSizeAux = (uint *) malloc(sizeof(uint)*rep->tamtablebase);
    cont = (uint *) malloc(sizeof(uint)*rep->tamtablebase);
    contB = (uint *) malloc(sizeof(uint)*rep->tamtablebase);

    /* Second walk over the same sequence, this time storing the thresholds. */
    oldval = 0;
    newval = 0;
    multval = 1;
    for (i = 0; i < rep->tamtablebase; i++) {
        oldval = newval;
        kbits = (i >= nkvalues) ? kvalues[nkvalues-1] : kvalues[i];
        kval = 1u << kbits;
        multval *= kval;
        newval = oldval + multval;
        rep->tablebase[i] = oldval;
    }

    /* Count, for every level, how many values reach it. */
    for (i = 0; i < rep->tamtablebase; i++)
        levelSizeAux[i] = 0;

    for (i = 0; i < listLength; i++) {
        value = list[i];
        for (j = 0; j < rep->tamtablebase; j++)
            if (value >= rep->tablebase[j])
                levelSizeAux[j]++;
    }

    /* The levels in use are the leading ones with a non-zero count. */
    j = 0;
    while ((j < rep->tamtablebase) && (levelSizeAux[j] != 0))
        j++;
    rep->nLevels = j;

    rep->levelsIndex = (uint *) malloc(sizeof(uint)*(rep->nLevels+1));
    rep->base = (uint *)malloc(sizeof(uint)*rep->nLevels);
    rep->base_bits = (ushort *)malloc(sizeof(ushort)*rep->nLevels);

    for (i = 0; i < rep->nLevels; i++) {
        kbits = (i >= nkvalues) ? kvalues[nkvalues-1] : kvalues[i];
        rep->base[i] = 1u << kbits;
        rep->base_bits[i] = kbits;
    }

    /* Total number of bits needed by the chunks of all levels. */
    tamLevels = 0;
    for (i = 0; i < rep->nLevels; i++)
        tamLevels += rep->base_bits[i]*levelSizeAux[i];

    rep->iniLevel = (uint *)malloc(sizeof(uint)*rep->nLevels);
    rep->tamCode = tamLevels;

    /* Starting offsets of every level, in chunks (levelsIndex) and in bits (iniLevel). */
    indexLevel = 0;
    rep->levelsIndex[0] = 0;
    for (j = 0; j < rep->nLevels; j++) {
        rep->levelsIndex[j+1] = rep->levelsIndex[j] + levelSizeAux[j];
        rep->iniLevel[j] = indexLevel;
        cont[j] = rep->iniLevel[j];
        indexLevel += levelSizeAux[j]*rep->base_bits[j];
        contB[j] = rep->levelsIndex[j];
    }

    /* Zero the chunk buffer so the padding bits are never indeterminate. */
    rep->levels = (uint *) malloc(sizeof(uint)*(tamLevels/W+1));
    for (i = 0; i < (tamLevels/W+1); i++)
        rep->levels[i] = 0;

    bits_BS_len = rep->levelsIndex[rep->nLevels-1]+1;
    bits_BS = (uint *) malloc(sizeof(uint)*(bits_BS_len/W+1));
    for (i = 0; i < ((bits_BS_len)/W+1); i++)
        bits_BS[i] = 0;

    /* Encode every value, starting the search at the deepest level it can reach. */
    for (i = 0; i < listLength; i++) {
        value = list[i];
        j = rep->nLevels-1;
        while (j >= 0) {
            if (value >= rep->tablebase[j]) {
                newvalue = value - rep->tablebase[j];

                /* Chunks for the levels below j, least significant first. */
                for (k = 0; k < j; k++) {
                    bitwrite(rep->levels,cont[k],rep->base_bits[k],newvalue%rep->base[k]);
                    cont[k] += rep->base_bits[k];
                    contB[k]++;
                    newvalue = newvalue/rep->base[k];
                }

                /* Final chunk of this value, at level j. */
                bitwrite(rep->levels,cont[j],rep->base_bits[j],newvalue%rep->base[j]);
                cont[j] += rep->base_bits[j];
                contB[j]++;

                /* Mark the end of the value unless it sits in the deepest level. */
                if (j < rep->nLevels-1) {
                    bitset(bits_BS,contB[j]-1);
                }
                break;
            }
            j--;
        }
    }

    bitset(bits_BS,bits_BS_len-1);

    /*
     * bits_BS is not freed here.
     * NOTE(review): presumably createBitRankW32Int keeps the buffer (third
     * argument 1) — confirm.
     */
    rep->bS = createBitRankW32Int(bits_BS, bits_BS_len , 1, 20);

    /* Precompute the rank just before the start of every level. */
    rep->rankLevels = (uint *) malloc(sizeof(uint)*rep->nLevels);
    for (j = 0; j < rep->nLevels; j++)
        rep->rankLevels[j] = rank(rep->bS, rep->levelsIndex[j]-1);

    free(cont);
    free(contB);
    free(levelSizeAux);
    free(kvalues);

    return rep;
}
HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, size_t psiSize, unsigned int T, unsigned int nS) { HuffmanCompressedPsi cPsi; uint absolute_value; register size_t index; register size_t ptr, samplesPtr, samplePointersPtr; unsigned int runLenght, binaryLenght; ssize_t *diffs; unsigned int *huffmanDst; // Estructuras da funcion comprimida (para logo asignar) // Tam�n se podian almacenar directamente THuff diffsHT; size_t numberOfSamples; unsigned int *samples; unsigned int sampleSize; size_t *samplePointers; unsigned int pointerSize; unsigned int *stream; size_t streamSize; // Variables que marcan os intervalos dentro do vector de frecuencias unsigned int runLenghtStart = nS - 64 - T; // Inicio das Runs unsigned int negStart = nS - 64; // Inicio dos Negativos unsigned int bigStart = nS - 32; // Inicio dos Grandes (>runLenghtStart) // Para estadistica size_t totalSize; // Reservamos espacio para a distribuci�n de valores de Psi huffmanDst = (unsigned int *)malloc(sizeof(int)*nS); for(index=0;index<nS;index++) huffmanDst[index]=0; // Inicializamos diferencias diffs = (ssize_t *)malloc(sizeof(ssize_t)*psiSize); diffs[0] = 0; for(index=1; index<psiSize; index++) diffs[index] = ((ssize_t)Psi[index]) - ((ssize_t)Psi[index-1]); // Calculamos a distribucion de frecuencias runLenght = 0; for(index=0; index<psiSize; index++) { if(index%T) { if(diffs[index]== ((ssize_t) 1) ) { runLenght++; } else { // Non estamos nun run if(runLenght) { huffmanDst[runLenght+runLenghtStart]++; runLenght = 0; } if(diffs[index]>((ssize_t)1) && diffs[index]<runLenghtStart) huffmanDst[diffs[index]]++; else if(diffs[index]< ((ssize_t) 0) ) { // Valor negativo absolute_value = (uint) (-diffs[index]); binaryLenght = bits(absolute_value); huffmanDst[binaryLenght+negStart-1]++; } else { // Valor grande >= 128 absolute_value = (uint)(diffs[index]); binaryLenght = bits(absolute_value); huffmanDst[binaryLenght+bigStart-1]++; } } } else { // Rompemos o run porque atopamos unha mostra if(runLenght) 
{ huffmanDst[runLenght+runLenghtStart]++; runLenght = 0; } } } if(runLenght) huffmanDst[runLenght+runLenghtStart]++; // Creamos o arbol de Huffman diffsHT = createHuff(huffmanDst,nS-1,UNSORTED); // Calculamos o espacio total ocupado pola secuencia Huffman + RLE streamSize = diffsHT.total; for(index=negStart;index<bigStart;index++) streamSize += ((size_t)huffmanDst[index])*(index-negStart+1); // Negativos for(index=bigStart;index<nS;index++) streamSize += ((size_t)huffmanDst[index])*(index-bigStart+1); // Grandes // Calculamos o numero de mostras e o espacio ocupado por cada mostra e por cada punteiro numberOfSamples = (psiSize+T-1)/T; sampleSize = bits(psiSize); pointerSize = bits(streamSize); // Reservamos espacio para a secuencia e para as mostras e punteiros samples = (unsigned int *)malloc(sizeof(uint)*((numberOfSamples*sampleSize+W-1)/W)); samples[((numberOfSamples*sampleSize+W-1)/W)-1] =0000; //initialized only to avoid valgrind warnings samplePointers = (size_t *)malloc(sizeof(size_t)* (ulong_len(pointerSize,numberOfSamples)) ); samplePointers[ (ulong_len(pointerSize,numberOfSamples)) -1] = 00000000; //initialized only to avoid valgrind warnings stream = (unsigned int *)malloc(sizeof(int)*((streamSize+W-1)/W)); stream[((streamSize+W-1)/W)-1]=0000;//initialized only to avoid valgrind warnings // Comprimimos secuencialmente (haber� que levar un punteiro desde o inicio) ptr = 0; samplesPtr = 0; samplePointersPtr = 0; runLenght = 0; for(index=0; index<psiSize; index++) { if(index%T) { if(diffs[index]==((ssize_t)1)) { runLenght++; } else { // Non estamos nun run if(runLenght) { ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr); runLenght = 0; } if(diffs[index]>((ssize_t)1) && diffs[index]<runLenghtStart) { ptr = encodeHuff(diffsHT,(uint)diffs[index],stream,ptr); } else if(diffs[index]< ((ssize_t)0) ) { // Valor negativo absolute_value = (uint) (-diffs[index]); binaryLenght = bits(absolute_value); ptr = 
encodeHuff(diffsHT,binaryLenght+negStart-1,stream,ptr); bitwrite(stream,ptr,binaryLenght,absolute_value); ptr += binaryLenght; } else { // Valor grande >= 128 absolute_value = (uint) diffs[index]; binaryLenght = bits(absolute_value); ptr = encodeHuff(diffsHT,binaryLenght+bigStart-1,stream,ptr); bitwrite(stream,ptr,binaryLenght,absolute_value); ptr += binaryLenght; } } } else { // Rompemos o run porque atopamos unha mostra if(runLenght) { ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr); runLenght = 0; } bitwrite(samples,samplesPtr,sampleSize, Psi[index]); samplesPtr += sampleSize; bitwrite64(samplePointers,samplePointersPtr,pointerSize,ptr); samplePointersPtr += pointerSize; } } if(runLenght) { ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr); } // Amosamos o espacio ocupado totalSize = sizeof(HuffmanCompressedPsi) + sizeof(int)*((numberOfSamples*sampleSize+W-1)/W) + sizeof(size_t)*((numberOfSamples*pointerSize+WW-1)/WW) + sizeof(int)*((streamSize+W-1)/W) + sizeHuff(diffsHT); printf("\n\t Compressed Psi size = %zu bytes, with %d different symbols.", totalSize, nS); // Necesario antes de decodificar prepareToDecode(&diffsHT); // Asignamos os valores a cPsi e devolvemolo cPsi.T = T; cPsi.diffsHT = diffsHT; cPsi.nS = nS; cPsi.numberOfSamples = numberOfSamples; cPsi.samples = samples; cPsi.sampleSize = sampleSize; cPsi.samplePointers = samplePointers; cPsi.pointerSize = pointerSize; cPsi.stream = stream; cPsi.streamSize = streamSize; cPsi.totalMem = totalSize; //frees resources not needed in advance free(diffs); free(huffmanDst); //returns the data structure that holds the compressed psi. return cPsi; }