// Cofactor step: first replaces S by 2*x*S, then sets
//   S = F(S) + F(F(S))
// where F is "untwist the coordinates, apply powq(X) to each, twist back".
//   S - point on the twisted curve, updated in place
//   x - scalar multiplier applied before the two powq maps
//   X - constant handed to ZZn6::powq
void cofactor(ECn3 &S,Big &x, ZZn2& X)
{
    ZZn6 ax,bx,ay,by;        // ax/ay: one powq applied, bx/by: two applied
    ZZn3 px,py,spare;
    ECn3 twice;
    int qnr=get_mip()->cnr;

    // hard work done here: S <- 2*x*S
    S*=x;
    S+=S;

    // untwist the affine coordinates
    S.get(px,py);
    px=px/qnr;
    py=tx(py);
    py=py/(qnr*qnr);

    // lift into ZZn6: x goes in the first slot, y in the second
    ax=shuffle(px,(ZZn3)0);
    ay=shuffle((ZZn3)0,py);

    // first application of powq
    ax.powq(X);
    ay.powq(X);

    // second application works on copies of the first result
    bx=ax;
    by=ay;
    bx.powq(X);
    by.powq(X);

    // twist the once-mapped coordinates back and store them in S
    unshuffle(ax,px,spare);
    unshuffle(ay,spare,py);
    px=qnr*px;
    py=txd(py*qnr*qnr);
    S.set(px,py);

    // twist the twice-mapped coordinates back into a second point
    unshuffle(bx,px,spare);
    unshuffle(by,spare,py);
    px=qnr*px;
    py=txd(py*qnr*qnr);
    twice.set(px,py);

    S+=twice;
}
/*
 * Top-down merge step over a[l..r] (inclusive bounds).
 *
 * The range is unshuffled, both halves are merged recursively, the range is
 * shuffled back, and a final pass compare-exchanges the pairs
 * (l+1,l+2), (l+3,l+4), ...
 *
 * The function produces no value, so it is declared void explicitly (the
 * original relied on implicit int, which C99 and C++ reject).
 */
void mergeTD(int a[], int l, int r)
{
    int i;
    int m;

    /* Two elements: a single compare-exchange finishes the job. */
    if (r == l + 1)
        compexch(a[l], a[r]);
    /* Fewer than three elements: nothing further to do. */
    if (r < l + 2)
        return;

    /* Overflow-safe midpoint; equals (l+r)/2 whenever l <= r. */
    m = l + (r - l) / 2;

    unshuffle(a, l, r);
    mergeTD(a, l, m);
    mergeTD(a, m + 1, r);
    shuffle(a, l, r);

    for (i = l + 1; i < r; i += 2)
        compexch(a[i], a[i + 1]);
}
/* *****************************************************
** Radix-2 inverse FFT for real values
** (by the so-called "packing method")
**
** data:    packed spectrum, modified in place by the helper routines:
**          re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
** outdata: receives re(0),re(1),re(2),...,re(size-1); every sample is
**          data[i] * 2 after the inverse transform
** size:    number of real samples
** twiddle: table passed through to inverse_dit_butterfly()
**
** NOT normalized by array length
**
** Source: see the routines it calls ...
******************************************************* */
void irealfft_packed(MYFLT *data, MYFLT *outdata, int size, MYFLT *twiddle)
{
    int half = size >> 1;   /* the helper routines are called with size/2 */
    int total = half << 1;  /* number of output samples written */
    int k = 0;

    unrealize(data, half);
    unshuffle(data, half);
    inverse_dit_butterfly(data, half, twiddle);

    while (k < total) {
        outdata[k] = data[k] * 2;
        k++;
    }
}
/* *****************************************************
** Radix-2 FFT for real values
** (by the so-called "packing method")
**
** data:    real samples re(0),re(1),re(2),...,re(size-1); modified in
**          place by the helper routines
** outdata: receives the packed spectrum
**          re(0),re(size/2),re(1),im(1),re(2),im(2),...,re(size/2-1),im(size/2-1)
**          with every value divided by the array length
** size:    number of real samples
** twiddle: table passed through to dif_butterfly()
**
** Source: see the routines it calls ...
******************************************************* */
void realfft_packed(MYFLT *data, MYFLT *outdata, int size, MYFLT *twiddle)
{
    int half = size >> 1;   /* the helper routines are called with size/2 */
    int total = half << 1;  /* number of output values and the divisor */
    int k = 0;

    dif_butterfly(data, half, twiddle);
    unshuffle(data, half);
    realize(data, half);

    while (k < total) {
        outdata[k] = data[k] / total;
        k++;
    }
}
// Applies one powq(X) map to the point S, in place:
// the affine coordinates are untwisted, lifted into ZZn6, mapped with
// ZZn6::powq(X), brought back to ZZn3 and twisted again.
//   S - point on the twisted curve, updated in place
//   X - constant handed to ZZn6::powq
void q_power_frobenius(ECn3 &S,ZZn2& X)
{
    // Only one ZZn6 per coordinate is needed; the unused X2/Y2 temporaries
    // of the earlier version have been dropped.
    ZZn6 X1,Y1;
    ZZn3 Sx,Sy,T;
    int qnr=get_mip()->cnr;

    S.get(Sx,Sy);

    // untwist
    Sx=Sx/qnr;
    Sy=tx(Sy);
    Sy=Sy/(qnr*qnr);

    // lift into ZZn6: x goes in the first slot, y in the second
    X1=shuffle(Sx,(ZZn3)0);
    Y1=shuffle((ZZn3)0,Sy);

    X1.powq(X);
    Y1.powq(X);

    // T only receives the half that is not needed
    unshuffle(X1,Sx,T);
    unshuffle(Y1,T,Sy);

    // twist
    Sx=qnr*Sx;
    Sy=txd(Sy*qnr*qnr);
    S.set(Sx,Sy);
}
int main(int argc,char *argv[]) { int N=atoi(argv[1]); item_t *array=malloc(N*sizeof(*array)); int i; srand((unsigned)time(NULL)); for(i=0;i<N;i++){ array[i]=rand()%N; } print_array(array,N); shuffle(array,0,N-1); print_array(array,N); unshuffle(array,0,N-1); print_array(array,N); return(0); }
// Container-level convenience wrapper: returns a new container of the same
// type and element count as `data`, filled by the byte-level unshuffle
// routine that matches sizeof(T::value_type).
//   data      - input container; must provide size(), data() and value_type
//   blocksize - forwarded unchanged to the byte-level routine
// Element sizes 1, 2, 4 and 8 (and 16/32/64 when SHUFFLE_MAX_DATATYPE_SIZE
// allows) use the compile-time specialisations; every other size goes
// through the overload that takes the element size at run time.
T unshuffle(const T& data, uint16_t blocksize=SHUFFLE_DEFAULT_BLOCK_SIZE)
{
    size_t elem_bytes = sizeof(typename T::value_type);

    // allocate target with as many elements as the input
    T result = T(data.size());

    size_t total_bytes = data.size() * elem_bytes;
    const unsigned char* in = reinterpret_cast<const unsigned char*>(data.data());
    unsigned char* out = reinterpret_cast<unsigned char*>(result.data());

    switch(elem_bytes) {
    case 0:     // sizeof never yields 0; label retained from the original dispatch
    case 1:
        unshuffle<1>(in, out, total_bytes, blocksize);
        break;
    case 2:
        unshuffle<2>(in, out, total_bytes, blocksize);
        break;
    case 4:
        unshuffle<4>(in, out, total_bytes, blocksize);
        break;
    case 8:
        unshuffle<8>(in, out, total_bytes, blocksize);
        break;
#if SHUFFLE_MAX_DATATYPE_SIZE >= 16
    case 16:
        unshuffle<16>(in, out, total_bytes, blocksize);
        break;
#endif
#if SHUFFLE_MAX_DATATYPE_SIZE >= 32
    case 32:
        unshuffle<32>(in, out, total_bytes, blocksize);
        break;
#endif
#if SHUFFLE_MAX_DATATYPE_SIZE >= 64
    case 64:
        unshuffle<64>(in, out, total_bytes, blocksize);
        break;
#endif
    default:
        // no specialisation available: pass the element size at run time
        unshuffle(in, out, total_bytes, elem_bytes, blocksize);
    }

    return result;
}
/*
** Batcher's merge, the key operation of Batcher's mergesort, written for
** the general case where the first ordered run array[left..middle] may have
** any length.
**
** The range array[left..right] is unshuffled, both halves are merged
** recursively, and the range is shuffled back. The closing compare-exchange
** pass starts at left+1 when the first run has an even number of items and
** at left when it has an odd number.
*/
void batcher_merge(item_t *array,int left,int right,int middle)
{
    int count=right-left+1;      /* items in the whole range */
    int run=middle-left+1;       /* items in the first ordered run */
    int mid=(left+right)/2;      /* split point after unshuffling */
    int first;
    int k;

    if(count<=1){
        return;
    }
    if(count==2){
        compare_swap_item(&array[left],&array[right]);
        return;
    }

    unshuffle(array,left,right);
    batcher_merge(array,left,mid,(run+1)/2+left-1);
    batcher_merge(array,mid+1,right,run/2+mid);
    shuffle(array,left,right);

    /* pick the starting index of the final pass from the parity of run */
    if(run%2==0){
        first=left+1;
    }else if(run%2==1){
        first=left;
    }else{
        /* run%2 is -1 (negative odd run): no compare-exchange pass */
        return;
    }

    for(k=first;k<right;k+=2){
        compare_swap_item(&array[k],&array[k+1]);
    }
}
/*
 * hinv: in-place expansion of the coefficient array a, performed in log2n
 * passes (see the loop over k below).
 *
 *	a	array indexed as a 2-D array with dimensions (nx,ny); element
 *		(i,j) lives at a[ny*i + j]. Overwritten with the result.
 *	nx, ny	dimensions of a
 *
 * Allocates a scratch buffer of (max(nx,ny)+1)/2 elements for the unshuffle
 * helpers and frees it before returning. If the allocation fails, a message
 * is written to file descriptor 2 and the process exits via exits("memory").
 */
void hinv(Pix *a, int nx, int ny)
{
	int nmax, log2n, h0, hx, hy, hc, i, j, k;
	int nxtop, nytop, nxf, nyf, c;
	int oddx, oddy;
	int shift;
	int s10, s00;
	Pix *tmp;

	/*
	 * log2n is log2 of max(nx, ny) rounded up to next power of 2
	 */
	nmax = ny;
	if(nx > nmax)
		nmax = nx;
	log2n = log(nmax)/LN2 + 0.5;
	/* correct the floating-point estimate if it came out one too small */
	if(nmax > (1<<log2n))
		log2n++;

	/*
	 * get temporary storage for shuffling elements
	 */
	tmp = (Pix*)malloc(((nmax+1)/2) * sizeof(*tmp));
	if(tmp == nil) {
		fprint(2, "hinv: insufficient memory\n");
		exits("memory");
	}

	/*
	 * do log2n expansions
	 *
	 * We're indexing a as a 2-D array with dimensions (nx,ny).
	 */
	shift = 1;
	nxtop = 1;
	nytop = 1;
	nxf = nx;
	nyf = ny;
	c = 1<<log2n;
	for(k = log2n-1; k>=0; k--) {
		/*
		 * this somewhat cryptic code generates the sequence
		 * ntop[k-1] = (ntop[k]+1)/2, where ntop[log2n] = n
		 */
		c = c>>1;
		nxtop = nxtop<<1;
		nytop = nytop<<1;
		if(nxf <= c)
			nxtop--;
		else
			nxf -= c;
		if(nyf <= c)
			nytop--;
		else
			nyf -= c;

		/*
		 * halve divisors on last pass
		 * (shift becomes 0, so the inputs below are no longer doubled)
		 */
		if(k == 0)
			shift = 0;

		/*
		 * unshuffle in each dimension to interleave coefficients:
		 * first along each of the nxtop rows (nytop elements each),
		 * then along each of the nytop columns (nxtop elements, stride ny)
		 */
		for(i = 0; i<nxtop; i++)
			unshuffle1(&a[ny*i], nytop, tmp);
		for(j = 0; j<nytop; j++)
			unshuffle(&a[j], nxtop, ny, tmp);

		/* oddx/oddy are 1 when the active region has an odd row/column count */
		oddx = nxtop & 1;
		oddy = nytop & 1;
		/* process the active region in 2x2 cells */
		for(i = 0; i<nxtop-oddx; i += 2) {
			s00 = ny*i;		/* s00 is index of a[i,j] */
			s10 = s00+ny;		/* s10 is index of a[i+1,j] */
			for(j = 0; j<nytop-oddy; j += 2) {
				/*
				 * Multiply h0,hx,hy,hc by 2 (1 the last time through).
				 */
				h0 = a[s00 ] << shift;
				hx = a[s10 ] << shift;
				hy = a[s00+1] << shift;
				hc = a[s10+1] << shift;
				/*
				 * Divide sums by 4 (shift right 2 bits).
				 * Add 2 (half the divisor) before shifting to round --
				 * note that these values are always
				 * positive so we don't need to do anything special
				 * for rounding negative numbers.
				 */
				a[s10+1] = (h0 + hx + hy + hc + 2) >> 2;
				a[s10 ] = (h0 + hx - hy - hc + 2) >> 2;
				a[s00+1] = (h0 - hx + hy - hc + 2) >> 2;
				a[s00 ] = (h0 - hx - hy + hc + 2) >> 2;
				s00 += 2;
				s10 += 2;
			}
			if(oddy) {
				/*
				 * do last element in row if row length is odd
				 * s00+1, s10+1 are off edge
				 */
				h0 = a[s00 ] << shift;
				hx = a[s10 ] << shift;
				a[s10 ] = (h0 + hx + 2) >> 2;
				a[s00 ] = (h0 - hx + 2) >> 2;
			}
		}
		if(oddx) {
			/*
			 * do last row if column length is odd
			 * s10, s10+1 are off edge
			 * (i still holds the index of that last row after the loop above)
			 */
			s00 = ny*i;
			for(j = 0; j<nytop-oddy; j += 2) {
				h0 = a[s00 ] << shift;
				hy = a[s00+1] << shift;
				a[s00+1] = (h0 + hy + 2) >> 2;
				a[s00 ] = (h0 - hy + 2) >> 2;
				s00 += 2;
			}
			if(oddy) {
				/*
				 * do corner element if both row and column lengths are odd
				 * s00+1, s10, s10+1 are off edge
				 */
				h0 = a[s00 ] << shift;
				a[s00 ] = (h0 + 2) >> 2;
			}
		}
	}
	free(tmp);
}
/* Decompress & unshuffle a single block.
 *
 *   blocksize     - size in bytes of the uncompressed block
 *   leftoverblock - non-zero disables splitting for this block
 *   src           - compressed input; each split is a 32-bit compressed
 *                   length (decoded with sw32) followed by its payload
 *   dest          - destination for the block
 *   tmp           - scratch buffer receiving the still-shuffled bytes
 *   tmp2          - scratch buffer used when dest is not 16-byte aligned
 *
 * Returns the number of uncompressed bytes produced, or -2 when a split
 * does not decompress to the expected size. */
static int blosc_d(uint32_t blocksize, int32_t leftoverblock,
                   uint8_t *src, uint8_t *dest, uint8_t *tmp, uint8_t *tmp2)
{
  uint32_t typesize = params.typesize;
  /* Unshuffling only applies when the flag is set and items are wider
     than one byte. */
  int shuffled = (params.flags & BLOSC_DOSHUFFLE) && (typesize > 1);
  /* Shuffled data is decompressed into tmp first, otherwise straight
     into dest. */
  uint8_t *out = shuffled ? tmp : dest;
  uint8_t *target;
  int32_t split, nsplits, split_len;
  int32_t nbytes;               /* number of decompressed bytes in split */
  int32_t cbytes;               /* number of compressed bytes in split */
  int32_t ctbytes = 0;          /* number of compressed bytes in block */
  int32_t ntbytes = 0;          /* number of uncompressed bytes in block */

  /* One split per byte of the type when the block qualifies, otherwise
     the whole block is a single split. */
  nsplits = ((typesize <= MAX_SPLITS) &&
             (blocksize/typesize) >= MIN_BUFFERSIZE &&
             (!leftoverblock)) ? typesize : 1;
  split_len = blocksize / nsplits;

  for (split = 0; split < nsplits; split++) {
    /* amount of compressed bytes in this split */
    cbytes = sw32(((uint32_t *)(src))[0]);
    src += sizeof(int32_t);
    ctbytes += sizeof(int32_t);

    if (cbytes != split_len) {
      /* Uncompress */
      nbytes = blosclz_decompress(src, cbytes, out, split_len);
      if (nbytes != split_len) {
        return -2;
      }
    }
    else {
      /* A split as long as its uncompressed size is copied verbatim. */
      memcpy(out, src, split_len);
      nbytes = split_len;
    }

    src += cbytes;
    ctbytes += cbytes;
    out += nbytes;
    ntbytes += nbytes;
  }

  if (shuffled) {
    /* 16-bytes aligned dest: SSE2 unshuffle will work directly on it.
       Otherwise go through tmp2, which is aligned. */
    target = ((uintptr_t)dest % 16 == 0) ? dest : tmp2;
    unshuffle(typesize, blocksize, tmp, target);
    if (target != dest) {
      /* Copy only when dest is not tmp2 (e.g. not blosc_getitem()) */
      memcpy(dest, tmp2, blocksize);
    }
  }

  /* Return the number of uncompressed bytes */
  return ntbytes;
}