void Cmodulus::iFFT(zz_pX &x, const vec_long& y)const { FHE_TIMER_START; zz_pBak bak; bak.save(); context.restore(); if (zMStar->getPow2()) { // special case when m is a power of 2 long k = zMStar->getPow2(); long phim = (1L << (k-1)); long p = zz_p::modulus(); const zz_p *ipowers_p = (*ipowers).rep.elts(); const mulmod_precon_t *ipowers_aux_p = ipowers_aux.elts(); const long *yp = y.elts(); vec_long& tmp = Cmodulus::getScratch_vec_long(); tmp.SetLength(phim); long *tmp_p = tmp.elts(); #ifdef FHE_OPENCL AltFFTRev1(tmp_p, yp, k-1, *altFFTInfo); #else FFTRev1(tmp_p, yp, k-1, *zz_pInfo->p_info); #endif x.rep.SetLength(phim); zz_p *xp = x.rep.elts(); for (long i = 0; i < phim; i++) xp[i].LoopHole() = MulModPrecon(tmp_p[i], rep(ipowers_p[i]), p, ipowers_aux_p[i]); x.normalize(); return; } zz_p rt; long m = getM(); // convert input to zpx format, initializing only the coeffs i s.t. (i,m)=1 x.rep.SetLength(m); long i,j; for (i=j=0; i<m; i++) if (zMStar->inZmStar(i)) x.rep[i].LoopHole() = y[j++]; // DIRT: y[j] already reduced x.normalize(); conv(rt, rInv); // convert rInv to zp format BluesteinFFT(x, m, rt, *ipowers, ipowers_aux, *iRb); // call the FFT routine // reduce the result mod (Phi_m(X),q) and copy to the output polynomial x { FHE_NTIMER_START(iFFT_division); rem(x, x, *phimx); // out %= (Phi_m(X),q) } // normalize zz_p mm_inv; conv(mm_inv, m_inv); x *= mm_inv; }
int main() { #ifdef NTL_SPMM_ULL if (sizeof(NTL_ULL_TYPE) < 2*sizeof(long)) { printf("999999999999999 "); print_flag(); return 0; } #endif long n, k; n = 200; k = 10*NTL_ZZ_NBITS; ZZ p; RandomLen(p, k); ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " SetCoeff(f, n); // Sets coefficient of X^n to 1 // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { printf("999999999999999 "); print_flag(); return 0; } else if (r1 != r3) { printf("999999999999999 "); print_flag(); return 0; } double t; long i, j; long iter; const int nprimes = 30; const long L = 12; const long N = 1L << L; long r; for (r = 0; r < nprimes; r++) UseFFTPrime(r); vec_long aa[nprimes], AA[nprimes]; for (r = 0; r < nprimes; r++) { aa[r].SetLength(N); AA[r].SetLength(N); for (i = 0; i < N; i++) aa[r][i] = RandomBnd(GetFFTPrime(r)); FFTFwd(AA[r].elts(), aa[r].elts(), L, r); FFTRev1(AA[r].elts(), AA[r].elts(), L, r); } iter = 1; do { t = GetTime(); for (j = 0; j < iter; j++) { for (r = 0; r < nprimes; r++) { long *AAp = AA[r].elts(); long *aap = aa[r].elts(); long q = GetFFTPrime(r); mulmod_t qinv = GetFFTPrimeInv(r); FFTFwd(AAp, aap, L, r); FFTRev1(AAp, aap, L, r); for (i = 0; i < N; i++) AAp[i] = NormalizedMulMod(AAp[i], aap[i], q, qinv); } } t = GetTime() - t; iter = 2*iter; } while(t < 1); iter = iter/2; iter = long((1.5/t)*iter) + 1; double tvec[5]; long w; for (w = 0; w < 5; w++) { t = GetTime(); for (j = 0; j < iter; j++) { for (r = 0; r < nprimes; r++) { long *AAp = AA[r].elts(); long *aap = aa[r].elts(); long q = GetFFTPrime(r); mulmod_t qinv = GetFFTPrimeInv(r); FFTFwd(AAp, aap, L, r); FFTRev1(AAp, aap, L, r); for (i = 0; i < N; i++) AAp[i] = NormalizedMulMod(AAp[i], aap[i], q, qinv); } } t = GetTime() - t; tvec[w] = t; } t = clean_data(tvec); t = floor((t/iter)*1e13); if (t < 0 || t >= 1e15) printf("999999999999999 "); else printf("%015.0f ", t); printf(" [%ld] ", iter); print_flag(); return 0; }