/**
 * Precomputation on the evaluation key
 * allows for faster key switching.
 *
 * For every i in [0, decompSize) this combines a[i] with the precomputed
 * FFT data stored in evalKeyMult[i] (its B1 and B2 members) and
 * cycloMod.FRep, and accumulates the per-term results in FFT
 * representation.  The accumulated sum is converted back to coefficient
 * form only once, after the loop, and written to `output` (coefficients
 * 0 .. cycloMod.n - 1).
 *
 * NOTE(review): the per-term step sequence appears to mirror NTL's
 * MulMod(ZZ_pX, ZZ_pX, ZZ_pXMultiplier, ZZ_pXModulus) -- confirm.
 *
 * @param output receives the accumulated result.
 * @param a      input polynomials; must contain at least decompSize
 *               entries, since a[i] is read for every i < decompSize.
 */
void YASHE::dotEval(NTL::ZZ_pX& output, const std::vector<NTL::ZZ_pX>& a) {
  const long n = cycloMod.n;

  // Running sum, kept in FFT form and initialised from the zero polynomial.
  NTL::FFTRep fftOutput(NTL::INIT_SIZE, cycloMod.k);
  ToFFTRep(fftOutput, NTL::ZZ_pX(0), cycloMod.k);

  // Scratch buffers reused by every iteration.
  NTL::ZZ_pX P1(NTL::INIT_SIZE, n);
  NTL::FFTRep R1(NTL::INIT_SIZE, cycloMod.l), R2(NTL::INIT_SIZE, cycloMod.l);

  for (long i = 0; i < decompSize; i++) {
    ToFFTRep(R1, a[i], cycloMod.l);

    // Coefficients n-1 .. 2n-3 of a[i] * B1 go to P1.
    mul(R2, R1, evalKeyMult[i].B1);
    FromFFTRep(P1, R2, n-1, 2*n-3);

    // Shrink the transform of a[i] to size k and multiply by B2.
    reduce(R1, R1, cycloMod.k);
    mul(R1, R1, evalKeyMult[i].B2);

    // Subtract P1 * FRep, still in FFT form.
    ToFFTRep(R2, P1, cycloMod.k);
    mul(R2, R2, cycloMod.FRep);
    sub(R1, R1, R2);

    add(fftOutput, R1, fftOutput);
  }

  FromFFTRep(output, fftOutput, 0, n-1);
}
/**
 * Multiplies `a` by `b` modulo bigCycloMod, then rescales every coefficient
 * of the product by pModulus / cModulus with rounding to the nearest
 * integer (an exact tie is rounded down, because the comparison on the
 * remainder is strict).
 *
 * The ZZ_p modulus is temporarily switched to (cModulus^2) / pModulus for
 * the duration of this call via NTL::ZZ_pPush; the previous modulus is
 * restored when `push` goes out of scope.
 *
 * @param output receives maxDegree + 1 coefficients.  The coefficient
 *               vector is left at that length and is not normalized.
 * @param a      left operand; a.val() and a.B2 are used.
 * @param b      right operand; b.B1 and b.B2 are used.
 */
void YASHE::roundMultiply(NTL::ZZ_pX& output,
                          const NTL::ZZ_pXMultiplier& a,
                          const NTL::ZZ_pXMultiplier& b) {
  // maximum is q^2 * (maxDegree + 1)
  NTL::ZZ_pPush push((cModulus * cModulus)/pModulus);

  const long n = bigCycloMod.n;

  NTL::ZZ_pX product, P1(NTL::INIT_SIZE, n);
  NTL::FFTRep R1(NTL::INIT_SIZE, bigCycloMod.l), R2(NTL::INIT_SIZE, bigCycloMod.l);

  // Coefficients n-1 .. 2n-3 of a * B1 go to P1.
  ToFFTRep(R1, a.val(), bigCycloMod.l);
  mul(R2, R1, b.B1);
  FromFFTRep(P1, R2, n-1, 2*n-3);

  // product = a.B2 * b.B2 - P1 * FRep, coefficients 0 .. n-1.
  mul(R1, a.B2, b.B2);
  ToFFTRep(R2, P1, bigCycloMod.k);
  mul(R2, R2, bigCycloMod.FRep);
  sub(R1, R1, R2);
  FromFFTRep(product, R1, 0, n-1);

  output.SetLength(maxDegree + 1);

  NTL::ZZ quotient, remainder;
  for (long i = 0; i <= maxDegree; i++) {
    // `product` may hold fewer than maxDegree + 1 coefficients (for example
    // when its leading coefficients are zero).  coeff() returns zero for
    // such indices, whereas operator[] performs no bounds check and would
    // read past the end of the coefficient vector.
    DivRem(quotient, remainder, pModulus * rep(NTL::coeff(product, i)), cModulus);

    // Rounding using remainder
    if (remainder * 2 > cModulus) {
      quotient += 1;
    }

    output[i] = NTL::conv<NTL::ZZ_p>(quotient);
  }
}
// Fills S with F.n entries computed from the modulus F.
//
// Without FFT support in F the work is delegated to PlainTraceVec.
// Otherwise: build a scaled, index-reversed copy of the coefficients of
// F.f, multiply it by F.HRep in FFT form, and read the entries of S off
// the negated coefficients of that product.  S[0] is set to n.
static
void ComputeTraceVec(vec_ZZ_p& S, const ZZ_pXModulus& F)
{
   if (!F.UseFFT) {
      PlainTraceVec(S, F.f);
      return;
   }

   const long n = F.n;

   // scaled[src-1] = (n - src) * f[src]   for src = n-1 down to 1
   ZZ_pX scaled;
   scaled.rep.SetLength(n-1);
   for (long src = n-1; src >= 1; src--)
      mul(scaled.rep[src-1], F.f.rep[src], n-src);
   scaled.normalize();

   FFTRep transform;
   ToFFTRep(transform, scaled, F.l);
   mul(transform, transform, F.HRep);

   // Only coefficients n-2 .. 2n-4 of the product are needed.
   ZZ_pX window;
   FromFFTRep(window, transform, n-2, 2*n-4);

   S.SetLength(n);
   S[0] = n;

   for (long idx = 1; idx < n; idx++)
      negate(S[idx], coeff(window, n-1-idx));
}
// x = the first n coefficients (indices 0 .. n-1) of a * b, computed with
// a single FFT-based multiplication.  If either operand is zero, x is
// cleared.  n is capped at deg(a) + deg(b) + 1.
void FFTMulTrunc(ZZ_pX& x, const ZZ_pX& a, const ZZ_pX& b, long n)
{
   if (IsZero(a) || IsZero(b)) {
      clear(x);
      return;
   }

   const long prodDeg = deg(a) + deg(b);

   // Never request more coefficients than the product has.
   const long keep = (n > prodDeg + 1) ? (prodDeg + 1) : n;

   // Transform size large enough to hold the whole product.
   const long logSize = NextPowerOfTwo(prodDeg + 1);

   FFTRep fa(INIT_SIZE, logSize);
   FFTRep fb(INIT_SIZE, logSize);

   ToFFTRep(fa, a, logSize);
   ToFFTRep(fb, b, logSize);
   mul(fa, fa, fb);

   FromFFTRep(x, fa, 0, keep - 1);
}
// A = B*C, B and C are destroyed
//
// 2x2 polynomial matrix product computed in FFT representation.  Every
// entry of B and C is transformed once and released (kill()) right after
// its transform has been taken.  The transform size is derived from the
// degrees of B(1,1) and C(1,1), which are read before anything is released.
void mul(ZZ_pXMatrix& A, ZZ_pXMatrix& B, ZZ_pXMatrix& C)
{
   const long degB = deg(B(1,1));
   const long degC = deg(C(1,1));
   const long degA = degB + degC;

   const long k = NextPowerOfTwo(degA + 1);

   // Transforms of all four entries of B, taken in row-major order.
   FFTRep fB[2][2];
   for (long r = 0; r < 2; r++) {
      for (long c = 0; c < 2; c++) {
         ToFFTRep(fB[r][c], B(r,c), k);
         B(r,c).kill();
      }
   }

   FFTRep fC[2];      // transforms of the current column of C
   FFTRep acc, term;  // scratch for one output entry

   // Produce A one column at a time; each column of C is transformed (and
   // released) just before it is needed.
   for (long col = 0; col < 2; col++) {
      for (long r = 0; r < 2; r++) {
         ToFFTRep(fC[r], C(r,col), k);
         C(r,col).kill();
      }

      for (long row = 0; row < 2; row++) {
         mul(acc, fB[row][0], fC[0]);
         mul(term, fB[row][1], fC[1]);
         add(acc, acc, term);
         FromFFTRep(A(row,col), acc, 0, degA);
      }
   }
}
// x = the first n coefficients (indices 0 .. n-1) of a * a, computed with
// a single FFT-based squaring.  If a is zero, x is cleared.  n is capped
// at 2*deg(a) + 1.
void FFTSqrTrunc(ZZ_pX& x, const ZZ_pX& a, long n)
{
   if (IsZero(a)) {
      clear(x);
      return;
   }

   const long sqrDeg = 2*deg(a);

   // Never request more coefficients than the square has.
   const long keep = (n > sqrDeg + 1) ? (sqrDeg + 1) : n;

   // Transform size large enough to hold the whole square.
   const long logSize = NextPowerOfTwo(sqrDeg + 1);

   FFTRep fa(INIT_SIZE, logSize);

   ToFFTRep(fa, a, logSize);
   mul(fa, fa, fa);

   FromFFTRep(x, fa, 0, keep - 1);
}
// Builds the polynomial x from the vector a (n = a.length()).
// Presumably x is the monic polynomial whose roots are the entries of a --
// the name and the negation/product structure below suggest this, but the
// helpers IterBuild and the pointer-based mul are not visible here; confirm.
//
// Three regimes:
//   * n == 0:              x is set with set(x).
//   * n <= crossover:      a is copied into x.rep and IterBuild does the
//                          work in place.
//   * otherwise:           a is padded to a power-of-two length m and
//                          processed as a product tree: small blocks with
//                          plain arithmetic, larger levels with FFTs.
void BuildFromRoots(ZZ_pX& x, const vec_ZZ_p& a)
{
   long n = a.length();

   if (n == 0) {
      set(x);
      return;
   }

   // Block size (a power of two) below which FFTs are not used.
   long k0 = NextPowerOfTwo(NTL_ZZ_pX_FFT_CROSSOVER);
   long crossover = 1L << k0;

   if (n <= crossover) {
      // Small input: reserve room for n+1 coefficients up front, build in
      // place, then append the leading coefficient.
      x.rep.SetMaxLength(n+1);
      x.rep = a;
      IterBuild(&x.rep[0], n);
      x.rep.SetLength(n+1);
      SetCoeff(x, n);
      return;
   }

   long k = NextPowerOfTwo(n);

   long m = 1L << k;
   long i, j;
   long l, width;

   // Working buffer of m+1 entries: a, zero-padded from index n up to m-1,
   // with entry m set (via set()).
   ZZ_pX b(INIT_SIZE, m+1);

   b.rep = a;
   b.rep.SetLength(m+1);
   for (i = n; i < m; i++)
      clear(b.rep[i]);

   set(b.rep[m]);

   FFTRep R1(INIT_SIZE, k), R2(INIT_SIZE, k);

   ZZ_p t1, one;
   set(one);

   // Two scratch arrays of `crossover` entries; g and h are swapped after
   // every level so that g always points at the most recent results.
   vec_ZZ_p G(INIT_SIZE, crossover), H(INIT_SIZE, crossover);
   ZZ_p *g = G.elts();
   ZZ_p *h = H.elts();
   ZZ_p *tmp;

   // Phase 1: handle each block of `crossover` entries without FFTs.
   for (i = 0; i < m; i+= crossover) {
      // Load the negated entries of this block.
      for (j = 0; j < crossover; j++)
         negate(g[j], b.rep[i+j]);

      if (k0 > 0) {
         // Combine adjacent pairs: g[j] <- g[j]*g[j+1], g[j+1] <- g[j]+g[j+1],
         // i.e. the two low coefficients of (X + g[j])*(X + g[j+1]); the
         // leading coefficient is not stored.
         for (j = 0; j < crossover; j+=2) {
            mul(t1, g[j], g[j+1]);
            add(g[j+1], g[j], g[j+1]);
            g[j] = t1;
         }
      }

      // Remaining levels inside the block: combine neighbouring groups of
      // `width` entries from g into h.
      // NOTE(review): the pointer-based mul is defined elsewhere; presumably
      // it multiplies two polynomials stored without their leading
      // coefficient -- confirm.
      for (l = 1; l < k0; l++) {
         width = 1L << l;

         for (j = 0; j < crossover; j += 2*width)
            mul(&h[j], &g[j], &g[j+width], width);

         tmp = g; g = h; h = tmp;
      }

      // Store the block's result back into b.
      for (j = 0; j < crossover; j++)
         b.rep[i+j] = g[j];
   }

   // Phase 2: combine adjacent groups of `width` entries using FFTs of
   // size 2^(l+1).
   for (l = k0; l < k; l++) {
      width = 1L << l;
      for (i = 0; i < m; i += 2*width) {
         // Temporarily overwrite the entry just past each half with 1 so the
         // transformed range [lo, hi] includes a leading 1, then restore it.
         t1 = b.rep[i+width];
         set(b.rep[i+width]);
         ToFFTRep(R1, b, l+1, i, i+width);
         b.rep[i+width] = t1;
         t1 = b.rep[i+2*width];
         set(b.rep[i+2*width]);
         ToFFTRep(R2, b, l+1, i+width, i+2*width);
         b.rep[i+2*width] = t1;
         mul(R1, R1, R2);
         FromFFTRep(&b.rep[i], R1, 0, 2*width-1);
         // NOTE(review): presumably removes the product's top term, which
         // wraps around onto index 0 in a transform of length 2*width --
         // confirm.
         sub(b.rep[i], b.rep[i], one);
      }
   }

   // Copy the n+1 entries b.rep[delta .. m] into x, skipping the first
   // delta = m-n entries of the padded buffer.
   x.rep.SetLength(n+1);
   long delta = m-n;
   for (i = 0; i <= n; i++)
     x.rep[i] = b.rep[i+delta];

   // no need to normalize
}
// (U, V)^T = M*(U, V)^T
//
// Applies the 2x2 polynomial matrix M to the pair (U, V) in place:
//    U <- M(0,0)*U + M(0,1)*V      (coefficients 0 .. d   are extracted)
//    V <- M(1,0)*U + M(1,1)*V      (coefficients 0 .. d-1 are extracted)
// where d = deg(U) - deg(M(1,1)) and both rows use the ORIGINAL U and V
// (their transforms RU, RV are taken before U is overwritten).
//
// The FFT size 2^k is chosen from d-1 rather than d+1, so it can be too
// small by one or two coefficients.  In those cases (xx == 1 or 2) the
// affected low-order coefficients are computed directly beforehand and the
// FFT output is repaired afterwards.
void mul(ZZ_pX& U, ZZ_pX& V, const ZZ_pXMatrix& M)
{
   long d = deg(U) - deg(M(1,1));
   long k = NextPowerOfTwo(d - 1);

   // When the GCD algorithm is run on polynomials of degree n, n-1,
   // where n is a power of two, then d-1 is likely to be a power of two.
   // It would be more natural to set k = NextPowerOfTwo(d+1), but this
   // would be much less efficient in this case.

   // We optimize this case, as it does sometimes arise naturally
   // in some situations.

   long n = (1L << k);
   long xx;
   ZZ_p a0, a1, b0, b1, c0, d0, u0, u1, v0, v1, nu0, nu1, nv0;
   NTL_ZZRegister(t1);
   NTL_ZZRegister(t2);

   // xx selects how many coefficients need repairing:
   //   1: n == d-1  (two coefficients of U and one of V)
   //   2: n == d    (one coefficient of U)
   //   3: n is large enough, no repair needed
   if (n == d-1)
      xx = 1;
   else if (n == d)
      xx = 2;
   else
      xx = 3;

   switch (xx) {
   case 1:
      // Low-order coefficients of the matrix entries and of U, V.
      GetCoeff(a0, M(0,0), 0);
      GetCoeff(a1, M(0,0), 1);
      GetCoeff(b0, M(0,1), 0);
      GetCoeff(b1, M(0,1), 1);
      GetCoeff(c0, M(1,0), 0);
      GetCoeff(d0, M(1,1), 0);

      GetCoeff(u0, U, 0);
      GetCoeff(u1, U, 1);
      GetCoeff(v0, V, 0);
      GetCoeff(v1, V, 1);

      // nu0 = a0*u0 + b0*v0: constant term of the new U.  The products are
      // accumulated on the integer representatives and reduced once by conv.
      mul(t1, rep(a0), rep(u0));
      mul(t2, rep(b0), rep(v0));
      add(t1, t1, t2);
      conv(nu0, t1);

      // nu1 = a1*u0 + a0*u1 + b1*v0 + b0*v1: linear term of the new U.
      mul(t1, rep(a1), rep(u0));
      mul(t2, rep(a0), rep(u1));
      add(t1, t1, t2);
      mul(t2, rep(b1), rep(v0));
      add(t1, t1, t2);
      mul(t2, rep(b0), rep(v1));
      add(t1, t1, t2);
      conv(nu1, t1);

      // nv0 = c0*u0 + d0*v0: constant term of the new V.
      mul(t1, rep(c0), rep(u0));
      mul(t2, rep(d0), rep(v0));
      add (t1, t1, t2);
      conv(nv0, t1);

      break;

   case 2:
      GetCoeff(a0, M(0,0), 0);
      GetCoeff(b0, M(0,1), 0);
      GetCoeff(u0, U, 0);
      GetCoeff(v0, V, 0);

      // nu0 = a0*u0 + b0*v0: constant term of the new U.
      mul(t1, rep(a0), rep(u0));
      mul(t2, rep(b0), rep(v0));
      add(t1, t1, t2);
      conv(nu0, t1);

      break;

   case 3:
      break;
   }

   FFTRep RU(INIT_SIZE, k), RV(INIT_SIZE, k), R1(INIT_SIZE, k), R2(INIT_SIZE, k);

   // Transform the original U and V once; both output rows reuse them.
   ToFFTRep(RU, U, k);
   ToFFTRep(RV, V, k);

   // First row: U <- M(0,0)*U + M(0,1)*V.
   ToFFTRep(R1, M(0,0), k);
   mul(R1, R1, RU);
   ToFFTRep(R2, M(0,1), k);
   mul(R2, R2, RV);
   add(R1, R1, R2);
   FromFFTRep(U, R1, 0, d);

   // Second row: V <- M(1,0)*U + M(1,1)*V.
   ToFFTRep(R1, M(1,0), k);
   mul(R1, R1, RU);
   ToFFTRep(R2, M(1,1), k);
   mul(R2, R2, RV);
   add(R1, R1, R2);
   FromFFTRep(V, R1, 0, d-1);

   // now fix-up results
   // NOTE(review): the repair below is consistent with the FFT product being
   // cyclic of length n, so that low position j holds (true coefficient j) +
   // (true coefficient j+n).  Subtracting the directly computed low
   // coefficient recovers the high one -- confirm against FromFFTRep.

   switch (xx) {
   case 1:
      // n == d-1: positions 0 and 1 of U also hold coefficients d-1 and d.
      GetCoeff(u0, U, 0);
      sub(u0, u0, nu0);
      SetCoeff(U, d-1, u0);
      SetCoeff(U, 0, nu0);

      GetCoeff(u1, U, 1);
      sub(u1, u1, nu1);
      SetCoeff(U, d, u1);
      SetCoeff(U, 1, nu1);

      // Position 0 of V also holds coefficient d-1.
      GetCoeff(v0, V, 0);
      sub(v0, v0, nv0);
      SetCoeff(V, d-1, v0);
      SetCoeff(V, 0, nv0);
      break;

   case 2:
      // n == d: position 0 of U also holds coefficient d.
      GetCoeff(u0, U, 0);
      sub(u0, u0, nu0);
      SetCoeff(U, d, u0);
      SetCoeff(U, 0, nu0);
      break;

   }
}
int main() { #if (defined(NTL_CRT_ALTCODE) && !(defined(NTL_HAVE_LL_TYPE) && NTL_ZZ_NBITS == NTL_BITS_PER_LONG)) { printf("999999999999999 "); print_flag(); return 0; } #endif SetSeed(ZZ(0)); long n, k; n = 1024; k = 30*NTL_SP_NBITS; ZZ p; RandomLen(p, k); if (!IsOdd(p)) p++; ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " SetCoeff(f, n); // Sets coefficient of X^n to 1 // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { printf("999999999999999 "); print_flag(); return 0; } else if (r1 != r3) { printf("999999999999999 "); print_flag(); return 0; } double t; long i; long iter; ZZ_pX a, b, c; random(a, n); random(b, n); long da = deg(a); long db = deg(b); long dc = da + db; long l = NextPowerOfTwo(dc+1); FFTRep arep, brep, crep; ToFFTRep(arep, a, l, 0, da); ToFFTRep(brep, b, l, 0, db); mul(crep, arep, brep); ZZ_pXModRep modrep; FromFFTRep(modrep, crep); FromZZ_pXModRep(c, modrep, 0, dc); iter = 1; do { t = GetTime(); for (i = 0; i < iter; i++) { FromZZ_pXModRep(c, modrep, 0, dc); } t = GetTime() - t; iter = 2*iter; } while(t < 1); iter = iter/2; iter = long((3/t)*iter) + 1; double tvec[5]; long w; for (w = 0; w < 5; w++) { t = GetTime(); for (i = 0; i < iter; i++) { FromZZ_pXModRep(c, modrep, 0, dc); } t = GetTime() - t; tvec[w] = t; } t = clean_data(tvec); t = floor((t/iter)*1e12); // The following is just to test some tuning Wizard logic -- // be sure to get rid of this!! #if (defined(NTL_CRT_ALTCODE)) // t *= 1.12; #endif if (t < 0 || t >= 1e15) printf("999999999999999 "); else printf("%015.0f ", t); printf(" [%ld] ", iter); print_flag(); return 0; }