// Transform x in place: scale its coefficients by `powers`, multiply by the
// precomputed operand Rb in FFT representation, convert back asking for the
// coefficient range n-1 .. 2*(n-1), and scale by `powers` once more.
//
//   x           polynomial to transform; overwritten with the result
//   n           transform size; for n <= 0 the result is the zero polynomial
//   root        accepted for interface reasons, not read by this routine
//   powers      per-coefficient multipliers
//   powers_aux  precomputed MulModPrecon data, entry i belongs to powers[i]
//   Rb          second operand of the product, already in FFT representation
void BluesteinFFT(zz_pX& x, long n, const zz_p& root,
                  const zz_pX& powers, const Vec<mulmod_precon_t>& powers_aux,
                  const fftRep& Rb)
{
  // FHE_TIMER_START;

  if (IsZero(x)) return;
  if (n<=0) {
    clear(x);
    return;
  }

  const long modulus = zz_p::modulus();

  // x[idx] *= powers[idx] for every coefficient currently stored in x,
  // followed by stripping of leading zeros.
  auto scaleByPowers = [&]() {
    const long top = deg(x);
    for (long idx = 0; idx <= top; ++idx) {
      x[idx].LoopHole() =
        MulModPrecon(rep(x[idx]), rep(powers[idx]), modulus, powers_aux[idx]);
    }
    x.normalize();
  };

  scaleByPowers();

  const long logSize = NextPowerOfTwo(2*n-1);
  fftRep& Ra = Cmodulus::getScratch_fftRep(logSize);
  TofftRep(Ra, x, logSize);

  mul(Ra,Ra,Rb);                    // product taken in FFT representation

  FromfftRep(x, Ra, n-1, 2*(n-1));  // back to coefficient form

  scaleByPowers();
}
// Incremental CRT on integer vectors. Each vp[i] is replaced by
// vp[i] + p*k, where k represents (vq[i]-vp[i])*p^{-1} mod q and is moved
// down by q whenever it exceeds q/2. Entries of vp beyond the length of vq
// are handled as if the matching vq entry were 0.
// Returns true iff vp and vq have the same length.
bool intVecCRT(vec_ZZ& vp, const ZZ& p, const zzvec& vq, long q)
{
  const long pInv = InvMod(rem(p,q), q);                   // p^{-1} mod q
  const long common = min(vp.length(),vq.length());
  const long halfQ = q/2;
  const mulmod_precon_t pqInv = PrepMulModPrecon(pInv, q);

  ZZ tmp;

  // vp[idx] += p * [deltaModQ * p^{-1}]_q, with the bracket taken as a
  // representative that does not exceed q/2
  auto applyCorrection = [&](long idx, long deltaModQ) {
    long k = MulModPrecon(deltaModQ, pInv, q, pqInv);
    if (k > halfQ) k -= q;
    mul(tmp, k, p);
    vp[idx] += tmp;
  };

  for (long idx = 0; idx < common; idx++) {
    long vqi;
    conv(vqi, vq[idx]);             // convert to single precision
    applyCorrection(idx, SubMod(vqi, rem(vp[idx],q), q));
  }

  // other entries (if any) are 0 mod q, so the difference is just -vp[idx]
  for (long idx = vq.length(); idx < vp.length(); idx++) {
    applyCorrection(idx, NegateMod(rem(vp[idx],q), q));
  }

  return (vp.length()==vq.length());
}
// Generate key-switching matrices for dimension i of the given key: one for
// every power gi^1 .. gi^(ord-1), and for a non-native dimension also the
// matrix for each power multiplied by g2md, plus the one for
// genToPow(i, -ord).
static void add1Dmats4dim(FHESecKey& sKey, long i, long keyID)
{
  const FHEcontext &context = sKey.getContext();
  long m = context.zMStar.getM();
  computeParams(context,m,i); // defines vars: native, ord, gi, g2md, giminv, g2mdminv

  /* MAUTO vector<long> vals; */
  long genPower = gi;                       // gi^step mod m
  for (long step = 1; step < ord; ++step) {
    // From s(X^genPower) to s(X)
    sKey.GenKeySWmatrix(1, genPower, keyID, keyID);

    if (!native) {
      // also from s(X^{g^{i-ord}}) to s(X)
      const long wrapped = MulModPrecon(genPower,g2md,m,g2mdminv);
      sKey.GenKeySWmatrix(1, wrapped, keyID, keyID);
      /* MAUTO vals.push_back(wrapped); */
    }
    /* MAUTO vals.push_back(genPower); */

    genPower = MulModPrecon(genPower, gi, m, giminv); // next power of gi mod m
  }

  if (!native) {
    sKey.GenKeySWmatrix(1, context.zMStar.genToPow(i, -ord), keyID, keyID);
  }

  /* MAUTO
  sKey.resetTree(i,keyID); // remove existing tree, if any
  sKey.add2tree(i, 1, vals, keyID);
  */
}
// x = sum over i in [low, min(high, v.length()-1)] of v[i] * H[i-low].
// The first n entries of t serve as the coefficient accumulator; at the end
// t is copied into x.rep and x is normalized.
void InnerProduct(zz_pX& x, const vec_zz_p& v, long low, long high,
                  const vec_zz_pX& H, long n, vec_zz_p& t)
{
   zz_p *acc = t.elts();
   for (long c = 0; c < n; c++)
      clear(acc[c]);

   const long p = zz_p::modulus();
   const double pinv = zz_p::ModulusInverse();

   const long last = min(high, v.length()-1);

   for (long idx = low; idx <= last; idx++) {
      const vec_zz_p& h = H[idx-low].rep;
      const long hlen = h.length();
      const zz_p *hp = h.elts();

      // v[idx] is the fixed multiplier for this whole row
      const long W = rep(v[idx]);
      const mulmod_precon_t Wpinv = PrepMulModPrecon(W, p, pinv);

      for (long c = 0; c < hlen; c++) {
         const long prod = MulModPrecon(rep(hp[c]), W, p, Wpinv);
         acc[c].LoopHole() = AddMod(prod, rep(acc[c]), p);
      }
   }

   x.rep = t;
   x.normalize();
}
// X = A * b: every entry of the matrix A multiplied by the scalar b.
// Matrices with at most one entry use the plain zz_p product; larger ones
// precondition on b once and reuse that for every entry.
void mul(mat_zz_p& X, const mat_zz_p& A, zz_p b)
{
   const long rows = A.NumRows();
   const long cols = A.NumCols();

   X.SetDims(rows, cols);

   const bool tiny = (rows == 0 || cols == 0 || (rows == 1 && cols == 1));
   if (tiny) {
      for (long r = 0; r < rows; r++)
         for (long c = 0; c < cols; c++)
            mul(X[r][c], A[r][c], b);
      return;
   }

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();
   const long bb = rep(b);
   const mulmod_precon_t bpinv = PrepMulModPrecon(bb, p, pinv);

   for (long r = 0; r < rows; r++) {
      const zz_p *src = A[r].elts();
      zz_p *dst = X[r].elts();

      for (long c = 0; c < cols; c++)
         dst[c].LoopHole() = MulModPrecon(rep(src[c]), bb, p, bpinv);
   }
}
// x = a * b: every entry of the vector a multiplied by the scalar b.
// Vectors of length at most one use the plain zz_p product; longer ones
// precondition on b once and reuse that for every entry.
void mul(vec_zz_p& x, const vec_zz_p& a, zz_p b)
{
   const long len = a.length();
   x.SetLength(len);

   if (len <= 1) {
      for (long idx = 0; idx < len; idx++)
         mul(x[idx], a[idx], b);
      return;
   }

   const long p = zz_p::modulus();
   const double pinv = zz_p::ModulusInverse();
   const long bb = rep(b);
   const mulmod_precon_t bpinv = PrepMulModPrecon(bb, p, pinv);

   const zz_p *src = a.elts();
   zz_p *dst = x.elts();

   for (long idx = 0; idx < len; idx++)
      dst[idx].LoopHole() = MulModPrecon(rep(src[idx]), bb, p, bpinv);
}
// x = A * b (matrix times vector) over zz_p.
// Raises LogicError when the column count of A differs from b.length().
// With more than one row, the preconditioning data for the entries of b is
// computed once (in the shared buffer precon_vec) and reused for every row.
void mul_aux(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b)
{
   const long rows = A.NumRows();
   const long cols = A.NumCols();

   if (cols != b.length())
      LogicError("matrix mul: dimension mismatch");

   x.SetLength(rows);
   zz_p* out = x.elts();

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   const zz_p* bvec = b.elts();

   if (rows > 1) {
      Vec<mulmod_precon_t>::Watcher watch_precon_vec(precon_vec);
      precon_vec.SetLength(cols);
      mulmod_precon_t *bprecon = precon_vec.elts();

      for (long c = 0; c < cols; c++)
         bprecon[c] = PrepMulModPrecon(rep(bvec[c]), p, pinv);

      for (long r = 0; r < rows; r++) {
         const zz_p* arow = A[r].elts();
         long sum = 0;

         for (long c = 0; c < cols; c++) {
            const long term = MulModPrecon(rep(arow[c]), rep(bvec[c]), p, bprecon[c]);
            sum = AddMod(sum, term, p);
         }

         out[r].LoopHole() = sum;
      }
      return;
   }

   // at most one row: plain modular multiplication, no preconditioning
   for (long r = 0; r < rows; r++) {
      const zz_p* arow = A[r].elts();
      long sum = 0;

      for (long c = 0; c < cols; c++) {
         const long term = MulMod(rep(arow[c]), rep(bvec[c]), p, pinv);
         sum = AddMod(sum, term, p);
      }

      out[r].LoopHole() = sum;
   }
}
long polyEvalMod(const ZZX& poly, long x, long p) { long ret = 0; x %= p; if (x<0) x += p; mulmod_precon_t xpinv = PrepMulModPrecon(x, p); for (long i=deg(poly); i>=0; i--) { long coeff = rem(poly[i], p); ret = AddMod(ret, coeff, p); // Add the coefficient of x^i if (i>0) ret = MulModPrecon(ret, x, p, xpinv); // then mult by x } return ret; }
// Forward transform of the integer polynomial x under this modulus; the
// result is written to y.
// When ALT_CRT is off and m is a power of two, the coefficients are scaled
// by the precomputed `powers` table and passed to FFTFwd; y then has
// 2^(k-1) entries, k = getPow2(). Otherwise BluesteinFFT is used and y keeps
// only the coefficients whose index lies in Z_m^* (getPhiM() entries).
// NOTE(review): the power-of-two path writes deg(x)+1 entries of y without
// checking against y's length -- presumably deg(x) < 2^(k-1); confirm.
void Cmodulus::FFT(vec_long &y, const ZZX& x) const
{
  FHE_TIMER_START;
  zz_pBak bak; bak.save();
  context.restore();

  zz_pX& tmp = Cmodulus::getScratch_zz_pX();
  {
    FHE_NTIMER_START(FFT_remainder);
    conv(tmp,x);      // convert input to zpx format
  }

  if (!ALT_CRT && zMStar->getPow2()) {
    // special case when m is a power of 2
    const long k = zMStar->getPow2();
    const long phim = (1L << (k-1));
    const long top = deg(tmp);
    const long p = zz_p::modulus();

    const zz_p *pw = (*powers).rep.elts();
    const mulmod_precon_t *pwAux = powers_aux.elts();

    y.SetLength(phim);
    long *out = y.elts();
    zz_p *in = tmp.rep.elts();

    long idx = 0;
    for (; idx <= top; idx++)
      out[idx] = MulModPrecon(rep(in[idx]), rep(pw[idx]), p, pwAux[idx]);
    for (; idx < phim; idx++)
      out[idx] = 0;

    FFTFwd(out, out, k-1, *zz_pInfo->p_info);
    return;
  }

  zz_p rt;
  conv(rt, root);  // convert root to zp format

  BluesteinFFT(tmp, getM(), rt, *powers, powers_aux, *Rb); // call the FFT routine

  // copy the result to the output vector y, keeping only the
  // entries corresponding to primitive roots of unity
  y.SetLength(zMStar->getPhiM());
  const long m = getM();
  long outPos = 0;
  for (long idx = 0; idx < m; idx++) {
    if (zMStar->inZmStar(idx)) {
      y[outPos] = rep(coeff(tmp,idx));
      outPos++;
    }
  }
}
// multiply the polynomial f by the integer a modulo q void MulMod(ZZX& out, const ZZX& f, long a, long q, bool abs/*default=true*/) { // ensure that out has the same degree as f out.SetMaxLength(deg(f)+1); // allocate space if needed if (deg(out)>deg(f)) trunc(out,out,deg(f)+1); // remove high degrees mulmod_precon_t aqinv = PrepMulModPrecon(a, q); for (long i=0; i<=deg(f); i++) { long c = rem(coeff(f,i), q); c = MulModPrecon(c, a, q, aqinv); // returns c \in [0,q-1] if (!abs && c >= q/2) c -= q; SetCoeff(out,i,c); } }
// Forward transform of tmp under this modulus; the result is written to y.
// tmp is used as scratch space and is modified.
// When m is a power of two, the coefficients are scaled by the precomputed
// `powers` table and passed to the power-of-two transform (AltFFTFwd under
// FHE_OPENCL, FFTFwd otherwise). Otherwise BluesteinFFT is used and y keeps
// only the coefficients whose index lies in Z_m^*.
// NOTE(review): the power-of-two path writes deg(tmp)+1 entries of y without
// checking against y's length -- presumably deg(tmp) < 2^(k-1); confirm.
void Cmodulus::FFT_aux(vec_long &y, zz_pX& tmp) const
{
  if (zMStar->getPow2()) {
    // special case when m is a power of 2
    const long k = zMStar->getPow2();
    const long phim = (1L << (k-1));
    const long top = deg(tmp);
    const long p = zz_p::modulus();

    const zz_p *pw = (*powers).rep.elts();
    const mulmod_precon_t *pwAux = powers_aux.elts();

    y.SetLength(phim);
    long *out = y.elts();
    zz_p *in = tmp.rep.elts();

    long idx = 0;
    for (; idx <= top; idx++)
      out[idx] = MulModPrecon(rep(in[idx]), rep(pw[idx]), p, pwAux[idx]);
    for (; idx < phim; idx++)
      out[idx] = 0;

#ifdef FHE_OPENCL
    AltFFTFwd(out, out, k-1, *altFFTInfo);
#else
    FFTFwd(out, out, k-1, *zz_pInfo->p_info);
#endif
    return;
  }

  zz_p rt;
  conv(rt, root);  // convert root to zp format

  BluesteinFFT(tmp, getM(), rt, *powers, powers_aux, *Rb); // call the FFT routine

  // copy the result to the output vector y, keeping only the
  // entries corresponding to primitive roots of unity
  y.SetLength(zMStar->getPhiM());
  const long m = getM();
  long outPos = 0;
  for (long idx = 0; idx < m; idx++) {
    if (zMStar->inZmStar(idx)) {
      y[outPos] = rep(coeff(tmp,idx));
      outPos++;
    }
  }
}
// plaintextAutomorph: an auxilliary routine...maybe palce in NumbTh? // Compute b(X) = a(X^k) mod Phi_m(X). Result is calclated in the output b // "in place", so a should not alias b. template <class RX, class RXModulus> static void plaintextAutomorph(RX& b, const RX& a, long k, const PAlgebra& zMStar, const RXModulus& PhimX) { long m = zMStar.getM(); assert(zMStar.inZmStar(k)); b.SetLength(m); for (long j = 0; j < m; j++) b[j] = 0; long d = deg(a); // compute b(X) = a(X^k) mod (X^m-1) mulmod_precon_t precon = PrepMulModPrecon(k, m); for (long j = 0; j <= d; j++) b[MulModPrecon(j, k, m, precon)] = a[j]; // b[j*k mod m] = a[j] b.normalize(); rem(b, b, PhimX); // reduce modulo the m'th cyclotomic }
// Expand index set by s1, and multiply by \prod{q \in s1}. s1 is assumed to // be disjoint from the current index set. Returns the logarithm of product. double DoubleCRT::addPrimesAndScale(const IndexSet& s1) { if (empty(s1)) return 0.0; // nothing to do assert(empty(s1 & map.getIndexSet())); // s1 is disjoint from *this // compute factor to scale existing rows ZZ factor = to_ZZ(1); double logFactor = 0.0; for (long i = s1.first(); i <= s1.last(); i = s1.next(i)) { long qi = context.ithPrime(i); factor *= qi; logFactor += log((double)qi); } // scale existing rows long phim = context.zMStar.getPhiM(); const IndexSet& iSet = map.getIndexSet(); for (long i = iSet.first(); i <= iSet.last(); i = iSet.next(i)) { long qi = context.ithPrime(i); long f = rem(factor, qi); // f = factor % qi vec_long& row = map[i]; // scale row by a factor of f modulo qi mulmod_precon_t bninv = PrepMulModPrecon(f, qi, 1.0/(double)qi); for (long j=0; j<phim; j++) row[j] = MulModPrecon(row[j], f, qi, bninv); } // insert new rows and fill them with zeros map.insert(s1); // add new rows to the map for (long i = s1.first(); i <= s1.last(); i = s1.next(i)) { vec_long& row = map[i]; for (long j=0; j<phim; j++) row[j] = 0; } return logFactor; }
// x = a * B (row vector times matrix) over zz_p.
// Raises LogicError when a.length() differs from the row count of B.
// A single-column B is handled as a plain dot product. For wider matrices
// each nonzero a[k] is preconditioned once and applied across row k of B,
// accumulating in the shared buffer mul_aux_vec before the result is copied
// into x.
void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B)
{
   const long len = a.length();
   const long cols = B.NumCols();

   if (len != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   if (cols == 0) {
      x.SetLength(0);
      return;
   }

   if (cols == 1) {
      const long p = zz_p::modulus();
      const mulmod_t pinv = zz_p::ModulusInverse();

      long sum = 0;
      for (long k = 1; k <= len; k++) {
         const long term = MulMod(rep(a(k)), rep(B(k,1)), p, pinv);
         sum = AddMod(sum, term, p);
      }

      x.SetLength(1);
      x(1).LoopHole() = sum;
      return;
   }

   // cols > 1: precondition on the entries of a
   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(cols);
   long *sums = mul_aux_vec.elts();

   const zz_p* avec = a.elts();

   for (long c = 0; c < cols; c++)
      sums[c] = 0;

   for (long k = 0; k < len; k++) {
      const long aa = rep(avec[k]);
      if (aa == 0) continue;          // a zero entry contributes nothing

      const zz_p* brow = B[k].elts();
      const mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv);

      for (long c = 0; c < cols; c++) {
         const long term = MulModPrecon(rep(brow[c]), aa, p, aapinv);
         sums[c] = AddMod(sums[c], term, p);
      }
   }

   x.SetLength(cols);
   zz_p *out = x.elts();
   for (long c = 0; c < cols; c++)
      out[c].LoopHole() = sums[c];
}
// d = determinant of the square matrix M_in, computed by elimination on a
// private copy. Raises LogicError for a non-square matrix. The empty matrix
// has determinant 1; d is set to 0 as soon as a column without a usable
// pivot is found.
void determinant(zz_p& d, const mat_zz_p& M_in)
{
   mat_zz_p M;
   M = M_in;

   const long n = M.NumRows();

   if (M.NumCols() != n)
      LogicError("determinant: nonsquare matrix");

   if (n == 0) {
      set(d);
      return;
   }

   zz_p det;
   set(det);

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, pivInv;

   for (long k = 0; k < n; k++) {
      // first row at or below k with a nonzero entry in column k
      long pivRow = -1;
      for (long r = k; r < n; r++) {
         if (!IsZero(M[r][k])) {
            pivRow = r;
            break;
         }
      }

      if (pivRow == -1) {
         clear(d);
         return;
      }

      if (k != pivRow) {
         swap(M[pivRow], M[k]);
         negate(det, det);       // a row exchange flips the sign
      }

      mul(det, det, M[k][k]);

      inv(pivInv, M[k][k]);

      for (long r = k+1; r < n; r++) {
         // M[r] = M[r] - M[k]*M[r,k]*pivInv

         mul(factor, M[r][k], pivInv);
         negate(factor, factor);

         zz_p *dst = M[r].elts() + (k+1);
         const zz_p *src = M[k].elts() + (k+1);

         const long T1 = rep(factor);
         const mulmod_precon_t T1pinv = PrepMulModPrecon(T1, p, pinv);

         for (long c = k+1; c < n; c++, dst++, src++) {
            // *dst = *dst + (*src)*factor
            const long T2 = MulModPrecon(rep(*src), T1, p, T1pinv);
            dst->LoopHole() = AddMod(rep(*dst), T2, p);
         }
      }
   }

   d = det;
}
// X = A * B over zz_p. Raises LogicError when the column count of A differs
// from the row count of B.
// When B has at most one column, the plain triple loop with MulMod is used.
// Otherwise each nonzero entry of A is preconditioned once and applied
// across the matching row of B; a row of X is accumulated in the shared
// buffer mul_aux_vec and then copied out.
static
void mul_aux(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B)
{
   const long rows = A.NumRows();
   const long inner = A.NumCols();
   const long cols = B.NumCols();

   if (inner != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   X.SetDims(rows, cols);

   if (cols <= 1) {
      // just use the old code, w/o preconditioning
      const long p = zz_p::modulus();
      const mulmod_t pinv = zz_p::ModulusInverse();

      for (long r = 1; r <= rows; r++) {
         for (long c = 1; c <= cols; c++) {
            long sum = 0;
            for (long k = 1; k <= inner; k++) {
               const long term = MulMod(rep(A(r,k)), rep(B(k,c)), p, pinv);
               sum = AddMod(sum, term, p);
            }
            X(r,c).LoopHole() = sum;
         }
      }
      return;
   }

   // new preconditioning code
   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(cols);
   long *sums = mul_aux_vec.elts();

   for (long r = 0; r < rows; r++) {
      const zz_p* arow = A[r].elts();

      for (long c = 0; c < cols; c++)
         sums[c] = 0;

      for (long k = 0; k < inner; k++) {
         const long aa = rep(arow[k]);
         if (aa == 0) continue;        // a zero entry contributes nothing

         const zz_p* brow = B[k].elts();
         const mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv);

         for (long c = 0; c < cols; c++) {
            const long term = MulModPrecon(rep(brow[c]), aa, p, aapinv);
            sums[c] = AddMod(sums[c], term, p);
         }
      }

      zz_p *out = X[r].elts();
      for (long c = 0; c < cols; c++)
         out[c].LoopHole() = sums[c];
   }
}
// Inverse transform: rebuild the polynomial x from the evaluation vector y
// under this modulus.
//   x  output polynomial; any previous content is discarded
//   y  input values, one per element of Z_m^* in increasing index order
//      (2^(k-1) values in the power-of-two case, k = getPow2())
// When m is a power of two, the reverse transform is applied (AltFFTRev1
// under FHE_OPENCL, FFTRev1 otherwise) and the output is scaled by the
// precomputed `ipowers` table. Otherwise the values of y are placed at the
// indices in Z_m^*, BluesteinFFT is run with the inverse tables, the result
// is reduced modulo *phimx and finally multiplied by m_inv.
void Cmodulus::iFFT(zz_pX &x, const vec_long& y)const
{
  FHE_TIMER_START;
  zz_pBak bak; bak.save();
  context.restore();

  if (zMStar->getPow2()) {
    // special case when m is a power of 2

    long k = zMStar->getPow2();
    long phim = (1L << (k-1));
    long p = zz_p::modulus();

    const zz_p *ipowers_p = (*ipowers).rep.elts();
    const mulmod_precon_t *ipowers_aux_p = ipowers_aux.elts();

    const long *yp = y.elts();

    vec_long& tmp = Cmodulus::getScratch_vec_long();
    tmp.SetLength(phim);
    long *tmp_p = tmp.elts();

#ifdef FHE_OPENCL
    AltFFTRev1(tmp_p, yp, k-1, *altFFTInfo);
#else
    FFTRev1(tmp_p, yp, k-1, *zz_pInfo->p_info);
#endif

    x.rep.SetLength(phim);
    zz_p *xp = x.rep.elts();

    for (long i = 0; i < phim; i++)
      xp[i].LoopHole() = MulModPrecon(tmp_p[i], rep(ipowers_p[i]), p, ipowers_aux_p[i]);

    x.normalize();

    return;
  }

  zz_p rt;
  long m = getM();

  // convert input to zpx format: coefficients i with (i,m)=1 come from y,
  // all others are zero
  x.rep.SetLength(m);
  long i,j;
  for (i=j=0; i<m; i++) {
    if (zMStar->inZmStar(i))
      x.rep[i].LoopHole() = y[j++]; // DIRT: y[j] already reduced
    else
      clear(x.rep[i]); // x may arrive holding coefficients from earlier use
  }
  x.normalize();
  conv(rt, rInv);  // convert rInv to zp format

  BluesteinFFT(x, m, rt, *ipowers, ipowers_aux, *iRb); // call the FFT routine

  // reduce the result mod (Phi_m(X),q) and copy to the output polynomial x
  { FHE_NTIMER_START(iFFT_division);
    rem(x, x, *phimx); // out %= (Phi_m(X),q)
  }

  // normalize
  zz_p mm_inv;
  conv(mm_inv, m_inv);
  x *= mm_inv;
}
// Compute d = det(A) and, when d is nonzero, X = A^{-1}.
// Raises LogicError for a non-square matrix. For the empty matrix d is set
// to 1 and X to the 0x0 matrix. When a column without a usable pivot is
// found, d is set to 0 and X is left untouched.
// Works on the augmented matrix [A | I]: forward elimination with the
// pivot inverses stored on the diagonal, followed by back substitution one
// column of X at a time.
void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A)
{
   const long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   // M = [A | I]
   mat_zz_p M;
   M.SetDims(n, 2*n);
   for (long r = 0; r < n; r++) {
      for (long c = 0; c < n; c++) {
         M[r][c] = A[r][c];
         clear(M[r][n+c]);
      }
      set(M[r][n+r]);
   }

   zz_p det;
   set(det);

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, term, pivInv;

   for (long k = 0; k < n; k++) {
      // first row at or below k with a nonzero entry in column k
      long pivRow = -1;
      for (long r = k; r < n; r++) {
         if (!IsZero(M[r][k])) {
            pivRow = r;
            break;
         }
      }

      if (pivRow == -1) {
         clear(d);
         return;
      }

      if (k != pivRow) {
         swap(M[pivRow], M[k]);
         negate(det, det);       // a row exchange flips the sign
      }

      mul(det, det, M[k][k]);

      inv(pivInv, M[k][k]);
      M[k][k] = pivInv;          // the diagonal now holds the pivot inverse

      for (long r = k+1; r < n; r++) {
         // M[r] = M[r] - M[k]*M[r,k]*pivInv

         mul(factor, M[r][k], pivInv);
         negate(factor, factor);

         zz_p *dst = M[r].elts() + (k+1);
         const zz_p *src = M[k].elts() + (k+1);

         const long T1 = rep(factor);
         const mulmod_precon_t T1pinv = PrepMulModPrecon(T1, p, pinv);

         for (long c = k+1; c < 2*n; c++, dst++, src++) {
            // *dst = *dst + (*src)*factor
            const long T2 = MulModPrecon(rep(*src), T1, p, T1pinv);
            dst->LoopHole() = AddMod(rep(*dst), T2, p);
         }
      }
   }

   // back substitution, bottom row first, for each column of the result
   X.SetDims(n, n);
   for (long col = 0; col < n; col++) {
      for (long r = n-1; r >= 0; r--) {
         clear(factor);
         for (long c = r+1; c < n; c++) {
            mul(term, X[c][col], M[r][c]);
            add(factor, factor, term);
         }
         sub(factor, M[r][n+col], factor);
         mul(X[r][col], factor, M[r][r]);
      }
   }

   d = det;
}
void FFT(long* A, const long* a, long k, long q, const long* root, FFTMultipliers& tab) // performs a 2^k-point convolution modulo q { if (k <= 1) { if (k == 0) { A[0] = a[0]; return; } if (k == 1) { long a0 = AddMod(a[0], a[1], q); long a1 = SubMod(a[0], a[1], q); A[0] = a0; A[1] = a1; return; } } // assume k > 1 if (k > tab.MaxK) PrecompFFTMultipliers(k, q, root, tab); NTL_THREAD_LOCAL static Vec<long> AA_store; AA_store.SetLength(1L << k); long *AA = AA_store.elts(); BitReverseCopy(AA, a, k); long n = 1L << k; long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1; // s = 1 for (i = 0; i < n; i += 2) { t = AA[i + 1]; u = AA[i]; AA[i] = AddMod(u, t, q); AA[i+1] = SubMod(u, t, q); } for (s = 2; s < k; s++) { m = 1L << s; m_half = 1L << (s-1); m_fourth = 1L << (s-2); const long* wtab = tab.wtab_precomp[s].elts(); const mulmod_precon_t *wqinvtab = tab.wqinvtab_precomp[s].elts(); for (i = 0; i < n; i+= m) { long *AA0 = &AA[i]; long *AA1 = &AA[i + m_half]; #if (NTL_PIPELINE) // pipelining: seems to be faster t = AA1[0]; u = AA0[0]; t1 = MulModPrecon(AA1[1], wtab[1], q, wqinvtab[1]); u1 = AA0[1]; for (j = 0; j < m_half-2; j += 2) { long a02 = AA0[j+2]; long a03 = AA0[j+3]; long a12 = AA1[j+2]; long a13 = AA1[j+3]; long w2 = wtab[j+2]; long w3 = wtab[j+3]; mulmod_precon_t wqi2 = wqinvtab[j+2]; mulmod_precon_t wqi3 = wqinvtab[j+3]; tt = MulModPrecon(a12, w2, q, wqi2); long b00 = AddMod(u, t, q); long b10 = SubMod(u, t, q); tt1 = MulModPrecon(a13, w3, q, wqi3); long b01 = AddMod(u1, t1, q); long b11 = SubMod(u1, t1, q); AA0[j] = b00; AA1[j] = b10; AA0[j+1] = b01; AA1[j+1] = b11; t = tt; u = a02; t1 = tt1; u1 = a03; } AA0[j] = AddMod(u, t, q); AA1[j] = SubMod(u, t, q); AA0[j + 1] = AddMod(u1, t1, q); AA1[j + 1] = SubMod(u1, t1, q); } #else for (j = 0; j < m_half; j += 2) { const long a00 = AA0[j]; const long a01 = AA0[j+1]; const long a10 = AA1[j]; const long a11 = AA1[j+1]; const long w0 = wtab[j]; const long w1 = wtab[j+1]; const mulmod_precon_t wqi0 = wqinvtab[j]; 
const mulmod_precon_t wqi1 = wqinvtab[j+1]; const long tt = MulModPrecon(a10, w0, q, wqi0); const long uu = a00; const long b00 = AddMod(uu, tt, q); const long b10 = SubMod(uu, tt, q); const long tt1 = MulModPrecon(a11, w1, q, wqi1); const long uu1 = a01; const long b01 = AddMod(uu1, tt1, q); const long b11 = SubMod(uu1, tt1, q); AA0[j] = b00; AA0[j+1] = b01; AA1[j] = b10; AA1[j+1] = b11; } } #endif }
static void PrecompFFTMultipliers(long k, long q, const long *root, FFTMultipliers& tab) { if (k < 1) Error("PrecompFFTMultipliers: bad input"); if (k <= tab.MaxK) return; tab.wtab_precomp.SetLength(k+1); tab.wqinvtab_precomp.SetLength(k+1); double qinv = 1/((double) q); if (tab.MaxK == -1) { tab.wtab_precomp[1].SetLength(1); tab.wqinvtab_precomp[1].SetLength(1); tab.wtab_precomp[1][0] = 1; tab.wqinvtab_precomp[1][0] = PrepMulModPrecon(1, q, qinv); tab.MaxK = 1; } for (long s = tab.MaxK+1; s <= k; s++) { tab.wtab_precomp[s].SetLength(1L << (s-1)); tab.wqinvtab_precomp[s].SetLength(1L << (s-1)); long m = 1L << s; long m_half = 1L << (s-1); long m_fourth = 1L << (s-2); long *wtab_last = tab.wtab_precomp[s-1].elts(); mulmod_precon_t *wqinvtab_last = tab.wqinvtab_precomp[s-1].elts(); long *wtab = tab.wtab_precomp[s].elts(); mulmod_precon_t *wqinvtab = tab.wqinvtab_precomp[s].elts(); for (long i = 0; i < m_fourth; i++) { wtab[i] = wtab_last[i]; wqinvtab[i] = wqinvtab_last[i]; } long w = root[s]; mulmod_precon_t wqinv = PrepMulModPrecon(w, q, qinv); // prepare wtab... if (s == 2) { wtab[1] = MulModPrecon(wtab[0], w, q, wqinv); wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); } else { // some software pipelining long i, j; i = m_half-1; j = m_fourth-1; wtab[i-1] = wtab[j]; wqinvtab[i-1] = wqinvtab[j]; wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv); i -= 2; j --; for (; i >= 0; i -= 2, j --) { long wp2 = wtab[i+2]; long wm1 = wtab[j]; wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv); wtab[i-1] = wm1; wqinvtab[i-1] = wqinvtab[j]; wtab[i] = MulModPrecon(wm1, w, q, wqinv); } wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); } } tab.MaxK = k; }
// Transform of length 2^k modulo q: reads the 2^k values at a and writes
// 2^k values to A. root[s] supplies the multiplier used at stage s
// (presumably a primitive 2^s-th root of unity mod q -- confirm with the
// caller). k = 0 and k = 1 are handled directly; for larger k the input is
// bit-reverse-copied into a scratch buffer, stages 1 .. k-1 run in place on
// that buffer, and the last stage writes to A.
// The scratch buffers are NTL_THREAD_LOCAL statics that are resized on
// every call.
void FFT(long* A, const long* a, long k, long q, const long* root)
// performs a 2^k-point convolution modulo q
{
   if (k <= 1) {
      if (k == 0) {
         A[0] = a[0];
         return;
      }
      if (k == 1) {
         long a0 = AddMod(a[0], a[1], q);
         long a1 = SubMod(a[0], a[1], q);
         A[0] = a0;
         A[1] = a1;
         return;
      }
   }

   // assume k > 1

   NTL_THREAD_LOCAL static Vec<long> wtab_store;
   NTL_THREAD_LOCAL static Vec<mulmod_precon_t> wqinvtab_store;
   NTL_THREAD_LOCAL static Vec<long> AA_store;

   // the multiplier table only has to serve stages up to k-1, whose
   // half-block size is at most 2^(k-2)
   wtab_store.SetLength(1L << (k-2));
   wqinvtab_store.SetLength(1L << (k-2));
   AA_store.SetLength(1L << k);

   long * NTL_RESTRICT wtab = wtab_store.elts();
   mulmod_precon_t * NTL_RESTRICT wqinvtab = wqinvtab_store.elts();
   long *AA = AA_store.elts();

   double qinv = 1/((double) q);

   wtab[0] = 1;
   wqinvtab[0] = PrepMulModPrecon(1, q, qinv);

   BitReverseCopy(AA, a, k);

   long n = 1L << k;

   long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1;

   long w;
   mulmod_precon_t wqinv;

   // s = 1: butterflies on adjacent pairs, no multiplier needed
   for (i = 0; i < n; i += 2) {
      t = AA[i + 1];
      u = AA[i];
      AA[i] = AddMod(u, t, q);
      AA[i+1] = SubMod(u, t, q);
   }

   for (s = 2; s < k; s++) {
      m = 1L << s;
      m_half = 1L << (s-1);
      m_fourth = 1L << (s-2);

      w = root[s];
      wqinv = PrepMulModPrecon(w, q, qinv);

      // prepare wtab...
      // The table is grown in place from m_fourth to m_half entries: even
      // slot 2*j takes the old entry j, odd slot 2*j+1 is that entry times
      // w. Slots are filled from the top down so old entries are read
      // before they are overwritten.

      if (s == 2) {
         wtab[1] = MulModPrecon(wtab[0], w, q, wqinv);
         wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv);
      }
      else {
         // some software pipelining: the precon data of an odd slot is
         // computed one iteration after the slot's value
         i = m_half-1; j = m_fourth-1;
         wtab[i-1] = wtab[j];
         wqinvtab[i-1] = wqinvtab[j];
         wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv);

         i -= 2; j --;

         for (; i >= 0; i -= 2, j --) {
            long wp2 = wtab[i+2];
            long wm1 = wtab[j];
            wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv);
            wtab[i-1] = wm1;
            wqinvtab[i-1] = wqinvtab[j];
            wtab[i] = MulModPrecon(wm1, w, q, wqinv);
         }

         // the last odd slot written (index 1) still needs its precon data
         wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv);
      }

      // butterflies of this stage, one block of m entries at a time; AA0 is
      // the lower half of the block and AA1 the upper half
      for (i = 0; i < n; i+= m) {

         long * NTL_RESTRICT AA0 = &AA[i];
         long * NTL_RESTRICT AA1 = &AA[i + m_half];

         // slots 0 and 1 are primed here: slot 0 has multiplier 1, slot 1
         // uses w directly
         t = AA1[0];
         u = AA0[0];
         t1 = MulModPrecon(AA1[1], w, q, wqinv);
         u1 = AA0[1];

         // pipelined: the products for slots j+2 and j+3 are formed while
         // the butterflies for slots j and j+1 are finished
         for (j = 0; j < m_half-2; j += 2) {
            long a02 = AA0[j+2];
            long a03 = AA0[j+3];
            long a12 = AA1[j+2];
            long a13 = AA1[j+3];
            long w2 = wtab[j+2];
            long w3 = wtab[j+3];
            mulmod_precon_t wqi2 = wqinvtab[j+2];
            mulmod_precon_t wqi3 = wqinvtab[j+3];

            tt = MulModPrecon(a12, w2, q, wqi2);
            long b00 = AddMod(u, t, q);
            long b10 = SubMod(u, t, q);
            t = tt;
            u = a02;

            tt1 = MulModPrecon(a13, w3, q, wqi3);
            long b01 = AddMod(u1, t1, q);
            long b11 = SubMod(u1, t1, q);
            t1 = tt1;
            u1 = a03;

            AA0[j] = b00;
            AA1[j] = b10;
            AA0[j+1] = b01;
            AA1[j+1] = b11;
         }

         // drain the pipeline: the last two slots of the block
         AA0[j] = AddMod(u, t, q);
         AA1[j] = SubMod(u, t, q);
         AA0[j + 1] = AddMod(u1, t1, q);
         AA1[j + 1] = SubMod(u1, t1, q);
      }
   }

   // s == k...special case
   // The final stage has a single block and writes straight into A. The
   // table is not grown for it: an even slot j reuses wtab[j >> 1] from
   // stage k-1, and the odd slot j+1 uses that same entry followed by one
   // more multiplication by w.

   m = 1L << s;
   m_half = 1L << (s-1);
   m_fourth = 1L << (s-2);

   w = root[s];
   wqinv = PrepMulModPrecon(w, q, qinv);

   // j = 0, 1

   t = AA[m_half];
   u = AA[0];
   t1 = MulModPrecon(AA[1+ m_half], w, q, wqinv);
   u1 = AA[1];

   A[0] = AddMod(u, t, q);
   A[m_half] = SubMod(u, t, q);
   A[1] = AddMod(u1, t1, q);
   A[1 + m_half] = SubMod(u1, t1, q);

   for (j = 2; j < m_half; j += 2) {
      t = MulModPrecon(AA[j + m_half], wtab[j >> 1], q, wqinvtab[j >> 1]);
      u = AA[j];
      t1 = MulModPrecon(AA[j + 1+ m_half], wtab[j >> 1], q, wqinvtab[j >> 1]);
      t1 = MulModPrecon(t1, w, q, wqinv);
      u1 = AA[j + 1];

      A[j] = AddMod(u, t, q);
      A[j + m_half] = SubMod(u, t, q);
      A[j + 1] = AddMod(u1, t1, q);
      A[j + 1 + m_half] = SubMod(u1, t1, q);
   }
}
// Row-reduce M in place, using only the first w columns as pivot columns,
// and return the number of pivots found.
// Raises LogicError unless 0 <= w <= M.NumCols().
long gauss(mat_zz_p& M, long w)
{
   const long n = M.NumRows();
   const long m = M.NumCols();

   if (w < 0 || w > m)
      LogicError("gauss: bad args");

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, negPivInv;

   long rank = 0;
   for (long col = 0; col < w && rank < n; col++) {
      // first row at or below `rank` with a nonzero entry in this column
      long pivRow = -1;
      for (long r = rank; r < n; r++) {
         if (!IsZero(M[r][col])) {
            pivRow = r;
            break;
         }
      }

      if (pivRow == -1) continue;   // no pivot here, try the next column

      swap(M[pivRow], M[rank]);

      inv(negPivInv, M[rank][col]);
      negate(negPivInv, negPivInv);

      for (long r = rank+1; r < n; r++) {
         // M[r] = M[r] + M[rank]*M[r,col]*negPivInv

         mul(factor, M[r][col], negPivInv);

         const long T1 = rep(factor);
         const mulmod_precon_t T1pinv = PrepMulModPrecon(T1, p, pinv);

         clear(M[r][col]);

         zz_p *dst = M[r].elts() + (col+1);
         const zz_p *src = M[rank].elts() + (col+1);

         for (long c = col+1; c < m; c++, dst++, src++) {
            // *dst = *dst + (*src)*factor
            const long T2 = MulModPrecon(rep(*src), T1, p, T1pinv);
            dst->LoopHole() = AddMod(T2, rep(*dst), p);
         }
      }

      rank++;
   }

   return rank;
}
// Generate a subset of the key-switching matrices for dimension i of the
// given key, following the (baby, giant) step counts returned by
// computeSteps(ord, bound, native):
//   - baby steps: gi^1 .. gi^baby, and for a non-native dimension also each
//     of these multiplied by g2md;
//   - giant steps: gb^2 .. gb^(giant-1), where gb = gi^baby;
//   - for a non-native dimension, genToPow(i, -ord);
//   - for a native dimension with i < numOfGens(), gi^(ord-k) for
//     k = 1, 2, 4, ... below giant.
static void addSome1Dmats4dim(FHESecKey& sKey, long i, long bound, long keyID)
{
  const FHEcontext &context = sKey.getContext();
  long m = context.zMStar.getM();
  computeParams(context,m,i); // defines vars: native, ord, gi, g2md, giminv, g2mdminv

  long baby, giant;
  std::tie(baby,giant) = computeSteps(ord, bound, native);

  // Add matrices for baby steps
  long babyVal = gi;                           // gi^step mod m
  for (long step = 1; step <= baby; step++) {
    sKey.GenKeySWmatrix(1, babyVal, keyID, keyID);
    if (!native) {
      const long wrapped = MulModPrecon(babyVal,g2md,m,g2mdminv);
      sKey.GenKeySWmatrix(1, wrapped, keyID, keyID);
    }
    babyVal = MulModPrecon(babyVal, gi, m, giminv); // next power of gi mod m
  }

  long gb = PowerMod(gi,baby,m); // g^baby
  NTL::mulmod_precon_t gbminv = PrepMulModPrecon(gb, m);

  // Add matrices for giant steps
  long giantVal = gb;
  for (long step = 2; step < giant; step++) {
    giantVal = MulModPrecon(giantVal, gb, m, gbminv); // g^{step*baby}
    sKey.GenKeySWmatrix(1, giantVal, keyID, keyID);
  }

  if (!native) {
    sKey.GenKeySWmatrix(1, context.zMStar.genToPow(i, -ord), keyID, keyID);
  }

  // VJS: experimental feature...because the replication code
  // uses rotations by -1, -2, -4, -8, we add a few
  // of these as well...only the small ones are important,
  // and we only need them if SameOrd(i)...
  // Note: we do indeed get a nontrivial speed-up
  if (native && i<context.zMStar.numOfGens()) {
    for (long k = 1; k < giant; k = 2*k) {
      const long exponent = ord - k;
      const long back = PowerMod(gi, exponent, m); // g^{ord-k}
      sKey.GenKeySWmatrix(1, back, keyID, keyID);
    }
  }

#if 0
MAUTO
  // build the tree for this dimension, the internal nodes are 1 and
  // (subset of) gi^{giant}, gi^{2*giant}, ..., gi^{baby*giant}. We

MAUTO  sKey.resetTree(i,keyID); // remove existing tree, if any

  // keep a list of all the elements that are covered by the tree so far,
  // initialized to only the root (=1).
  std::unordered_set<long> covered({1});

  // Make a list of the automorphisms for this dimension
  std::vector<long> autos;
  for (long j=1,val=gi; j<ord; j++) {
    // Do we have matrices for val and/or val/gi^{di}?
    if (!native) {
      long val2 = MulModPrecon(val, g2md, m, g2mdminv);
      if (sKey.haveKeySWmatrix(1,val2,keyID,keyID)) {
        autos.push_back(val2);
      }
    }
    if (sKey.haveKeySWmatrix(1,val,keyID,keyID)) {
      autos.push_back(val);
    }
    val = MulModPrecon(val, gi, m, giminv); // g^{j+1}
  }

  // Insert internal nodes and their children to tree
  for (long j=0,fromVal=1; j<giant; j++) {
    NTL::mulmod_precon_t fromminv = PrepMulModPrecon(fromVal, m);
    vector<long> children;
    for (long k: autos) {
      long toVal = MulModPrecon(k, fromVal, m, fromminv);
      if (covered.count(toVal)==0) { // toVal not covered yet
        covered.insert(toVal);
        children.push_back(toVal);
      }
    }
    if (!children.empty()) { // insert fromVal with its children
      sKey.add2tree(i, fromVal, children, keyID);
    }
    fromVal = MulModPrecon(fromVal, gb, m, gbminv); // g^{(j+1)*baby}
  }

  // Sanity-check, did we cover everything?
  long toCover = native? ord: (2*ord-1);
  if (covered.size()<toCover)
    cerr << "**Warning: order-"<<ord<<" dimension, covered "<<covered.size()
         << " of "<<toCover<<endl;
#endif
}