// Computes x = sum over i in [low, min(high, v.length()-1)] of v[i] * H[i-low].
//
// The sum is accumulated coefficient-wise in the scratch vector t: its first
// n entries are cleared, each H[i-low].rep is scaled by v[i] and added in,
// and finally t is copied into x.rep and x is normalized.
//
// NOTE(review): assumes t holds at least n elements and that no H[i-low].rep
// is longer than t -- neither is checked here; confirm against callers.
void InnerProduct(zz_pX& x, const vec_zz_p& v, long low, long high,
                  const vec_zz_pX& H, long n, vec_zz_p& t)
{
   long i, j;

   zz_p *tp = t.elts();
   for (j = 0; j < n; j++)
      clear(tp[j]);

   long p = zz_p::modulus();
   // Keep the inverse in the library's own type (as the sibling routines do)
   // instead of forcing it through a double.
   mulmod_t pinv = zz_p::ModulusInverse();

   high = min(high, v.length()-1);
   for (i = low; i <= high; i++) {
      const vec_zz_p& h = H[i-low].rep;
      long m = h.length();

      // v[i] multiplies every coefficient of h, so precondition on it once.
      long W = rep(v[i]);
      mulmod_precon_t Wpinv = PrepMulModPrecon(W, p, pinv);

      const zz_p *hp = h.elts();
      for (j = 0; j < m; j++) {
         long S = MulModPrecon(rep(hp[j]), W, p, Wpinv);
         S = AddMod(S, rep(tp[j]), p);
         tp[j].LoopHole() = S;
      }
   }

   x.rep = t;
   x.normalize();
}
// Incremental CRT step applied entry-wise: for every index i of vp, adds
// [(vq_i - vp_i) * p^{-1}]_q * p to vp[i], where the bracketed value is the
// residue mod q shifted down by q when it exceeds q/2.  Entries of vp that
// have no counterpart in vq are treated as if vq_i were 0.
// Returns true iff vp and vq have the same length.
bool intVecCRT(vec_ZZ& vp, const ZZ& p, const zzvec& vq, long q)
{
  long pInv = InvMod(rem(p,q), q);                 // p^{-1} mod q
  mulmod_precon_t pqInv = PrepMulModPrecon(pInv, q);
  const long halfQ = q/2;
  const long lenP = vp.length();
  const long lenQ = vq.length();

  ZZ correction;
  for (long idx = 0; idx < lenP; idx++) {
    long residue;
    if (idx < lenQ) {
      long vqEntry;
      conv(vqEntry, vq[idx]);                      // convert to single precision
      residue = SubMod(vqEntry, rem(vp[idx],q), q);
    }
    else {
      // no matching entry in vq: it counts as 0 mod q
      residue = NegateMod(rem(vp[idx],q), q);
    }

    long delta = MulModPrecon(residue, pInv, q, pqInv);
    if (delta > halfQ) delta -= q;

    mul(correction, delta, p);                     // delta * p
    vp[idx] += correction;
  }

  return (lenP == lenQ);
}
// X = A * b: multiplies every entry of the matrix A by the scalar b.
// X is resized to the dimensions of A.
void mul(mat_zz_p& X, const mat_zz_p& A, zz_p b)
{
   const long rows = A.NumRows();
   const long cols = A.NumCols();

   X.SetDims(rows, cols);

   // Empty or 1x1 matrices: preconditioning would not pay off, so use the
   // plain zz_p multiplication.
   if (rows == 0 || cols == 0 || (rows == 1 && cols == 1)) {
      for (long r = 0; r < rows; r++)
         for (long c = 0; c < cols; c++)
            mul(X[r][c], A[r][c], b);
      return;
   }

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   // b is the fixed operand of every product: precondition on it once.
   long bb = rep(b);
   mulmod_precon_t bpinv = PrepMulModPrecon(bb, p, pinv);

   for (long r = 0; r < rows; r++) {
      const zz_p *src = A[r].elts();
      zz_p *dst = X[r].elts();

      for (long c = 0; c < cols; c++)
         dst[c].LoopHole() = MulModPrecon(rep(src[c]), bb, p, bpinv);
   }
}
// x = a * b: multiplies every entry of the vector a by the scalar b.
// x is resized to the length of a.
void mul(vec_zz_p& x, const vec_zz_p& a, zz_p b)
{
   long n = a.length();
   x.SetLength(n);

   long i;

   if (n <= 1) {
      // At most one product: preconditioning would not pay off.
      for (i = 0; i < n; i++)
         mul(x[i], a[i], b);
   }
   else {
      long p = zz_p::modulus();
      // Keep the inverse in the library's own type (as the matrix routines
      // do) instead of forcing it through a double.
      mulmod_t pinv = zz_p::ModulusInverse();

      // b is the fixed operand of every product: precondition on it once.
      long bb = rep(b);
      mulmod_precon_t bpinv = PrepMulModPrecon(bb, p, pinv);

      const zz_p *ap = a.elts();
      zz_p *xp = x.elts();

      for (i = 0; i < n; i++)
         xp[i].LoopHole() = MulModPrecon(rep(ap[i]), bb, p, bpinv);
   }
}
// x = A * b (matrix times column vector).
// Raises LogicError when the number of columns of A differs from b.length().
// x is resized to A.NumRows() before b is read.
void mul_aux(vec_zz_p& x, const mat_zz_p& A, const vec_zz_p& b)
{
   const long rows = A.NumRows();
   const long cols = A.NumCols();

   if (cols != b.length())
      LogicError("matrix mul: dimension mismatch");

   x.SetLength(rows);
   zz_p* out = x.elts();

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   const zz_p* bp = b.elts();

   if (rows <= 1) {
      // At most one row: each b[k] is used once, so skip preconditioning.
      for (long r = 0; r < rows; r++) {
         const zz_p* rowp = A[r].elts();
         long sum = 0;

         for (long c = 0; c < cols; c++) {
            long term = MulMod(rep(rowp[c]), rep(bp[c]), p, pinv);
            sum = AddMod(sum, term, p);
         }

         out[r].LoopHole() = sum;
      }
      return;
   }

   // Several rows: every b[k] is reused once per row, so precondition on
   // each of them up front, using the shared scratch vector precon_vec.
   Vec<mulmod_precon_t>::Watcher watch_precon_vec(precon_vec);
   precon_vec.SetLength(cols);
   mulmod_precon_t *bpinv = precon_vec.elts();

   for (long c = 0; c < cols; c++)
      bpinv[c] = PrepMulModPrecon(rep(bp[c]), p, pinv);

   for (long r = 0; r < rows; r++) {
      const zz_p* rowp = A[r].elts();
      long sum = 0;

      for (long c = 0; c < cols; c++) {
         long term = MulModPrecon(rep(rowp[c]), rep(bp[c]), p, bpinv[c]);
         sum = AddMod(sum, term, p);
      }

      out[r].LoopHole() = sum;
   }
}
long polyEvalMod(const ZZX& poly, long x, long p) { long ret = 0; x %= p; if (x<0) x += p; mulmod_precon_t xpinv = PrepMulModPrecon(x, p); for (long i=deg(poly); i>=0; i--) { long coeff = rem(poly[i], p); ret = AddMod(ret, coeff, p); // Add the coefficient of x^i if (i>0) ret = MulModPrecon(ret, x, p, xpinv); // then mult by x } return ret; }
// multiply the polynomial f by the integer a modulo q void MulMod(ZZX& out, const ZZX& f, long a, long q, bool abs/*default=true*/) { // ensure that out has the same degree as f out.SetMaxLength(deg(f)+1); // allocate space if needed if (deg(out)>deg(f)) trunc(out,out,deg(f)+1); // remove high degrees mulmod_precon_t aqinv = PrepMulModPrecon(a, q); for (long i=0; i<=deg(f); i++) { long c = rem(coeff(f,i), q); c = MulModPrecon(c, a, q, aqinv); // returns c \in [0,q-1] if (!abs && c >= q/2) c -= q; SetCoeff(out,i,c); } }
// Fills in the tables of `info` for the prime q with root w:
//   RootTable[j]         = w^(2^(mr-j)),       j = mr .. 0  (repeated squaring)
//   RootInvTable[j]      = (w^{-1})^(2^(mr-j))
//   TwoInvTable[j]       = 2^{-j} mod q
//   TwoInvPreconTable[j] = preconditioner for TwoInvTable[j]
// where mr = CalcMaxRoot(q).  Also records q, 1/q, bigtab, and resets
// zz_p_context to 0.
void InitFFTPrimeInfo(FFTPrimeInfo& info, long q, long w, long bigtab)
{
   const double qinv = 1/((double) q);
   const long maxRoot = CalcMaxRoot(q);

   info.q = q;
   info.qinv = qinv;
   info.zz_p_context = 0;

   info.RootTable.SetLength(maxRoot+1);
   info.RootInvTable.SetLength(maxRoot+1);
   info.TwoInvTable.SetLength(maxRoot+1);
   info.TwoInvPreconTable.SetLength(maxRoot+1);

   long *roots = &info.RootTable[0];
   long *rootInvs = &info.RootInvTable[0];
   long *twoInvs = &info.TwoInvTable[0];
   mulmod_precon_t *twoInvPrecons = &info.TwoInvPreconTable[0];

   // powers of the root: each entry is the square of the one above it
   roots[maxRoot] = w;
   for (long lvl = maxRoot; lvl > 0; lvl--)
      roots[lvl-1] = MulMod(roots[lvl], roots[lvl], q);

   // the same for the inverse root
   rootInvs[maxRoot] = InvMod(w, q);
   for (long lvl = maxRoot; lvl > 0; lvl--)
      rootInvs[lvl-1] = MulMod(rootInvs[lvl], rootInvs[lvl], q);

   // powers of 2^{-1}, together with their preconditioners
   const long halfModQ = InvMod(2, q);
   twoInvs[0] = 1;
   twoInvPrecons[0] = PrepMulModPrecon(twoInvs[0], q, qinv);
   for (long lvl = 1; lvl <= maxRoot; lvl++) {
      twoInvs[lvl] = MulMod(twoInvs[lvl-1], halfModQ, q);
      twoInvPrecons[lvl] = PrepMulModPrecon(twoInvs[lvl], q, qinv);
   }

   info.bigtab = bigtab;
}
// plaintextAutomorph: an auxilliary routine...maybe palce in NumbTh? // Compute b(X) = a(X^k) mod Phi_m(X). Result is calclated in the output b // "in place", so a should not alias b. template <class RX, class RXModulus> static void plaintextAutomorph(RX& b, const RX& a, long k, const PAlgebra& zMStar, const RXModulus& PhimX) { long m = zMStar.getM(); assert(zMStar.inZmStar(k)); b.SetLength(m); for (long j = 0; j < m; j++) b[j] = 0; long d = deg(a); // compute b(X) = a(X^k) mod (X^m-1) mulmod_precon_t precon = PrepMulModPrecon(k, m); for (long j = 0; j <= d; j++) b[MulModPrecon(j, k, m, precon)] = a[j]; // b[j*k mod m] = a[j] b.normalize(); rem(b, b, PhimX); // reduce modulo the m'th cyclotomic }
void BluesteinInit(long n, const zz_p& root, zz_pX& powers, Vec<mulmod_precon_t>& powers_aux, fftRep& Rb) { long p = zz_p::modulus(); zz_p one; one=1; powers.SetMaxLength(n); SetCoeff(powers,0,one); for (long i=1; i<n; i++) { long iSqr = MulMod(i, i, 2*n); // i^2 mod 2n SetCoeff(powers,i, power(root,iSqr)); // powers[i] = root^{i^2} } // powers_aux tracks powers powers_aux.SetLength(n); for (long i = 0; i < n; i++) powers_aux[i] = PrepMulModPrecon(rep(powers[i]), p); long k = NextPowerOfTwo(2*n-1); long k2 = 1L << k; // k2 = 2^k Rb.SetSize(k); zz_pX b(INIT_SIZE, k2); zz_p rInv = inv(root); SetCoeff(b,n-1,one); // b[n-1] = 1 for (long i=1; i<n; i++) { long iSqr = MulMod(i, i, 2*n); // i^2 mod 2n zz_p bi = power(rInv,iSqr); SetCoeff(b,n-1+i, bi); // b[n-1+i] = b[n-1-i] = root^{-i^2} SetCoeff(b,n-1-i,bi); } TofftRep(Rb, b, k); }
// Expand index set by s1, and multiply by \prod{q \in s1}. s1 is assumed to // be disjoint from the current index set. Returns the logarithm of product. double DoubleCRT::addPrimesAndScale(const IndexSet& s1) { if (empty(s1)) return 0.0; // nothing to do assert(empty(s1 & map.getIndexSet())); // s1 is disjoint from *this // compute factor to scale existing rows ZZ factor = to_ZZ(1); double logFactor = 0.0; for (long i = s1.first(); i <= s1.last(); i = s1.next(i)) { long qi = context.ithPrime(i); factor *= qi; logFactor += log((double)qi); } // scale existing rows long phim = context.zMStar.getPhiM(); const IndexSet& iSet = map.getIndexSet(); for (long i = iSet.first(); i <= iSet.last(); i = iSet.next(i)) { long qi = context.ithPrime(i); long f = rem(factor, qi); // f = factor % qi vec_long& row = map[i]; // scale row by a factor of f modulo qi mulmod_precon_t bninv = PrepMulModPrecon(f, qi, 1.0/(double)qi); for (long j=0; j<phim; j++) row[j] = MulModPrecon(row[j], f, qi, bninv); } // insert new rows and fill them with zeros map.insert(s1); // add new rows to the map for (long i = s1.first(); i <= s1.last(); i = s1.next(i)) { vec_long& row = map[i]; for (long j=0; j<phim; j++) row[j] = 0; } return logFactor; }
// Sets up ZZ_pTmpSpace for the currently installed ZZ_pInfo, building the
// shared FFT/CRT information (ZZ_pInfo->FFTInfo) on first use.
//
// The do { ... } while (0) block runs the one-time construction; `break`
// leaves it early when the builder reports that FFTInfo already exists.
// In either case a fresh ZZ_pTmpSpaceT is created and its temporary vectors
// are fetched from the (new or existing) crt/rem structures.
void ZZ_p::DoInstall()
{
   SmartPtr<ZZ_pTmpSpaceT> tmps = 0;

   do { // NOTE: thread safe lazy init
      Lazy<ZZ_pFFTInfoT>::Builder builder(ZZ_pInfo->FFTInfo);
      // a false builder means there is nothing to build here; skip to the
      // fallback below, which reads the existing FFTInfo
      if (!builder()) break;

      UniquePtr<ZZ_pFFTInfoT> FFTInfo;
      FFTInfo.make();

      ZZ B, M, M1, M2, M3;
      long n, i;
      long q, t;
      mulmod_t qinv;

      // B = p^2 * 2^(NTL_FFTMaxRoot + NTL_FFTFudge): the bound that the
      // product M of the FFT primes must exceed
      sqr(B, ZZ_pInfo->p);

      LeftShift(B, B, NTL_FFTMaxRoot+NTL_FFTFudge);

      // FIXME: the following is quadratic time...would
      // be nice to get a faster solution...
      // One could estimate the # of primes by summing logs,
      // then multiply using a tree-based multiply, then
      // adjust up or down...

      // Assuming IEEE floating point, the worst case estimate
      // for error guarantees a correct answer +/- 1 for
      // numprimes up to 2^25...for sure we won't be
      // using that many primes...we can certainly put in
      // a sanity check, though.

      // If I want a more accurate summation (with using Kahan,
      // which has some portability issues), I could represent
      // numbers as x = a + f, where a is integer and f is the fractional
      // part.  Summing in this representation introduces an *absolute*
      // error of 2 epsilon n, which is just as good as Kahan
      // for this application.

      // same strategy could also be used in the ZZX HomMul routine,
      // if we ever want to make that subquadratic

      // accumulate FFT primes until their product M exceeds B
      set(M);
      n = 0;
      while (M <= B) {
         UseFFTPrime(n);
         q = GetFFTPrime(n);
         n++;
         mul(M, M, q);
      }

      FFTInfo->NumPrimes = n;
      // q is the last prime selected by the loop above
      FFTInfo->MaxRoot = CalcMaxRoot(q);

      double fn = double(n);

      if (8.0*fn*(fn+48) > NTL_FDOUBLE_PRECISION)
         ResourceError("modulus too big");

      // QuickCRT is enabled only when the same quantity also fits under the
      // tighter bound NTL_FDOUBLE_PRECISION / NTL_SP_BOUND
      if (8.0*fn*(fn+48) <= NTL_FDOUBLE_PRECISION/double(NTL_SP_BOUND))
         FFTInfo->QuickCRT = true;
      else
         FFTInfo->QuickCRT = false;

      // FIXME: some of this stuff does not need to be initialized
      // at all if FFTInfo->crt_struct.special()

      FFTInfo->x.SetLength(n);
      FFTInfo->u.SetLength(n);
      FFTInfo->uqinv.SetLength(n);

      FFTInfo->rem_struct.init(n, ZZ_pInfo->p, GetFFTPrime);

      FFTInfo->crt_struct.init(n, ZZ_pInfo->p, GetFFTPrime);

      if (!FFTInfo->crt_struct.special()) {
         // M = qq*p + rr
         ZZ qq, rr;

         DivRem(qq, rr, M, ZZ_pInfo->p);

         // MinusMModP = -M mod p
         NegateMod(FFTInfo->MinusMModP, rr, ZZ_pInfo->p);

         for (i = 0; i < n; i++) {
            q = GetFFTPrime(i);
            qinv = GetFFTPrimeInv(i);

            long tt = rem(qq, q);

            mul(M2, ZZ_pInfo->p, tt);
            add(M2, M2, rr);
            div(M2, M2, q);  // = (M/q) rem p

            // t = (M/q)^{-1} mod q
            div(M1, M, q);
            t = rem(M1, q);
            t = InvMod(t, q);

            // M3 = ((M/q) rem p) * t, reduced mod p
            mul(M3, M2, t);
            rem(M3, M3, ZZ_pInfo->p);

            FFTInfo->crt_struct.insert(i, M3);

            // store t as a fraction of q, as an integer, and with its
            // preconditioner for multiplication mod q
            FFTInfo->x[i] = ((double) t)/((double) q);
            FFTInfo->u[i] = t;
            FFTInfo->uqinv[i] = PrepMulModPrecon(FFTInfo->u[i], q, qinv);
         }
      }

      tmps = MakeSmart<ZZ_pTmpSpaceT>();
      tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct);
      tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct);

      // hand the finished object over to the lazy holder
      builder.move(FFTInfo);
   } while (0);

   if (!tmps) {
      // FFTInfo was not built in this call: set up the temporary space from
      // the one stored in ZZ_pInfo
      const ZZ_pFFTInfoT *FFTInfo = ZZ_pInfo->FFTInfo.get();
      tmps = MakeSmart<ZZ_pTmpSpaceT>();
      tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct);
      tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct);
   }

   ZZ_pTmpSpace = tmps;
}
// Constructor: it is assumed that zms is already set with m>1
// If qq == 0, then the current zz_p modulus is used as q; otherwise q = qq
// and a context for q is created here.
// rt is the root to use; when rt == 0 (and m is not a power of two) a
// primitive 2m-th root of unity mod q is searched for.
Cmodulus::Cmodulus(const PAlgebra &zms, long qq, long rt)
{
  assert(zms.getM()>1);
  bool explicitModulus = true;

  if (qq == 0) {
    q = zz_p::modulus();
    explicitModulus = false;
  }
  else
    q = qq;

  zMStar = &zms;
  root = rt;

  long mm;
  mm = zms.getM();
  m_inv = InvMod(mm, q); // m^{-1} mod q

  zz_pBak bak;

  if (zms.getPow2()) {
    // special case when m is a power of 2

    assert( explicitModulus );
    bak.save();

    // Temporarily fix the random seed; `state` puts the previous random
    // state back right after the context has been built.
    RandomState state;  SetSeed(conv<ZZ>("84547180875373941534287406458029"));
    // DIRT: this ensures the roots are deterministically generated
    //    inside the zz_pContext constructor
    context = zz_pContext(INIT_USER_FFT, q);
    state.restore();

    context.restore();

    powers.set_ptr(new zz_pX);
    ipowers.set_ptr(new zz_pX);

    long k = zms.getPow2();
    long phim = 1L << (k-1);

    assert(k <= zz_pInfo->MaxRoot);
    // rootTables get initialized 0..zz_pInfo->Maxroot

#ifdef FHE_OPENCL
    altFFTInfo = MakeSmart<AltFFTPrimeInfo>();
    InitAltFFTPrimeInfo(*altFFTInfo, *zz_pInfo->p_info, k-1);
#endif

    // w0 and w1 come from the two root tables of the context's FFT prime
    // info (NOTE(review): presumably the root and its inverse -- confirm)
    long w0 = zz_pInfo->p_info->RootTable[0][k];
    long w1 = zz_pInfo->p_info->RootTable[1][k];

    // powers[i] = w0^i, with a preconditioner for each entry
    powers->rep.SetLength(phim);
    powers_aux.SetLength(phim);
    for (long i = 0, w = 1; i < phim; i++) {
      powers->rep[i] = w;
      powers_aux[i] = PrepMulModPrecon(w, q);
      w = MulMod(w, w0, q);
    }

    // ipowers[i] = w1^i, with a preconditioner for each entry
    ipowers->rep.SetLength(phim);
    ipowers_aux.SetLength(phim);
    for (long i = 0, w = 1; i < phim; i++) {
      ipowers->rep[i] = w;
      ipowers_aux[i] = PrepMulModPrecon(w, q);
      w = MulMod(w, w1, q);
    }

    return;
  }

  if (explicitModulus) {
    bak.save(); // backup the current modulus
    context = BuildContext(q, NextPowerOfTwo(zms.getM()) + 1);
    context.restore();       // set NTL's current modulus to q
  }
  else
    context.save();

  if (root==0) { // Find a 2m-th root of unity modulo q, if not given
    zz_p rtp;
    long e = 2*zms.getM();
    FindPrimitiveRoot(rtp,e); // NTL routine, relative to current modulus
    if (rtp==0) // sanity check
      Error("Cmod::compRoots(): no 2m'th roots of unity mod q");
    root = rep(rtp);
  }
  rInv = InvMod(root,q); // set rInv = root^{-1} mod q

  // Allocate memory (relative to current modulus that was defined above)
  // and fill in the tables right away via the BluesteinInit calls below.

  zz_pX phimx_poly;
  conv(phimx_poly, zms.getPhimX());

  powers.set_ptr(new zz_pX);
  Rb.set_ptr(new fftRep);
  ipowers.set_ptr(new zz_pX);
  iRb.set_ptr(new fftRep);
  phimx.set_ptr(new zz_pXModulus1(zms.getM(), phimx_poly));

  // tables for the root (forward direction) and for its inverse
  BluesteinInit(mm, conv<zz_p>(root), *powers, powers_aux, *Rb);
  BluesteinInit(mm, conv<zz_p>(rInv), *ipowers, ipowers_aux, *iRb);
}
// d = determinant of M_in, computed by elimination on a private copy of the
// matrix.  Raises LogicError for a non-square matrix; a 0x0 matrix yields 1,
// and d is set to 0 as soon as a column without a nonzero pivot is found.
void determinant(zz_p& d, const mat_zz_p& M_in)
{
   zz_p factor, pivotInv;

   mat_zz_p M;
   M = M_in;

   const long n = M.NumRows();

   if (M.NumCols() != n)
      LogicError("determinant: nonsquare matrix");

   if (n == 0) {
      set(d);
      return;
   }

   zz_p det;
   set(det);

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   for (long col = 0; col < n; col++) {
      // first row at or below the diagonal with a nonzero entry in col
      long pivot = -1;
      for (long r = col; r < n; r++) {
         if (!IsZero(M[r][col])) {
            pivot = r;
            break;
         }
      }

      if (pivot == -1) {
         // singular matrix
         clear(d);
         return;
      }

      if (pivot != col) {
         // a row exchange flips the sign of the determinant
         swap(M[pivot], M[col]);
         negate(det, det);
      }

      mul(det, det, M[col][col]);
      inv(pivotInv, M[col][col]);

      for (long r = col+1; r < n; r++) {
         // M[r] = M[r] - M[col]*M[r,col]*pivotInv
         mul(factor, M[r][col], pivotInv);
         negate(factor, factor);

         zz_p *dst = M[r].elts() + (col+1);
         zz_p *src = M[col].elts() + (col+1);

         // the same factor multiplies the whole row: precondition on it
         long F = rep(factor);
         mulmod_precon_t Fpinv = PrepMulModPrecon(F, p, pinv);

         for (long c = col+1; c < n; c++, dst++, src++) {
            // *dst = *dst + (*src)*factor
            long prod = MulModPrecon(rep(*src), F, p, Fpinv);
            dst->LoopHole() = AddMod(rep(*dst), prod, p);
         }
      }
   }

   d = det;
}
// Extends the cached multiplier tables in `tab` up to level k (no-op when
// tab.MaxK is already >= k).  For each level s, wtab_precomp[s] holds
// 2^(s-1) multipliers and wqinvtab_precomp[s] the matching preconditioners
// (mod q).  Level s is derived from level s-1 and root[s]:
//    wtab_s[2j] = wtab_{s-1}[j],   wtab_s[2j+1] = wtab_{s-1}[j] * root[s].
// Calls Error when k < 1.
static
void PrecompFFTMultipliers(long k, long q, const long *root, FFTMultipliers& tab)
{
   if (k < 1) Error("PrecompFFTMultipliers: bad input");

   if (k <= tab.MaxK) return;

   tab.wtab_precomp.SetLength(k+1);
   tab.wqinvtab_precomp.SetLength(k+1);

   double qinv = 1/((double) q);

   if (tab.MaxK == -1) {
      // nothing cached yet: seed level 1 with the single multiplier 1
      tab.wtab_precomp[1].SetLength(1);
      tab.wqinvtab_precomp[1].SetLength(1);
      tab.wtab_precomp[1][0] = 1;
      tab.wqinvtab_precomp[1][0] = PrepMulModPrecon(1, q, qinv);
      tab.MaxK = 1;
   }

   for (long s = tab.MaxK+1; s <= k; s++) {
      tab.wtab_precomp[s].SetLength(1L << (s-1));
      tab.wqinvtab_precomp[s].SetLength(1L << (s-1));

      long m = 1L << s;
      long m_half = 1L << (s-1);
      long m_fourth = 1L << (s-2);

      long *wtab_last = tab.wtab_precomp[s-1].elts();
      mulmod_precon_t *wqinvtab_last = tab.wqinvtab_precomp[s-1].elts();

      long *wtab = tab.wtab_precomp[s].elts();
      mulmod_precon_t *wqinvtab = tab.wqinvtab_precomp[s].elts();

      // start from a copy of the previous level in the low quarter; the
      // code below spreads it out in place over the whole table
      for (long i = 0; i < m_fourth; i++) {
         wtab[i] = wtab_last[i];
         wqinvtab[i] = wqinvtab_last[i];
      }

      long w = root[s];
      mulmod_precon_t wqinv = PrepMulModPrecon(w, q, qinv);

      // prepare wtab...

      if (s == 2) {
         wtab[1] = MulModPrecon(wtab[0], w, q, wqinv);
         wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv);
      }
      else {
         // some software pipelining
         // Works from the top of the table downwards so that entry j is
         // read before positions 2j and 2j+1 are overwritten.  The
         // preconditioner of each odd entry is computed one iteration after
         // the entry itself (wqinvtab[i+2] below).
         long i, j;

         i = m_half-1; j = m_fourth-1;
         wtab[i-1] = wtab[j];
         wqinvtab[i-1] = wqinvtab[j];
         wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv);

         i -= 2; j --;

         for (; i >= 0; i -= 2, j --) {
            long wp2 = wtab[i+2];
            long wm1 = wtab[j];
            wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv);
            wtab[i-1] = wm1;
            wqinvtab[i-1] = wqinvtab[j];
            wtab[i] = MulModPrecon(wm1, w, q, wqinv);
         }

         // the loop leaves the preconditioner of entry 1 still to be done
         wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv);
      }
   }

   tab.MaxK = k;
}
// Parameters:
//   A    - output array, 2^k entries
//   a    - input array, 2^k entries (copied in bit-reversed order into a
//          scratch buffer first)
//   k    - log2 of the transform length
//   q    - modulus
//   root - root[s] is the multiplier used at level s (s = 2..k)
// The multiplier table for each level is rebuilt in place from the previous
// level's table, in thread-local scratch vectors.
void FFT(long* A, const long* a, long k, long q, const long* root)

// performs a 2^k-point convolution modulo q

{
   if (k <= 1) {
      if (k == 0) {
         // length 1: the transform is the identity
         A[0] = a[0];
         return;
      }
      if (k == 1) {
         // length 2: a single butterfly
         long a0 = AddMod(a[0], a[1], q);
         long a1 = SubMod(a[0], a[1], q);
         A[0] = a0;
         A[1] = a1;
         return;
      }
   }

   // assume k > 1

   NTL_THREAD_LOCAL static Vec<long> wtab_store;
   NTL_THREAD_LOCAL static Vec<mulmod_precon_t> wqinvtab_store;
   NTL_THREAD_LOCAL static Vec<long> AA_store;

   wtab_store.SetLength(1L << (k-2));
   wqinvtab_store.SetLength(1L << (k-2));
   AA_store.SetLength(1L << k);

   long * NTL_RESTRICT wtab = wtab_store.elts();
   mulmod_precon_t * NTL_RESTRICT wqinvtab = wqinvtab_store.elts();
   long *AA = AA_store.elts();

   double qinv = 1/((double) q);

   wtab[0] = 1;
   wqinvtab[0] = PrepMulModPrecon(1, q, qinv);

   BitReverseCopy(AA, a, k);

   long n = 1L << k;

   long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1;

   long w;
   mulmod_precon_t wqinv;

   // s = 1
   // (butterflies on adjacent pairs; no multiplication is needed here)

   for (i = 0; i < n; i += 2) {
      t = AA[i + 1];
      u = AA[i];
      AA[i] = AddMod(u, t, q);
      AA[i+1] = SubMod(u, t, q);
   }

   for (s = 2; s < k; s++) {
      m = 1L << s;
      m_half = 1L << (s-1);
      m_fourth = 1L << (s-2);

      w = root[s];
      wqinv = PrepMulModPrecon(w, q, qinv);

      // prepare wtab...
      // (expand the previous level's m_fourth multipliers in place into
      // m_half multipliers: wtab[2j] = old wtab[j], wtab[2j+1] = old wtab[j]*w)

      if (s == 2) {
         wtab[1] = MulModPrecon(wtab[0], w, q, wqinv);
         wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv);
      }
      else {
         // some software pipelining
         // (top-down so that entry j is read before 2j / 2j+1 are written;
         // the preconditioner of each odd entry is computed one iteration
         // after the entry itself)

         i = m_half-1; j = m_fourth-1;
         wtab[i-1] = wtab[j];
         wqinvtab[i-1] = wqinvtab[j];
         wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv);

         i -= 2; j --;

         for (; i >= 0; i -= 2, j --) {
            long wp2 = wtab[i+2];
            long wm1 = wtab[j];
            wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv);
            wtab[i-1] = wm1;
            wqinvtab[i-1] = wqinvtab[j];
            wtab[i] = MulModPrecon(wm1, w, q, wqinv);
         }

         wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv);
      }

      // butterflies for this level, one block of m entries at a time;
      // AA0/AA1 are the lower and upper halves of the block
      for (i = 0; i < n; i+= m) {

         long * NTL_RESTRICT AA0 = &AA[i];
         long * NTL_RESTRICT AA1 = &AA[i + m_half];

         // prologue: operands for positions 0 and 1 (the multiplier at
         // position 0 is 1, at position 1 it is w)
         t = AA1[0];
         u = AA0[0];
         t1 = MulModPrecon(AA1[1], w, q, wqinv);
         u1 = AA0[1];

         // steady state: finish positions j, j+1 while already loading and
         // multiplying the operands for positions j+2, j+3
         for (j = 0; j < m_half-2; j += 2) {
            long a02 = AA0[j+2];
            long a03 = AA0[j+3];
            long a12 = AA1[j+2];
            long a13 = AA1[j+3];
            long w2 = wtab[j+2];
            long w3 = wtab[j+3];
            mulmod_precon_t wqi2 = wqinvtab[j+2];
            mulmod_precon_t wqi3 = wqinvtab[j+3];

            tt = MulModPrecon(a12, w2, q, wqi2);
            long b00 = AddMod(u, t, q);
            long b10 = SubMod(u, t, q);
            t = tt;
            u = a02;

            tt1 = MulModPrecon(a13, w3, q, wqi3);
            long b01 = AddMod(u1, t1, q);
            long b11 = SubMod(u1, t1, q);
            t1 = tt1;
            u1 = a03;

            AA0[j] = b00;
            AA1[j] = b10;
            AA0[j+1] = b01;
            AA1[j+1] = b11;
         }

         // epilogue: the last two positions of the block

         AA0[j] = AddMod(u, t, q);
         AA1[j] = SubMod(u, t, q);
         AA0[j + 1] = AddMod(u1, t1, q);
         AA1[j + 1] = SubMod(u1, t1, q);
      }
   }

   // s == k...special case
   // The multiplier table is not expanded for this last level: even
   // positions use the previous level's entry wtab[j >> 1] directly, odd
   // positions use that entry times w.  Results go straight into A.

   m = 1L << s;
   m_half = 1L << (s-1);
   m_fourth = 1L << (s-2);

   w = root[s];
   wqinv = PrepMulModPrecon(w, q, qinv);

   // j = 0, 1

   t = AA[m_half];
   u = AA[0];
   t1 = MulModPrecon(AA[1+ m_half], w, q, wqinv);
   u1 = AA[1];

   A[0] = AddMod(u, t, q);
   A[m_half] = SubMod(u, t, q);
   A[1] = AddMod(u1, t1, q);
   A[1 + m_half] = SubMod(u1, t1, q);

   for (j = 2; j < m_half; j += 2) {
      t = MulModPrecon(AA[j + m_half], wtab[j >> 1], q, wqinvtab[j >> 1]);
      u = AA[j];
      t1 = MulModPrecon(AA[j + 1+ m_half], wtab[j >> 1], q,
                        wqinvtab[j >> 1]);
      t1 = MulModPrecon(t1, w, q, wqinv);
      u1 = AA[j + 1];

      A[j] = AddMod(u, t, q);
      A[j + m_half] = SubMod(u, t, q);
      A[j + 1] = AddMod(u1, t1, q);
      A[j + 1 + m_half] = SubMod(u1, t1, q);
   }
}
// X = A * B (matrix product).
// Raises LogicError when A.NumCols() differs from B.NumRows().
// X is resized to A.NumRows() x B.NumCols().
static
void mul_aux(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B)
{
   const long rowsA = A.NumRows();
   const long inner = A.NumCols();
   const long colsB = B.NumCols();

   if (inner != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   X.SetDims(rowsA, colsB);

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   if (colsB <= 1) {
      // At most one column: every entry of A is used once, so the plain
      // dot-product code (1-based indexing, no preconditioning) is used.
      for (long i = 1; i <= rowsA; i++) {
         for (long j = 1; j <= colsB; j++) {
            long sum = 0;
            for (long k = 1; k <= inner; k++) {
               long term = MulMod(rep(A(i,k)), rep(B(k,j)), p, pinv);
               sum = AddMod(sum, term, p);
            }
            X(i,j).LoopHole() = sum;
         }
      }
      return;
   }

   // Several columns: each A[i][k] multiplies a whole row of B, so it is
   // preconditioned once per row.  One row of X is accumulated in the shared
   // scratch vector mul_aux_vec and then copied out.
   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(colsB);
   long *acc = mul_aux_vec.elts();

   for (long i = 0; i < rowsA; i++) {
      const zz_p* rowA = A[i].elts();

      for (long j = 0; j < colsB; j++)
         acc[j] = 0;

      for (long k = 0; k < inner; k++) {
         const long aa = rep(rowA[k]);
         if (aa == 0) continue;   // a zero entry contributes nothing

         const zz_p* rowB = B[k].elts();
         mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv);

         for (long j = 0; j < colsB; j++) {
            long term = MulModPrecon(rep(rowB[j]), aa, p, aapinv);
            acc[j] = AddMod(acc[j], term, p);
         }
      }

      zz_p *rowX = X[i].elts();
      for (long j = 0; j < colsB; j++)
         rowX[j].LoopHole() = acc[j];
   }
}
// Computes the determinant d of the square matrix A and, when d != 0, the
// inverse X of A.  Works on the augmented matrix [A | I]: forward
// elimination first, then back substitution column by column.
// Raises LogicError for a non-square matrix.  For a 0x0 matrix d = 1 and X
// is 0x0.  When A is singular, d is set to 0 and X is left untouched.
void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A)
{
   const long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   zz_p factor, prod, pivotInv;

   // M = [A | I]
   mat_zz_p M;
   M.SetDims(n, 2*n);
   for (long r = 0; r < n; r++) {
      for (long c = 0; c < n; c++) {
         M[r][c] = A[r][c];
         clear(M[r][n+c]);
      }
      set(M[r][n+r]);
   }

   zz_p det;
   set(det);

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   for (long col = 0; col < n; col++) {
      // first row at or below the diagonal with a nonzero entry in col
      long pivot = -1;
      for (long r = col; r < n; r++) {
         if (!IsZero(M[r][col])) {
            pivot = r;
            break;
         }
      }

      if (pivot == -1) {
         // singular matrix
         clear(d);
         return;
      }

      if (pivot != col) {
         // a row exchange flips the sign of the determinant
         swap(M[pivot], M[col]);
         negate(det, det);
      }

      mul(det, det, M[col][col]);

      // the diagonal entry is replaced by its inverse; the back
      // substitution below relies on that
      inv(pivotInv, M[col][col]);
      M[col][col] = pivotInv;

      for (long r = col+1; r < n; r++) {
         // M[r] = M[r] - M[col]*M[r,col]*pivotInv
         mul(factor, M[r][col], pivotInv);
         negate(factor, factor);

         zz_p *dst = M[r].elts() + (col+1);
         zz_p *src = M[col].elts() + (col+1);

         // the same factor multiplies the whole row: precondition on it
         long F = rep(factor);
         mulmod_precon_t Fpinv = PrepMulModPrecon(F, p, pinv);

         for (long c = col+1; c < 2*n; c++, dst++, src++) {
            // *dst = *dst + (*src)*factor
            long T = MulModPrecon(rep(*src), F, p, Fpinv);
            dst->LoopHole() = AddMod(rep(*dst), T, p);
         }
      }
   }

   // back substitution, one column of the right-hand block at a time
   X.SetDims(n, n);
   for (long col = 0; col < n; col++) {
      for (long r = n-1; r >= 0; r--) {
         clear(factor);
         for (long c = r+1; c < n; c++) {
            mul(prod, X[c][col], M[r][c]);
            add(factor, factor, prod);
         }
         sub(factor, M[r][n+col], factor);
         mul(X[r][col], factor, M[r][r]);   // M[r][r] holds the pivot inverse
      }
   }

   d = det;
}
// Generates key-switching matrices for dimension i of the given secret key
// using a baby-step/giant-step selection of powers of the generator gi.
// `bound` is passed on to computeSteps, which yields the (baby, giant) step
// counts; keyID is used as both key IDs of every GenKeySWmatrix call.
static void addSome1Dmats4dim(FHESecKey& sKey, long i, long bound, long keyID)
{
  const FHEcontext &context = sKey.getContext();
  long m = context.zMStar.getM();
  computeParams(context,m,i); // defines vars: native, ord, gi, g2md, giminv, g2mdminv

  long baby, giant;
  std::tie(baby,giant) = computeSteps(ord, bound, native);

  for (long j=1,val=gi; j<=baby; j++) { // Add matrices for baby steps
    // here val = g^j
    sKey.GenKeySWmatrix(1, val, keyID, keyID);
    if (!native) {
      // non-native dimension: also add the matrix for val*g2md mod m
      long val2 = MulModPrecon(val,g2md,m,g2mdminv);
      sKey.GenKeySWmatrix(1, val2, keyID, keyID);
    }
    val = MulModPrecon(val, gi, m, giminv); // val *= g mod m (= g^{j+1})
  }

  long gb = PowerMod(gi,baby,m); // g^baby
  NTL::mulmod_precon_t gbminv = PrepMulModPrecon(gb, m);
  // g^baby itself was already covered by the last baby step, so the giant
  // steps start at g^{2*baby}
  for (long j=2,val=gb; j < giant; j++) { // Add matrices for giant steps
    val = MulModPrecon(val, gb, m, gbminv); // val = g^{j*baby}
    sKey.GenKeySWmatrix(1, val, keyID, keyID);
  }

  if (!native) {
    sKey.GenKeySWmatrix(1, context.zMStar.genToPow(i, -ord), keyID, keyID);
  }

  // VJS: experimental feature...because the replication code
  // uses rotations by -1, -2, -4, -8, we add a few
  // of these as well...only the small ones are important,
  // and we only need them if SameOrd(i)...
  // Note: we do indeed get a nontrivial speed-up

  if (native && i<context.zMStar.numOfGens()) {
    // powers g^{ord-k} for k = 1, 2, 4, ... below giant
    for (long k = 1; k < giant; k = 2*k) {
      long j = ord - k;
      long val = PowerMod(gi, j, m); // val = g^j
      sKey.GenKeySWmatrix(1, val, keyID, keyID);
    }
  }

  // The region below is compiled out.
#if 0
MAUTO

  // build the tree for this dimension, the internal nodes are 1 and
  // (subset of) gi^{giant}, gi^{2*giant}, ..., gi^{baby*giant}. We

MAUTO  sKey.resetTree(i,keyID); // remove existing tree, if any

  // keep a list of all the elements that are covered by the tree so far,
  // initialized to only the root (=1).
  std::unordered_set<long> covered({1});

  // Make a list of the automorphisms for this dimension
  std::vector<long> autos;
  for (long j=1,val=gi; j<ord; j++) {
    // Do we have matrices for val and/or val/gi^{di}?
    if (!native) {
      long val2 = MulModPrecon(val, g2md, m, g2mdminv);
      if (sKey.haveKeySWmatrix(1,val2,keyID,keyID)) {
        autos.push_back(val2);
      }
    }
    if (sKey.haveKeySWmatrix(1,val,keyID,keyID)) {
      autos.push_back(val);
    }
    val = MulModPrecon(val, gi, m, giminv); // g^{j+1}
  }

  // Insert internal nodes and their children to tree
  for (long j=0,fromVal=1; j<giant; j++) {
    NTL::mulmod_precon_t fromminv = PrepMulModPrecon(fromVal, m);
    vector<long> children;
    for (long k: autos) {
      long toVal = MulModPrecon(k, fromVal, m, fromminv);
      if (covered.count(toVal)==0) { // toVal not covered yet
        covered.insert(toVal);
        children.push_back(toVal);
      }
    }
    if (!children.empty()) { // insert fromVal with its children
      sKey.add2tree(i, fromVal, children, keyID);
    }
    fromVal = MulModPrecon(fromVal, gb, m, gbminv); // g^{(j+1)*baby}
  }

  // Sanity-check, did we cover everything?
  long toCover = native? ord: (2*ord-1);
  if (covered.size()<toCover)
    cerr << "**Warning: order-"<<ord<<" dimension, covered "<<covered.size()
         << " of "<<toCover<<endl;
#endif
}
// In-place elimination on M restricted to pivots in the first w columns.
// For each such column a nonzero entry at or below the current pivot row is
// swapped into place and used to zero the entries below it.
// Returns the number of pivots found.
// Raises LogicError unless 0 <= w <= M.NumCols().
long gauss(mat_zz_p& M, long w)
{
   const long rows = M.NumRows();
   const long cols = M.NumCols();

   if (w < 0 || w > cols)
      LogicError("gauss: bad args");

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, negPivotInv;

   long rank = 0;   // index of the next pivot row
   for (long col = 0; col < w && rank < rows; col++) {
      // first row at or below `rank` with a nonzero entry in this column
      long pivot = -1;
      for (long r = rank; r < rows; r++) {
         if (!IsZero(M[r][col])) {
            pivot = r;
            break;
         }
      }

      if (pivot == -1) continue;   // no pivot in this column

      swap(M[pivot], M[rank]);

      inv(negPivotInv, M[rank][col]);
      negate(negPivotInv, negPivotInv);

      for (long r = rank+1; r < rows; r++) {
         // M[r] = M[r] + M[rank]*M[r,col]*negPivotInv
         mul(factor, M[r][col], negPivotInv);

         // the same factor multiplies the whole row: precondition on it
         long F = rep(factor);
         mulmod_precon_t Fpinv = PrepMulModPrecon(F, p, pinv);

         clear(M[r][col]);

         zz_p *dst = M[r].elts() + (col+1);
         zz_p *src = M[rank].elts() + (col+1);

         for (long c = col+1; c < cols; c++, dst++, src++) {
            // *dst = *dst + (*src)*factor
            long T = MulModPrecon(rep(*src), F, p, Fpinv);
            T = AddMod(T, rep(*dst), p);
            (*dst).LoopHole() = T;
         }
      }

      rank++;
   }

   return rank;
}
void UseFFTPrime(long index) { long numprimes = FFTTables_store.length(); if (index < 0 || index > numprimes) Error("invalid FFT prime index"); if (index < numprimes) return; // index == numprimes long q, w; NextFFTPrime(q, w); double qinv = 1/((double) q); long mr = CalcMaxRoot(q); FFTTables_store.SetLength(numprimes+1); FFTTables = FFTTables_store.elts(); FFTPrimeInfo& info = FFTTables[numprimes]; info.q = q; info.qinv = qinv; info.RootTable.SetLength(mr+1); info.RootInvTable.SetLength(mr+1); info.TwoInvTable.SetLength(mr+1); info.TwoInvPreconTable.SetLength(mr+1); long *rt = &info.RootTable[0]; long *rit = &info.RootInvTable[0]; long *tit = &info.TwoInvTable[0]; mulmod_precon_t *tipt = &info.TwoInvPreconTable[0]; long j; long t; rt[mr] = w; for (j = mr-1; j >= 0; j--) rt[j] = MulMod(rt[j+1], rt[j+1], q); rit[mr] = InvMod(w, q); for (j = mr-1; j >= 0; j--) rit[j] = MulMod(rit[j+1], rit[j+1], q); t = InvMod(2, q); tit[0] = 1; for (j = 1; j <= mr; j++) tit[j] = MulMod(tit[j-1], t, q); for (j = 0; j <= mr; j++) tipt[j] = PrepMulModPrecon(tit[j], q, qinv); // initialize data structures for the legacy inteface NumFFTPrimes = FFTTables_store.length(); FFTPrime_store.SetLength(NumFFTPrimes); FFTPrime = FFTPrime_store.elts(); FFTPrime[NumFFTPrimes-1] = q; FFTPrimeInv_store.SetLength(NumFFTPrimes); FFTPrimeInv = FFTPrimeInv_store.elts(); FFTPrimeInv[NumFFTPrimes-1] = qinv; }
// x = a * B (row vector times matrix).
// Raises LogicError when a.length() differs from B.NumRows().
// x is resized to B.NumCols(); it is written only after a has been fully
// read.
void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B)
{
   const long inner = a.length();
   const long cols = B.NumCols();

   if (inner != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   if (cols == 0) {
      x.SetLength(0);
      return;
   }

   if (cols == 1) {
      // single column: one plain dot product (1-based indexing), no
      // preconditioning
      long p = zz_p::modulus();
      mulmod_t pinv = zz_p::ModulusInverse();

      long sum = 0;
      for (long k = 1; k <= inner; k++) {
         long term = MulMod(rep(a(k)), rep(B(k,1)), p, pinv);
         sum = AddMod(sum, term, p);
      }

      x.SetLength(1);
      x(1).LoopHole() = sum;
      return;
   }

   // cols > 1: each a[k] multiplies a whole row of B, so precondition on it.
   // The result is accumulated in the shared scratch vector mul_aux_vec.
   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(cols);
   long *acc = mul_aux_vec.elts();

   const zz_p* ap = a.elts();

   for (long j = 0; j < cols; j++)
      acc[j] = 0;

   for (long k = 0; k < inner; k++) {
      const long aa = rep(ap[k]);
      if (aa == 0) continue;   // a zero entry contributes nothing

      const zz_p* rowB = B[k].elts();
      mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv);

      for (long j = 0; j < cols; j++) {
         long term = MulModPrecon(rep(rowB[j]), aa, p, aapinv);
         acc[j] = AddMod(acc[j], term, p);
      }
   }

   x.SetLength(cols);
   zz_p *xp = x.elts();
   for (long j = 0; j < cols; j++)
      xp[j].LoopHole() = acc[j];
}