// Sets up the modulus data for a general single-precision modulus NewP.
//
// FFT primes are accumulated (in index order 0, 1, 2, ...) until their
// product exceeds NewP^2 * 2^(maxroot + NTL_FFTFudge).  For every prime
// used, the CRT data (CoeffModP, x, u) is then tabulated.
//
// Raises LogicError if maxroot < 0, if NewP <= 1, or if more than four
// FFT primes would be required; raises ResourceError if NewP has more
// than NTL_SP_NBITS bits.
zz_pInfoT::zz_pInfoT(long NewP, long maxroot)
{
   if (maxroot < 0)
      LogicError("zz_pContext: maxroot may not be negative");

   if (NewP <= 1)
      LogicError("zz_pContext: p must be > 1");

   if (NumBits(NewP) > NTL_SP_NBITS)
      ResourceError("zz_pContext: modulus too big");

   p = NewP;
   pinv = 1/double(p);
   p_info = 0;

   // bound = p^2 * 2^(maxroot + NTL_FFTFudge)
   ZZ bigP, bound;
   conv(bigP, p);
   sqr(bound, bigP);
   LeftShift(bound, bound, maxroot+NTL_FFTFudge);

   // Multiply FFT primes together until the product exceeds the bound.
   ZZ prod;
   set(prod);

   long count = 0;
   long prime;
   while (prod <= bound) {
      UseFFTPrime(count);
      prime = GetFFTPrime(count);
      count++;
      mul(prod, prod, prime);
   }

   if (count > 4)
      LogicError("zz_pInit: too many primes");

   NumPrimes = count;
   PrimeCnt = count;

   // MaxRoot is derived from the last prime added, capped by the request.
   MaxRoot = CalcMaxRoot(prime);
   if (maxroot < MaxRoot)
      MaxRoot = maxroot;

   ZZ negProd;
   negate(negProd, prod);
   MinusMModP = rem(negProd, p);

   CoeffModP.SetLength(count);
   x.SetLength(count);
   u.SetLength(count);

   ZZ cofactor;
   for (long k = 0; k < count; k++) {
      prime = GetFFTPrime(k);

      // cofactor = prod / prime;  inv = cofactor^{-1} mod prime
      div(cofactor, prod, prime);
      long inv = InvMod(rem(cofactor, prime), prime);

      if (NTL_zz_p_QUICK_CRT)
         mul(cofactor, cofactor, inv);

      CoeffModP[k] = rem(cofactor, p);
      x[k] = double(inv)/double(prime);
      u[k] = inv;
   }
}
// Sets up the modulus data from an existing FFT prime description.
// The modulus and its inverse are read from *info, and the pointer
// itself is stored in p_info.  A single prime is recorded (NumPrimes = 1,
// PrimeCnt = 0).
zz_pInfoT::zz_pInfoT(INIT_FFT_TYPE, FFTPrimeInfo *info)
{
   p_info = info;

   p = info->q;
   pinv = info->qinv;

   MaxRoot = CalcMaxRoot(p);

   NumPrimes = 1;
   PrimeCnt = 0;
}
NTL_CLIENT

// It is assumed that m,q,context, and root are already set. If root is set
// to zero, it will be computed by the compRoots() method. Then rInv is
// computed as the inverse of root.
// NOTE(review): the comment above mentions names (m, root, compRoots, rInv)
// that BuildContext does not use; it presumably documents a definition
// elsewhere in the file -- confirm and relocate.

// Returns a zz_pContext for the modulus p.
// When the requested maxroot exceeds CalcMaxRoot(p), the general
// (p, maxroot) constructor is used; otherwise the context is built with
// INIT_USER_FFT, i.e. p itself is supplied as the FFT prime.
zz_pContext BuildContext(long p, long maxroot)
{
   if (maxroot > CalcMaxRoot(p)) {
      return zz_pContext(p, maxroot);
   }

   return zz_pContext(INIT_USER_FFT, p);
}
// Populates `info` for the prime q, with mr = CalcMaxRoot(q):
//   RootTable[mr]    = w,           RootTable[j]    = RootTable[j+1]^2 mod q
//   RootInvTable[mr] = w^{-1} mod q, RootInvTable[j] = RootInvTable[j+1]^2 mod q
//   TwoInvTable[j]   = 2^{-j} mod q
//   TwoInvPreconTable[j] = PrepMulModPrecon(TwoInvTable[j], q, 1/q)
// Also stores q, 1/q and bigtab, and clears info.zz_p_context.
// NOTE(review): w is presumably the root of unity produced by IsFFTPrime
// for q -- confirm against callers.
void InitFFTPrimeInfo(FFTPrimeInfo& info, long q, long w, long bigtab)
{
   const double qinv = 1/((double) q);
   const long mr = CalcMaxRoot(q);

   info.q = q;
   info.qinv = qinv;
   info.zz_p_context = 0;

   info.RootTable.SetLength(mr+1);
   info.RootInvTable.SetLength(mr+1);
   info.TwoInvTable.SetLength(mr+1);
   info.TwoInvPreconTable.SetLength(mr+1);

   long *roots = &info.RootTable[0];
   long *rootInvs = &info.RootInvTable[0];
   long *twoInvs = &info.TwoInvTable[0];
   mulmod_precon_t *twoInvPrecon = &info.TwoInvPreconTable[0];

   long k;

   // Fill from the top entry downwards by repeated squaring.
   roots[mr] = w;
   for (k = mr; k > 0; k--)
      roots[k-1] = MulMod(roots[k], roots[k], q);

   rootInvs[mr] = InvMod(w, q);
   for (k = mr; k > 0; k--)
      rootInvs[k-1] = MulMod(rootInvs[k], rootInvs[k], q);

   // Successive powers of 2^{-1} mod q.
   const long half = InvMod(2, q);
   twoInvs[0] = 1;
   for (k = 0; k < mr; k++)
      twoInvs[k+1] = MulMod(twoInvs[k], half, q);

   for (k = 0; k <= mr; k++)
      twoInvPrecon[k] = PrepMulModPrecon(twoInvs[k], q, qinv);

   info.bigtab = bigtab;
}
// Sets up the modulus data for a caller-supplied FFT prime q.
// Raises LogicError("invalid user supplied prime") when IsFFTPrime
// rejects q.  The FFTPrimeInfo object is created through p_info_owner
// and filled in by InitFFTPrimeInfo; big tables are requested exactly
// when NTL_FFT_BIGTAB is defined.
zz_pInfoT::zz_pInfoT(INIT_USER_FFT_TYPE, long q)
{
   long w;
   if (!IsFFTPrime(q, w))
      LogicError("invalid user supplied prime");

   p = q;
   pinv = 1/((double) q);

   p_info_owner.make();
   p_info = p_info_owner.get();

#ifdef NTL_FFT_BIGTAB
   const bool bigtab = true;
#else
   const bool bigtab = false;
#endif

   InitFFTPrimeInfo(*p_info, q, w, bigtab);

   NumPrimes = 1;
   PrimeCnt = 0;

   MaxRoot = CalcMaxRoot(p);
}
// Sets up the modulus data for the FFT prime with the given index
// (legacy interface based on the FFTPrime / FFTPrimeInv arrays).
// Reports "bad FFT prime index" through Error when Index is negative.
zz_pInfoT::zz_pInfoT(long Index)
{
   ref_count = 1;
   index = Index;

   if (index < 0) {
      Error("bad FFT prime index");
   }

   // allows non-consecutive indices...I'm not sure why
   // (every prime below `index` is set up first, then `index` itself)
   while (NumFFTPrimes < index) {
      UseFFTPrime(NumFFTPrimes);
   }
   UseFFTPrime(index);

   p = FFTPrime[index];
   pinv = FFTPrimeInv[index];

   NumPrimes = 1;
   PrimeCnt = 0;
   MaxRoot = CalcMaxRoot(p);
}
void ZZ_p::DoInstall() { SmartPtr<ZZ_pTmpSpaceT> tmps = 0; do { // NOTE: thread safe lazy init Lazy<ZZ_pFFTInfoT>::Builder builder(ZZ_pInfo->FFTInfo); if (!builder()) break; UniquePtr<ZZ_pFFTInfoT> FFTInfo; FFTInfo.make(); ZZ B, M, M1, M2, M3; long n, i; long q, t; mulmod_t qinv; sqr(B, ZZ_pInfo->p); LeftShift(B, B, NTL_FFTMaxRoot+NTL_FFTFudge); // FIXME: the following is quadratic time...would // be nice to get a faster solution... // One could estimate the # of primes by summing logs, // then multiply using a tree-based multiply, then // adjust up or down... // Assuming IEEE floating point, the worst case estimate // for error guarantees a correct answer +/- 1 for // numprimes up to 2^25...for sure we won't be // using that many primes...we can certainly put in // a sanity check, though. // If I want a more accuaruate summation (with using Kahan, // which has some portability issues), I could represent // numbers as x = a + f, where a is integer and f is the fractional // part. Summing in this representation introduces an *absolute* // error of 2 epsilon n, which is just as good as Kahan // for this application. 
// same strategy could also be used in the ZZX HomMul routine, // if we ever want to make that subquadratic set(M); n = 0; while (M <= B) { UseFFTPrime(n); q = GetFFTPrime(n); n++; mul(M, M, q); } FFTInfo->NumPrimes = n; FFTInfo->MaxRoot = CalcMaxRoot(q); double fn = double(n); if (8.0*fn*(fn+48) > NTL_FDOUBLE_PRECISION) ResourceError("modulus too big"); if (8.0*fn*(fn+48) <= NTL_FDOUBLE_PRECISION/double(NTL_SP_BOUND)) FFTInfo->QuickCRT = true; else FFTInfo->QuickCRT = false; // FIXME: some of this stuff does not need to be initialized // at all if FFTInfo->crt_struct.special() FFTInfo->x.SetLength(n); FFTInfo->u.SetLength(n); FFTInfo->uqinv.SetLength(n); FFTInfo->rem_struct.init(n, ZZ_pInfo->p, GetFFTPrime); FFTInfo->crt_struct.init(n, ZZ_pInfo->p, GetFFTPrime); if (!FFTInfo->crt_struct.special()) { ZZ qq, rr; DivRem(qq, rr, M, ZZ_pInfo->p); NegateMod(FFTInfo->MinusMModP, rr, ZZ_pInfo->p); for (i = 0; i < n; i++) { q = GetFFTPrime(i); qinv = GetFFTPrimeInv(i); long tt = rem(qq, q); mul(M2, ZZ_pInfo->p, tt); add(M2, M2, rr); div(M2, M2, q); // = (M/q) rem p div(M1, M, q); t = rem(M1, q); t = InvMod(t, q); mul(M3, M2, t); rem(M3, M3, ZZ_pInfo->p); FFTInfo->crt_struct.insert(i, M3); FFTInfo->x[i] = ((double) t)/((double) q); FFTInfo->u[i] = t; FFTInfo->uqinv[i] = PrepMulModPrecon(FFTInfo->u[i], q, qinv); } } tmps = MakeSmart<ZZ_pTmpSpaceT>(); tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct); tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct); builder.move(FFTInfo); } while (0); if (!tmps) { const ZZ_pFFTInfoT *FFTInfo = ZZ_pInfo->FFTInfo.get(); tmps = MakeSmart<ZZ_pTmpSpaceT>(); tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct); tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct); } ZZ_pTmpSpace = tmps; }
// Returns a zz_pContext for the modulus p.
// If maxroot does not exceed CalcMaxRoot(p), the context is built with
// INIT_USER_FFT (p itself is supplied as the FFT prime); otherwise the
// general (p, maxroot) constructor is used.
zz_pContext BuildContext(long p, long maxroot)
{
   const long nativeMaxRoot = CalcMaxRoot(p);

   if (maxroot <= nativeMaxRoot) {
      return zz_pContext(INIT_USER_FFT, p);
   }

   return zz_pContext(p, maxroot);
}
newNTL_START_IMPL

// Sets up the modulus data for a general single-precision modulus NewP
// (legacy version using raw arrays for CoeffModP, x and u).
//
// FFT primes are accumulated until their product exceeds
// NewP^2 * 2^(maxroot + newNTL_FFTFudge); the per-prime CRT data is then
// tabulated.  Problems are reported through Error: negative maxroot,
// NewP <= 1, NewP wider than newNTL_SP_NBITS bits, more than four primes
// needed, or a failed allocation ("out of space").
zz_pInfoT::zz_pInfoT(long NewP, long maxroot)
{
   ref_count = 1;

   if (maxroot < 0)
      Error("zz_pContext: maxroot may not be negative");

   if (NewP <= 1)
      Error("zz_pContext: p must be > 1");

   if (NumBits(NewP) > newNTL_SP_NBITS)
      Error("zz_pContext: modulus too big");

   p = NewP;
   pinv = 1/double(p);
   index = -1;

   // bound = p^2 * 2^(maxroot + newNTL_FFTFudge)
   ZZ bigP, bound;
   conv(bigP, p);
   sqr(bound, bigP);
   LeftShift(bound, bound, maxroot+newNTL_FFTFudge);

   // Multiply FFT primes together until the product exceeds the bound.
   ZZ prod;
   set(prod);

   long count = 0;
   long prime;
   while (prod <= bound) {
      UseFFTPrime(count);
      prime = FFTPrime[count];
      count++;
      mul(prod, prod, prime);
   }

   if (count > 4)
      Error("zz_pInit: too many primes");

   NumPrimes = count;
   PrimeCnt = count;

   // MaxRoot is derived from the last prime added, capped by the request.
   MaxRoot = CalcMaxRoot(prime);
   if (maxroot < MaxRoot)
      MaxRoot = maxroot;

   ZZ negProd;
   negate(negProd, prod);
   MinusMModP = rem(negProd, p);

   CoeffModP = (long *) newNTL_MALLOC(count, sizeof(long), 0);
   if (!CoeffModP)
      Error("out of space");

   x = (double *) newNTL_MALLOC(count, sizeof(double), 0);
   if (!x)
      Error("out of space");

   u = (long *) newNTL_MALLOC(count, sizeof(long), 0);
   if (!u)
      Error("out of space");

   ZZ cofactor;
   for (long k = 0; k < count; k++) {
      prime = FFTPrime[k];

      // cofactor = prod / prime, then scaled by its inverse mod prime
      div(cofactor, prod, prime);
      long inv = InvMod(rem(cofactor, prime), prime);
      mul(cofactor, cofactor, inv);

      CoeffModP[k] = rem(cofactor, p);
      x[k] = double(inv)/double(prime);
      u[k] = inv;
   }
}
void ZZ_pInfoT::init() { ZZ B, M, M1, M2, M3; long n, i; long q, t; initialized = 1; sqr(B, p); LeftShift(B, B, NTL_FFTMaxRoot+NTL_FFTFudge); set(M); n = 0; while (M <= B) { UseFFTPrime(n); q = FFTPrime[n]; n++; mul(M, M, q); } NumPrimes = n; MaxRoot = CalcMaxRoot(q); double fn = double(n); if (8.0*fn*(fn+32) > NTL_FDOUBLE_PRECISION) Error("modulus too big"); if (8.0*fn*(fn+32) > NTL_FDOUBLE_PRECISION/double(NTL_SP_BOUND)) QuickCRT = 0; else QuickCRT = 1; if (!(x = (double *) NTL_MALLOC(n, sizeof(double), 0))) Error("out of space"); if (!(u = (long *) NTL_MALLOC(n, sizeof(long), 0))) Error("out of space"); ZZ_p_rem_struct_init(&rem_struct, n, p, FFTPrime); ZZ_p_crt_struct_init(&crt_struct, n, p, FFTPrime); if (ZZ_p_crt_struct_special(crt_struct)) return; ZZ qq, rr; DivRem(qq, rr, M, p); NegateMod(MinusMModP, rr, p); for (i = 0; i < n; i++) { q = FFTPrime[i]; long tt = rem(qq, q); mul(M2, p, tt); add(M2, M2, rr); div(M2, M2, q); // = (M/q) rem p div(M1, M, q); t = rem(M1, q); t = InvMod(t, q); mul(M3, M2, t); rem(M3, M3, p); ZZ_p_crt_struct_insert(crt_struct, i, M3); x[i] = ((double) t)/((double) q); u[i] = t; } }
void UseFFTPrime(long index) { long numprimes = FFTTables_store.length(); if (index < 0 || index > numprimes) Error("invalid FFT prime index"); if (index < numprimes) return; // index == numprimes long q, w; NextFFTPrime(q, w); double qinv = 1/((double) q); long mr = CalcMaxRoot(q); FFTTables_store.SetLength(numprimes+1); FFTTables = FFTTables_store.elts(); FFTPrimeInfo& info = FFTTables[numprimes]; info.q = q; info.qinv = qinv; info.RootTable.SetLength(mr+1); info.RootInvTable.SetLength(mr+1); info.TwoInvTable.SetLength(mr+1); info.TwoInvPreconTable.SetLength(mr+1); long *rt = &info.RootTable[0]; long *rit = &info.RootInvTable[0]; long *tit = &info.TwoInvTable[0]; mulmod_precon_t *tipt = &info.TwoInvPreconTable[0]; long j; long t; rt[mr] = w; for (j = mr-1; j >= 0; j--) rt[j] = MulMod(rt[j+1], rt[j+1], q); rit[mr] = InvMod(w, q); for (j = mr-1; j >= 0; j--) rit[j] = MulMod(rit[j+1], rit[j+1], q); t = InvMod(2, q); tit[0] = 1; for (j = 1; j <= mr; j++) tit[j] = MulMod(tit[j-1], t, q); for (j = 0; j <= mr; j++) tipt[j] = PrepMulModPrecon(tit[j], q, qinv); // initialize data structures for the legacy inteface NumFFTPrimes = FFTTables_store.length(); FFTPrime_store.SetLength(NumFFTPrimes); FFTPrime = FFTPrime_store.elts(); FFTPrime[NumFFTPrimes-1] = q; FFTPrimeInv_store.SetLength(NumFFTPrimes); FFTPrimeInv = FFTPrimeInv_store.elts(); FFTPrimeInv[NumFFTPrimes-1] = qinv; }