void FileList::AddFile(const char *name) { Vec<char> item; item.SetLength(strlen(name)+1); strcpy(item.elts(), name); data.append(item); }
// NOTE: the signature for this is in lzz_p.h void conv(vec_zz_p& x, const Vec<long>& a) { long i, n; n = a.length(); x.SetLength(n); VectorConv(n, x.elts(), a.elts()); }
// FFT, variant that takes its per-stage multipliers from a precomputed table.
//
// Parameters (as used by the visible code):
//   A    - output array; written directly only in the k == 0 and k == 1 cases
//          shown here.
//   a    - input array, read at indices 0 .. 2^k - 1.
//   k    - log2 of the transform length.
//   q    - modulus passed to AddMod / SubMod / MulModPrecon.
//   root - forwarded to PrecompFFTMultipliers when the table must grow.
//   tab  - multiplier tables; extended via PrecompFFTMultipliers when
//          k > tab.MaxK, then read as tab.wtab_precomp[s] and
//          tab.wqinvtab_precomp[s] for each stage s.
//
// Outline for k > 1:
//   * a is copied into the scratch buffer AA with BitReverseCopy.  AA lives in
//     a function-local static Vec (AA_store), so the same buffer is reused by
//     every call (NOTE(review): presumably not safe for concurrent callers --
//     confirm).
//   * Stage s = 1 is a plain add/subtract butterfly on adjacent pairs.
//   * Stages s = 2 .. k-1 combine AA0 = &AA[i] with AA1 = &AA[i + m_half],
//     multiplying the AA1 element by wtab[j] before the add/subtract.
//     With NTL_PIPELINE set, the loop is software-pipelined: the products for
//     j+2 and j+3 are computed while the results for j and j+1 are stored,
//     and the j == 0 element is used without a multiplication (presumably
//     wtab[0] == 1 -- confirm against PrecompFFTMultipliers).  Otherwise a
//     straightforward two-at-a-time loop multiplies every element.
//   * m_fourth is assigned in each stage but not read in the visible code.
//
// NOTE(review): the text of this function has been collapsed onto two
// physical lines, so the `//` comments and the `#if` / `#else` / `#endif`
// directives now sit mid-line.  The visible text also stops after the stage
// loop: there is no stage s == k, A is never written for k > 1, and the
// function's closing brace does not balance.  It looks truncated -- restore
// it from the upstream source before relying on it.
void FFT(long* A, const long* a, long k, long q, const long* root, FFTMultipliers& tab) // performs a 2^k-point convolution modulo q { if (k <= 1) { if (k == 0) { A[0] = a[0]; return; } if (k == 1) { long a0 = AddMod(a[0], a[1], q); long a1 = SubMod(a[0], a[1], q); A[0] = a0; A[1] = a1; return; } } // assume k > 1 if (k > tab.MaxK) PrecompFFTMultipliers(k, q, root, tab); static Vec<long> AA_store; AA_store.SetLength(1L << k); long *AA = AA_store.elts(); BitReverseCopy(AA, a, k); long n = 1L << k; long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1; // s = 1 for (i = 0; i < n; i += 2) { t = AA[i + 1]; u = AA[i]; AA[i] = AddMod(u, t, q); AA[i+1] = SubMod(u, t, q); } for (s = 2; s < k; s++) { m = 1L << s; m_half = 1L << (s-1); m_fourth = 1L << (s-2); const long* wtab = tab.wtab_precomp[s].elts(); const mulmod_precon_t *wqinvtab = tab.wqinvtab_precomp[s].elts(); for (i = 0; i < n; i+= m) { long *AA0 = &AA[i]; long *AA1 = &AA[i + m_half]; #if (NTL_PIPELINE) // pipelining: seems to be faster t = AA1[0]; u = AA0[0]; t1 = MulModPrecon(AA1[1], wtab[1], q, wqinvtab[1]); u1 = AA0[1]; for (j = 0; j < m_half-2; j += 2) { long a02 = AA0[j+2]; long a03 = AA0[j+3]; long a12 = AA1[j+2]; long a13 = AA1[j+3]; long w2 = wtab[j+2]; long w3 = wtab[j+3]; mulmod_precon_t wqi2 = wqinvtab[j+2]; mulmod_precon_t wqi3 = wqinvtab[j+3]; tt = MulModPrecon(a12, w2, q, wqi2); long b00 = AddMod(u, t, q); long b10 = SubMod(u, t, q); tt1 = MulModPrecon(a13, w3, q, wqi3); long b01 = AddMod(u1, t1, q); long b11 = SubMod(u1, t1, q); AA0[j] = b00; AA1[j] = b10; AA0[j+1] = b01; AA1[j+1] = b11; t = tt; u = a02; t1 = tt1; u1 = a03; } AA0[j] = AddMod(u, t, q); AA1[j] = SubMod(u, t, q); AA0[j + 1] = AddMod(u1, t1, q); AA1[j + 1] = SubMod(u1, t1, q); } #else for (j = 0; j < m_half; j += 2) { const long a00 = AA0[j]; const long a01 = AA0[j+1]; const long a10 = AA1[j]; const long a11 = AA1[j+1]; const long w0 = wtab[j]; const long w1 = wtab[j+1]; const mulmod_precon_t wqi0 = wqinvtab[j]; const 
mulmod_precon_t wqi1 = wqinvtab[j+1]; const long tt = MulModPrecon(a10, w0, q, wqi0); const long uu = a00; const long b00 = AddMod(uu, tt, q); const long b10 = SubMod(uu, tt, q); const long tt1 = MulModPrecon(a11, w1, q, wqi1); const long uu1 = a01; const long b01 = AddMod(uu1, tt1, q); const long b11 = SubMod(uu1, tt1, q); AA0[j] = b00; AA0[j+1] = b01; AA1[j] = b10; AA1[j+1] = b11; } } #endif }
// FFT, variant that builds its multiplier table on the fly from root[].
//
// Parameters (as used by the code):
//   A    - output array; receives A[0] for k == 0, A[0..1] for k == 1, and is
//          written by the final stage (s == k) otherwise.
//   a    - input array, read at indices 0 .. 2^k - 1.
//   k    - log2 of the transform length.
//   q    - modulus passed to AddMod / SubMod / MulModPrecon.
//   root - root[s] supplies the multiplier w for stage s (s = 2 .. k).
//
// Working storage: three function-local static Vecs -- wtab_store and
// wqinvtab_store (length 2^(k-2)) and AA_store (length 2^k) -- so the buffers
// are reused by every call (NOTE(review): presumably not safe for concurrent
// callers -- confirm).  qinv = 1/(double) q is handed to PrepMulModPrecon.
//
// Outline for k > 1:
//   * wtab[0] = 1; a is copied into AA with BitReverseCopy.
//   * Stage s = 1: add/subtract butterfly on adjacent pairs of AA.
//   * Stages s = 2 .. k-1, with w = root[s]:
//       - Table update.  For s == 2, wtab[1] = wtab[0] * w.  Otherwise the
//         table is doubled in place, walking from the back so old entries are
//         read before they are overwritten: new wtab[2j] = old wtab[j] and
//         new wtab[2j+1] = old wtab[j] * w.  wqinvtab entries for even
//         indices are copied from the old table; those for odd indices are
//         computed with PrepMulModPrecon one loop iteration after the
//         matching wtab entry is produced (the "software pipelining"), with
//         wqinvtab[1] filled in after the loop.
//       - Butterflies between AA0 = &AA[i] and AA1 = &AA[i + m_half], again
//         pipelined: products for j+2, j+3 are formed while results for j,
//         j+1 are stored.  Element j == 0 is used unmultiplied and element
//         j == 1 is multiplied by w directly rather than through wtab.
//   * Final stage s == k writes to A instead of AA.  The table is not
//     expanded for this stage: the multiplier for even j is wtab[j >> 1], and
//     for odd j+1 it is wtab[j >> 1] followed by an extra multiplication by w.
//     m_fourth is recomputed here but not read.
void FFT(long* A, const long* a, long k, long q, const long* root) // performs a 2^k-point convolution modulo q { if (k <= 1) { if (k == 0) { A[0] = a[0]; return; } if (k == 1) { long a0 = AddMod(a[0], a[1], q); long a1 = SubMod(a[0], a[1], q); A[0] = a0; A[1] = a1; return; } } // assume k > 1 static Vec<long> wtab_store; static Vec<mulmod_precon_t> wqinvtab_store; static Vec<long> AA_store; wtab_store.SetLength(1L << (k-2)); wqinvtab_store.SetLength(1L << (k-2)); AA_store.SetLength(1L << k); long * NTL_RESTRICT wtab = wtab_store.elts(); mulmod_precon_t * NTL_RESTRICT wqinvtab = wqinvtab_store.elts(); long *AA = AA_store.elts(); double qinv = 1/((double) q); wtab[0] = 1; wqinvtab[0] = PrepMulModPrecon(1, q, qinv); BitReverseCopy(AA, a, k); long n = 1L << k; long s, m, m_half, m_fourth, i, j, t, u, t1, u1, tt, tt1; long w; mulmod_precon_t wqinv; // s = 1 for (i = 0; i < n; i += 2) { t = AA[i + 1]; u = AA[i]; AA[i] = AddMod(u, t, q); AA[i+1] = SubMod(u, t, q); } for (s = 2; s < k; s++) { m = 1L << s; m_half = 1L << (s-1); m_fourth = 1L << (s-2); w = root[s]; wqinv = PrepMulModPrecon(w, q, qinv); // prepare wtab... 
if (s == 2) { wtab[1] = MulModPrecon(wtab[0], w, q, wqinv); wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); } else { // some software pipelining i = m_half-1; j = m_fourth-1; wtab[i-1] = wtab[j]; wqinvtab[i-1] = wqinvtab[j]; wtab[i] = MulModPrecon(wtab[i-1], w, q, wqinv); i -= 2; j --; for (; i >= 0; i -= 2, j --) { long wp2 = wtab[i+2]; long wm1 = wtab[j]; wqinvtab[i+2] = PrepMulModPrecon(wp2, q, qinv); wtab[i-1] = wm1; wqinvtab[i-1] = wqinvtab[j]; wtab[i] = MulModPrecon(wm1, w, q, wqinv); } wqinvtab[1] = PrepMulModPrecon(wtab[1], q, qinv); } for (i = 0; i < n; i+= m) { long * NTL_RESTRICT AA0 = &AA[i]; long * NTL_RESTRICT AA1 = &AA[i + m_half]; t = AA1[0]; u = AA0[0]; t1 = MulModPrecon(AA1[1], w, q, wqinv); u1 = AA0[1]; for (j = 0; j < m_half-2; j += 2) { long a02 = AA0[j+2]; long a03 = AA0[j+3]; long a12 = AA1[j+2]; long a13 = AA1[j+3]; long w2 = wtab[j+2]; long w3 = wtab[j+3]; mulmod_precon_t wqi2 = wqinvtab[j+2]; mulmod_precon_t wqi3 = wqinvtab[j+3]; tt = MulModPrecon(a12, w2, q, wqi2); long b00 = AddMod(u, t, q); long b10 = SubMod(u, t, q); t = tt; u = a02; tt1 = MulModPrecon(a13, w3, q, wqi3); long b01 = AddMod(u1, t1, q); long b11 = SubMod(u1, t1, q); t1 = tt1; u1 = a03; AA0[j] = b00; AA1[j] = b10; AA0[j+1] = b01; AA1[j+1] = b11; } AA0[j] = AddMod(u, t, q); AA1[j] = SubMod(u, t, q); AA0[j + 1] = AddMod(u1, t1, q); AA1[j + 1] = SubMod(u1, t1, q); } } // s == k...special case m = 1L << s; m_half = 1L << (s-1); m_fourth = 1L << (s-2); w = root[s]; wqinv = PrepMulModPrecon(w, q, qinv); // j = 0, 1 t = AA[m_half]; u = AA[0]; t1 = MulModPrecon(AA[1+ m_half], w, q, wqinv); u1 = AA[1]; A[0] = AddMod(u, t, q); A[m_half] = SubMod(u, t, q); A[1] = AddMod(u1, t1, q); A[1 + m_half] = SubMod(u1, t1, q); for (j = 2; j < m_half; j += 2) { t = MulModPrecon(AA[j + m_half], wtab[j >> 1], q, wqinvtab[j >> 1]); u = AA[j]; t1 = MulModPrecon(AA[j + 1+ m_half], wtab[j >> 1], q, wqinvtab[j >> 1]); t1 = MulModPrecon(t1, w, q, wqinv); u1 = AA[j + 1]; A[j] = AddMod(u, t, q); A[j + 
m_half] = SubMod(u, t, q); A[j + 1] = AddMod(u1, t1, q); A[j + 1 + m_half] = SubMod(u1, t1, q); } }
void UseFFTPrime(long index) { long numprimes = FFTTables_store.length(); if (index < 0 || index > numprimes) Error("invalid FFT prime index"); if (index < numprimes) return; // index == numprimes long q, w; NextFFTPrime(q, w); double qinv = 1/((double) q); long mr = CalcMaxRoot(q); FFTTables_store.SetLength(numprimes+1); FFTTables = FFTTables_store.elts(); FFTPrimeInfo& info = FFTTables[numprimes]; info.q = q; info.qinv = qinv; info.RootTable.SetLength(mr+1); info.RootInvTable.SetLength(mr+1); info.TwoInvTable.SetLength(mr+1); info.TwoInvPreconTable.SetLength(mr+1); long *rt = &info.RootTable[0]; long *rit = &info.RootInvTable[0]; long *tit = &info.TwoInvTable[0]; mulmod_precon_t *tipt = &info.TwoInvPreconTable[0]; long j; long t; rt[mr] = w; for (j = mr-1; j >= 0; j--) rt[j] = MulMod(rt[j+1], rt[j+1], q); rit[mr] = InvMod(w, q); for (j = mr-1; j >= 0; j--) rit[j] = MulMod(rit[j+1], rit[j+1], q); t = InvMod(2, q); tit[0] = 1; for (j = 1; j <= mr; j++) tit[j] = MulMod(tit[j-1], t, q); for (j = 0; j <= mr; j++) tipt[j] = PrepMulModPrecon(tit[j], q, qinv); // initialize data structures for the legacy inteface NumFFTPrimes = FFTTables_store.length(); FFTPrime_store.SetLength(NumFFTPrimes); FFTPrime = FFTPrime_store.elts(); FFTPrime[NumFFTPrimes-1] = q; FFTPrimeInv_store.SetLength(NumFFTPrimes); FFTPrimeInv = FFTPrimeInv_store.elts(); FFTPrimeInv[NumFFTPrimes-1] = qinv; }