/*
 * c = a * b, computed with the Karatsuba kernel.
 *
 * - If either operand is zero, c is cleared.
 * - If a and b are the same object, the work is handed to KarSqr.
 * - c may be the same object as a or b: in that case the aliased
 *   operand's coefficient vector is copied into a temporary before
 *   c.rep is resized and overwritten.
 * - When either operand has fewer than `xover` coefficients the product
 *   is delegated to PlainMul; otherwise scratch space is allocated and
 *   the recursive pointer-based KarMul is invoked.
 *
 * c is normalized before returning.
 */
void KarMul(ZZX& c, const ZZX& a, const ZZX& b)
{
   if (IsZero(a) || IsZero(b)) {
      clear(c);
      return;
   }

   if (&a == &b) {
      KarSqr(c, a);
      return;
   }

   /* Operand lengths are captured before c.rep is resized below. */
   const long sa = a.rep.length();
   const long sb = b.rep.length();

   /* Holds a private copy of whichever input (if any) is also the output. */
   vec_ZZ mem;

   const ZZ *ap;
   const ZZ *bp;

   if (&a == &c) {
      mem = a.rep;
      ap = mem.elts();
   }
   else {
      ap = a.rep.elts();
   }

   if (&b == &c) {
      mem = b.rep;
      bp = mem.elts();
   }
   else {
      bp = b.rep.elts();
   }

   c.rep.SetLength(sa + sb - 1);
   ZZ *cp = c.rep.elts();

   const long maxa = MaxBits(a);
   const long maxb = MaxBits(b);
   const long xover = 2;

   if (sa >= xover && sb >= xover) {
      /* karatsuba */

      /*
       * Walk down the recursion, halving (rounding up) the longer length
       * at each level.  Each level contributes 4*half - 1 scratch
       * entries; depth counts the levels.
       */
      long sp = 0;
      long depth = 0;
      long n = max(sa, sb);

      do {
         n = (n + 1) >> 1;
         sp += (n << 2) - 1;
         depth++;
      } while (n >= xover);

      /*
       * Bit budget requested for each scratch entry, rounded up to a
       * whole number of NTL_ZZ_NBITS-sized units for SetSize.
       */
      const long bits = maxa + maxb + NumBits(min(sa, sb)) + 2*depth + 10;

      ZZVec stk;
      stk.SetSize(sp, (bits + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS);

      KarMul(cp, ap, sa, bp, sb, stk.elts());
   }
   else {
      PlainMul(cp, ap, sa, bp, sb);
   }

   c.normalize();
}
int main() { SetSeed(ZZ(0)); cerr << "This is NTL version " << NTL_VERSION << "\n"; cerr << "Hardware charactersitics:\n"; cerr << "NTL_BITS_PER_LONG = " << NTL_BITS_PER_LONG << "\n"; cerr << "NTL_ZZ_NBITS = " << NTL_ZZ_NBITS << "\n"; cerr << "NTL_SP_NBITS = " << NTL_SP_NBITS << "\n"; #ifdef NTL_HAVE_LL_TYPE cerr << "NTL_HAVE_LL_TYPE\n"; #endif #ifdef NTL_LONGDOUBLE_SP_MULMOD cerr << "NTL_LONGDOUBLE_SP_MULMOD\n"; #endif #ifdef NTL_LONGLONG_SP_MULMOD cerr << "NTL_LONGLONG_SP_MULMOD\n"; #endif cerr << "\n"; cerr << "Basic Configuration Options:\n"; #ifdef NTL_LEGACY_NO_NAMESPACE cerr << "NTL_LEGACY_NO_NAMESPACE\n"; #endif #ifdef NTL_LEGACY_INPUT_ERROR cerr << "NTL_LEGACY_INPUT_ERROR\n"; #endif #ifdef NTL_THREADS cerr << "NTL_THREADS\n"; #endif #ifdef NTL_EXCEPTIONS cerr << "NTL_EXCEPTIONS\n"; #endif #ifdef NTL_THREAD_BOOST cerr << "NTL_THREAD_BOOST\n"; #endif #ifdef NTL_LEGACY_SP_MULMOD cout << "NTL_LEGACY_SP_MULMOD\n"; #endif #ifdef NTL_DISABLE_LONGDOUBLE cout << "NTL_DISABLE_LONGDOUBLE\n"; #endif #ifdef NTL_DISABLE_LONGLONG cout << "NTL_DISABLE_LONGLONG\n"; #endif #ifdef NTL_MAXIMIZE_SP_NBITS cout << "NTL_MAXIMIZE_SP_NBITS\n"; #endif #ifdef NTL_GMP_LIP cerr << "NTL_GMP_LIP\n"; #endif #ifdef NTL_GF2X_LIB cerr << "NTL_GF2X_LIB\n"; #endif #ifdef NTL_PCLMUL cerr << "NTL_PCLMUL\n"; #endif #ifdef NTL_LONG_LONG_TYPE cerr << "NTL_LONG_LONG_TYPE: "; cerr << make_string(NTL_LONG_LONG_TYPE) << "\n"; #endif #ifdef NTL_UNSIGNED_LONG_LONG_TYPE cerr << "NTL_UNSIGNED_LONG_LONG_TYPE: "; cerr << make_string(NTL_UNSIGNED_LONG_LONG_TYPE) << "\n"; #endif #ifdef NTL_X86_FIX cerr << "NTL_X86_FIX\n"; #endif #ifdef NTL_NO_X86_FIX cerr << "NTL_NO_X86_FIX\n"; #endif #ifdef NTL_NO_INIT_TRANS cerr << "NTL_NO_INIT_TRANS\n"; #endif #ifdef NTL_CLEAN_INT cerr << "NTL_CLEAN_INT\n"; #endif #ifdef NTL_CLEAN_PTR cerr << "NTL_CLEAN_PTR\n"; #endif #ifdef NTL_RANGE_CHECK cerr << "NTL_RANGE_CHECK\n"; #endif cerr << "\n"; cerr << "Resolution of double-word types:\n"; cerr << make_string(NTL_LL_TYPE) << 
"\n"; cerr << make_string(NTL_ULL_TYPE) << "\n"; cerr << "\n"; cerr << "Performance Options:\n"; #ifdef NTL_LONG_LONG cerr << "NTL_LONG_LONG\n"; #endif #ifdef NTL_AVOID_FLOAT cerr << "NTL_AVOID_FLOAT\n"; #endif #ifdef NTL_SPMM_ULL cerr << "NTL_SPMM_ULL\n"; #endif #ifdef NTL_SPMM_ASM cerr << "NTL_SPMM_ASM\n"; #endif #ifdef NTL_AVOID_BRANCHING cerr << "NTL_AVOID_BRANCHING\n"; #endif #ifdef NTL_FFT_BIGTAB cout << "NTL_FFT_BIGTAB\n"; #endif #ifdef NTL_FFT_LAZYMUL cout << "NTL_FFT_LAZYMUL\n"; #endif #ifdef NTL_TBL_REM cerr << "NTL_TBL_REM\n"; #endif #ifdef NTL_TBL_REM_LL cerr << "NTL_TBL_REM_LL\n"; #endif #ifdef NTL_CRT_ALTCODE cerr << "NTL_CRT_ALTCODE\n"; #endif #ifdef NTL_CRT_ALTCODE_SMALL cerr << "NTL_CRT_ALTCODE_SMALL\n"; #endif #ifdef NTL_GF2X_ALTCODE cerr << "NTL_GF2X_ALTCODE\n"; #endif #ifdef NTL_GF2X_ALTCODE1 cerr << "NTL_GF2X_ALTCODE1\n"; #endif #ifdef NTL_GF2X_NOINLINE cerr << "NTL_GF2X_NOINLINE\n"; #endif cerr << "\n\n"; cerr << "running tests"; long n, k, i; n = 250; k = 16000; ZZ p; for (i = 0; i < 15; i++) { // cerr << n << "/" << k; cerr << "."; RandomLen(p, k); ZZ_p::init(p); ZZ_pX a, b, c, c1; random(a, n); random(b, n); FFTMul(c, a, b); //cerr << ZZ_pInfo->FFTInfo->NumPrimes; c1 = conv<ZZ_pX>( KarMul( conv<ZZX>(a), conv<ZZX>(b) ) ); if (c1 != c) { cerr << "ZZ_pX mul failed!\n"; return 1; } n = long(n * 1.35); k = long(k / 1.414); } // small prime tests...I've made some changes in v5.3 // that should be checked on various platforms, so // we might as well check them here. if (SmallModulusTest(17, 1000)) { cerr << "first SmallModulusTest failed!!\n"; return 1; } if (SmallModulusTest((1L << (NTL_SP_NBITS))-1, 1000)) { cerr << "second SmallModulusTest failed!!\n"; return 1; } // Test gf2x code.... 
if (GF2X_test()) { cerr << "GF2X test failed!\n"; return 1; } cerr << "OK\n"; ZZ x1, x2, x3, x4; double t; RandomLen(x1, 1024); RandomBnd(x2, x1); RandomBnd(x3, x1); mul(x4, x2, x3); t = GetTime(); for (i = 0; i < 100000; i++) mul(x4, x2, x3); t = GetTime()-t; cerr << "time for 1024-bit mul: " << t*10 << "us"; cerr << "\n"; rem(x2, x4, x1); t = GetTime(); for (i = 0; i < 100000; i++) rem(x2, x4, x1); t = GetTime()-t; cerr << "time for 2048/1024-bit rem: " << t*10 << "us"; cerr << "\n"; GenPrime(p, 1024); RandomBnd(x1, p); if (IsZero(x1)) set(x1); InvMod(x2, x1, p); t = GetTime(); for (i = 0; i < 1000; i++) InvMod(x2, x1, p); t = GetTime()-t; cerr << "time for 1024-bit modular inverse: " << t*1000 << "us"; cerr << "\n"; // test modulus switching n = 1024; k = 1024; RandomLen(p, k); ZZ_p::init(p); if (!IsOdd(p)) p++; ZZ_pX j1, j2, j3; random(j1, n); random(j2, n); mul(j3, j1, j2); t = GetTime(); for (i = 0; i < 200; i++) mul(j3, j1, j2); t = GetTime()-t; cerr << "time to multiply degree 1023 polynomials\n modulo a 1024-bit number: "; cerr << (t/200) << "s"; cerr << "\n"; GF2X_time(); return 0; }
static void KarMul(ZZ *c, const ZZ *a, long sa, const ZZ *b, long sb, ZZ *stk) { if (sa < sb) { { long t = sa; sa = sb; sb = t; } { const ZZ *t = a; a = b; b = t; } } if (sb == 1) { if (sa == 1) mul(*c, *a, *b); else PlainMul1(c, a, sa, *b); return; } if (sb == 2 && sa == 2) { mul(c[0], a[0], b[0]); mul(c[2], a[1], b[1]); add(stk[0], a[0], a[1]); add(stk[1], b[0], b[1]); mul(c[1], stk[0], stk[1]); sub(c[1], c[1], c[0]); sub(c[1], c[1], c[2]); return; } long hsa = (sa + 1) >> 1; if (hsa < sb) { /* normal case */ long hsa2 = hsa << 1; ZZ *T1, *T2, *T3; T1 = stk; stk += hsa; T2 = stk; stk += hsa; T3 = stk; stk += hsa2 - 1; /* compute T1 = a_lo + a_hi */ KarFold(T1, a, sa, hsa); /* compute T2 = b_lo + b_hi */ KarFold(T2, b, sb, hsa); /* recursively compute T3 = T1 * T2 */ KarMul(T3, T1, hsa, T2, hsa, stk); /* recursively compute a_hi * b_hi into high part of c */ /* and subtract from T3 */ KarMul(c + hsa2, a+hsa, sa-hsa, b+hsa, sb-hsa, stk); KarSub(T3, c + hsa2, sa + sb - hsa2 - 1); /* recursively compute a_lo*b_lo into low part of c */ /* and subtract from T3 */ KarMul(c, a, hsa, b, hsa, stk); KarSub(T3, c, hsa2 - 1); clear(c[hsa2 - 1]); /* finally, add T3 * X^{hsa} to c */ KarAdd(c+hsa, T3, hsa2-1); } else { /* degenerate case */ ZZ *T; T = stk; stk += hsa + sb - 1; /* recursively compute b*a_hi into high part of c */ KarMul(c + hsa, a + hsa, sa - hsa, b, sb, stk); /* recursively compute b*a_lo into T */ KarMul(T, a, hsa, b, sb, stk); KarFix(c, T, hsa + sb - 1, hsa); } }
/*
 * Value-returning convenience form: builds a fresh ZZX, fills it via the
 * three-argument KarMul(result, a, b), and returns it.
 */
ZZX KarMul(const ZZX& a, const ZZX& b)
{
   ZZX product;

   KarMul(product, a, b);

   return product;
}