NTL_CLIENT #define make_string_aux(x) #x #define make_string(x) make_string_aux(x) int SmallModulusTest(long p, long n) { zz_pBak bak; bak.save(); zz_p::init(p); zz_pX a, b, c, cc; random(a, n); random(b, n); PlainMul(c, a, b); FFTMul(cc, a, b); int res; res = (c != cc); bak.restore(); return res; }
NTL_CLOSE_NNS NTL_CLIENT int main() { GF2X p; BuildIrred(p, 200); GF2E::init(p); GF2EX f; SetCoeff(f, 41); SetCoeff(f, 1); SetCoeff(f, 0); GF2X a; SetCoeff(a, 117); SetCoeff(a, 10); SetCoeff(a, 0); GF2EX g, h; SetX(g); SetCoeff(g, 0, to_GF2E(a)); MinPolyMod(h, g, f); f = h; vec_pair_GF2EX_long u; CanZass(u, f, 1); cerr << "factorization pattern:"; long i; for (i = 0; i < u.length(); i++) { cerr << " "; long k = u[i].b; if (k > 1) cerr << k << "*"; cerr << deg(u[i].a); } cerr << "\n\n\n"; GF2EX ff; mul(ff, u); if (f != ff || u.length() != 11) { cerr << "GF2EXTest NOT OK\n"; return 1; } { cerr << "multiplication test...\n"; BuildIrred(p, 512); GF2E::init(p); GF2EX A, B, C, C1; random(A, 512); random(B, 512); double t; long i; t = GetTime(); for (i = 0; i < 10; i++) PlainMul(C, A, B); t = GetTime() - t; cerr << "time for plain mul of degree 511 over GF(2^512): " << (t/10) << "s\n"; t = GetTime(); for (i = 0; i < 10; i++) mul(C1, A, B); t = GetTime() - t; cerr << "time for karatsuba mul of degree 511 over GF(2^512): " << (t/10) << "s\n"; if (C != C1) { cerr << "GF2EXTest NOT OK\n"; return 1; } } { cerr << "multiplication test...\n"; BuildIrred(p, 16); GF2E::init(p); GF2EX A, B, C, C1; random(A, 512); random(B, 512); double t; t = GetTime(); for (i = 0; i < 10; i++) PlainMul(C, A, B); t = GetTime() - t; cerr << "time for plain mul of degree 511 over GF(2^16): " << (t/10) << "s\n"; t = GetTime(); for (i = 0; i < 10; i++) mul(C1, A, B); t = GetTime() - t; cerr << "time for karatsuba mul of degree 511 over GF(2^16): " << (t/10) << "s\n"; if (C != C1) { cerr << "GF2EXTest NOT OK\n"; return 1; } } cerr << "GF2EXTest OK\n"; return 0; }
void KarMul(ZZX& c, const ZZX& a, const ZZX& b) { if (IsZero(a) || IsZero(b)) { clear(c); return; } if (&a == &b) { KarSqr(c, a); return; } vec_ZZ mem; const ZZ *ap, *bp; ZZ *cp; long sa = a.rep.length(); long sb = b.rep.length(); if (&a == &c) { mem = a.rep; ap = mem.elts(); } else ap = a.rep.elts(); if (&b == &c) { mem = b.rep; bp = mem.elts(); } else bp = b.rep.elts(); c.rep.SetLength(sa+sb-1); cp = c.rep.elts(); long maxa, maxb, xover; maxa = MaxBits(a); maxb = MaxBits(b); xover = 2; if (sa < xover || sb < xover) PlainMul(cp, ap, sa, bp, sb); else { /* karatsuba */ long n, hn, sp, depth; n = max(sa, sb); sp = 0; depth = 0; do { hn = (n+1) >> 1; sp += (hn << 2) - 1; n = hn; depth++; } while (n >= xover); ZZVec stk; stk.SetSize(sp, ((maxa + maxb + NumBits(min(sa, sb)) + 2*depth + 10) + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); KarMul(cp, ap, sa, bp, sb, stk.elts()); } c.normalize(); }
int main() { _newntl_gmp_hack = 0; long n, k; n = 200; k = 10*newNTL_ZZ_NBITS; ZZ p; RandomLen(p, k); ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " SetCoeff(f, n); // Sets coefficient of X^n to 1 // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { printf("999999999999999 "); print_flag(); return 0; } else if (r1 != r3) { printf("999999999999999 "); print_flag(); return 0; } double t; long i; long iter; n = 1024; k = 1024; RandomLen(p, k); ZZ_p::init(p); ZZ_pX j1, j2, j3; random(j1, n); random(j2, n); mul(j3, j1, j2); iter = 1; do { t = GetTime(); for (i = 0; i < iter; i++) { FFTMul(j3, j1, j2); } t = GetTime() - t; iter = 2*iter; } while(t < 1); iter = iter/2; iter = long((2/t)*iter) + 1; double tvec[5]; long w; for (w = 0; w < 5; w++) { t = GetTime(); for (i = 0; i < iter; i++) { FFTMul(j3, j1, j2); } t = GetTime() - t; tvec[w] = t; } t = clean_data(tvec); t = floor((t/iter)*1e12); if (t < 0 || t >= 1e15) printf("999999999999999 "); else printf("%015.0f ", t); printf(" [%ld] ", iter); print_flag(); return 0; }
int main() { #ifdef NTL_SPMM_ULL if (sizeof(NTL_ULL_TYPE) < 2*sizeof(long)) { printf("999999999999999 "); print_flag(); return 0; } #endif long n, k; n = 200; k = 10*NTL_ZZ_NBITS; ZZ p; RandomLen(p, k); ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " SetCoeff(f, n); // Sets coefficient of X^n to 1 // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { printf("999999999999999 "); print_flag(); return 0; } else if (r1 != r3) { printf("999999999999999 "); print_flag(); return 0; } double t; long i, j; long iter; const int nprimes = 30; const long L = 12; const long N = 1L << L; long r; for (r = 0; r < nprimes; r++) UseFFTPrime(r); vec_long aa[nprimes], AA[nprimes]; for (r = 0; r < nprimes; r++) { aa[r].SetLength(N); AA[r].SetLength(N); for (i = 0; i < N; i++) aa[r][i] = RandomBnd(GetFFTPrime(r)); FFTFwd(AA[r].elts(), aa[r].elts(), L, r); FFTRev1(AA[r].elts(), AA[r].elts(), L, r); } iter = 1; do { t = GetTime(); for (j = 0; j < iter; j++) { for (r = 0; r < nprimes; r++) { long *AAp = AA[r].elts(); long *aap = aa[r].elts(); long q = GetFFTPrime(r); mulmod_t qinv = GetFFTPrimeInv(r); FFTFwd(AAp, aap, L, r); FFTRev1(AAp, aap, L, r); for (i = 0; i < N; i++) AAp[i] = NormalizedMulMod(AAp[i], aap[i], q, qinv); } } t = GetTime() - t; iter = 2*iter; } while(t < 1); iter = iter/2; iter = long((1.5/t)*iter) + 1; double tvec[5]; long w; for (w = 0; w < 5; w++) { t = GetTime(); for (j = 0; j < iter; j++) { for (r = 0; r < nprimes; r++) { long *AAp = AA[r].elts(); long *aap = aa[r].elts(); long q = GetFFTPrime(r); mulmod_t qinv = GetFFTPrimeInv(r); FFTFwd(AAp, aap, L, r); FFTRev1(AAp, aap, L, r); for (i = 0; i < N; i++) AAp[i] = NormalizedMulMod(AAp[i], aap[i], q, qinv); } } t = GetTime() - t; tvec[w] = t; } t = clean_data(tvec); t = floor((t/iter)*1e13); if (t < 0 || t >= 1e15) printf("999999999999999 "); else printf("%015.0f ", t); printf(" [%ld] ", iter); print_flag(); return 0; }
int main() { cerr << "This is NTL version " << NTL_VERSION << "\n"; cerr << "Basic Configuration Options:\n"; #ifdef NTL_STD_CXX cerr << "NTL_STD_CXX\n"; #endif #ifdef NTL_PSTD_NNS cerr << "NTL_PSTD_NNS\n"; #endif #ifdef NTL_PSTD_NHF cerr << "NTL_PSTD_NHF\n"; #endif #ifdef NTL_PSTD_NTN cerr << "NTL_PSTD_NTN\n"; #endif #ifdef NTL_GMP_LIP cerr << "NTL_GMP_LIP\n"; #endif #ifdef NTL_GMP_HACK cerr << "NTL_GMP_HACK\n"; #endif #ifdef NTL_GF2X_LIB cerr << "NTL_GF2X_LIB\n"; #endif #ifdef NTL_LONG_LONG_TYPE cerr << "NTL_LONG_LONG_TYPE: "; cerr << make_string(NTL_LONG_LONG_TYPE) << "\n"; #endif #ifdef NTL_UNSIGNED_LONG_LONG_TYPE cerr << "NTL_UNSIGNED_LONG_LONG_TYPE: "; cerr << make_string(NTL_UNSIGNED_LONG_LONG_TYPE) << "\n"; #endif #ifdef NTL_CXX_ONLY cerr << "NTL_CXX_ONLY\n"; #endif #ifdef NTL_X86_FIX cerr << "NTL_X86_FIX\n"; #endif #ifdef NTL_NO_X86_FIX cerr << "NTL_NO_X86_FIX\n"; #endif #ifdef NTL_NO_INIT_TRANS cerr << "NTL_NO_INIT_TRANS\n"; #endif #ifdef NTL_CLEAN_INT cerr << "NTL_CLEAN_INT\n"; #endif #ifdef NTL_CLEAN_PTR cerr << "NTL_CLEAN_PTR\n"; #endif #ifdef NTL_RANGE_CHECK cerr << "NTL_RANGE_CHECK\n"; #endif cerr << "\n"; cerr << "Resolution of double-word types:\n"; cerr << make_string(NTL_LL_TYPE) << "\n"; cerr << make_string(NTL_ULL_TYPE) << "\n"; cerr << "\n"; cerr << "Performance Options:\n"; #ifdef NTL_LONG_LONG cerr << "NTL_LONG_LONG\n"; #endif #ifdef NTL_AVOID_FLOAT cerr << "NTL_AVOID_FLOAT\n"; #endif #ifdef NTL_SPMM_UL cerr << "NTL_SPMM_UL\n"; #endif #ifdef NTL_SPMM_ULL cerr << "NTL_SPMM_ULL\n"; #endif #ifdef NTL_SPMM_ASM cerr << "NTL_SPMM_ASM\n"; #endif #ifdef NTL_AVOID_BRANCHING cerr << "NTL_AVOID_BRANCHING\n"; #endif #ifdef NTL_TBL_REM cerr << "NTL_TBL_REM\n"; #endif #ifdef NTL_GF2X_ALTCODE cerr << "NTL_GF2X_ALTCODE\n"; #endif #ifdef NTL_GF2X_ALTCODE1 cerr << "NTL_GF2X_ALTCODE1\n"; #endif #ifdef NTL_GF2X_NOINLINE cerr << "NTL_GF2X_NOINLINE\n"; #endif cerr << "\n\n"; if (_ntl_gmp_hack) cerr << "using GMP hack\n\n"; cerr << "running tests..."; long n, k; n = 200; k = 10*NTL_ZZ_NBITS; ZZ p; GenPrime(p, k); ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " // SetCoeff(f, n); // Sets coefficient of X^n to 1 ZZ_p lc; do { random(lc); } while (IsZero(lc)); SetCoeff(f, n, lc); // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { cerr << "r1 != r2!!\n"; return 1; } else if (r1 != r3) { cerr << "r1 != r3!!\n"; return 1; } // small prime tests...I've made some changes in v5.3 // that should be checked on various platforms, so // we might as well check them here. if (SmallModulusTest(17, 1000)) { cerr << "first SmallModulusTest failed!!\n"; return 1; } if (SmallModulusTest((1L << (NTL_SP_NBITS))-1, 1000)) { cerr << "second SmallModulusTest failed!!\n"; return 1; } // Test gf2x code.... if (GF2X_test()) { cerr << "GF2X test failed!\n"; return 1; } cerr << "OK\n"; ZZ x1, x2, x3, x4; double t; long i; RandomLen(x1, 1024); RandomBnd(x2, x1); RandomBnd(x3, x1); mul(x4, x2, x3); t = GetTime(); for (i = 0; i < 100000; i++) mul(x4, x2, x3); t = GetTime()-t; cerr << "time for 1024-bit mul: " << t*10 << "us"; if (_ntl_gmp_hack) { _ntl_gmp_hack = 0; mul(x4, x2, x3); t = GetTime(); for (i = 0; i < 100000; i++) mul(x4, x2, x3); t = GetTime()-t; cerr << " (" << (t*10) << "us without GMP)"; _ntl_gmp_hack = 1; } cerr << "\n"; rem(x2, x4, x1); t = GetTime(); for (i = 0; i < 100000; i++) rem(x2, x4, x1); t = GetTime()-t; cerr << "time for 2048/1024-bit rem: " << t*10 << "us"; if (_ntl_gmp_hack) { _ntl_gmp_hack = 0; rem(x2, x4, x1); t = GetTime(); for (i = 0; i < 100000; i++) rem(x2, x4, x1); t = GetTime()-t; cerr << " (" << (t*10) << "us without GMP)"; _ntl_gmp_hack = 1; } cerr << "\n"; GenPrime(p, 1024); RandomBnd(x1, p); if (IsZero(x1)) set(x1); InvMod(x2, x1, p); t = GetTime(); for (i = 0; i < 1000; i++) InvMod(x2, x1, p); t = GetTime()-t; cerr << "time for 1024-bit modular inverse: " << t*1000 << "us"; if (_ntl_gmp_hack) { _ntl_gmp_hack = 0; InvMod(x2, x1, p); t = GetTime(); for (i = 0; i < 1000; i++) InvMod(x2, x1, p); t = GetTime()-t; cerr << " (" << (t*1000) << "us without GMP)"; _ntl_gmp_hack = 1; } cerr << "\n"; // test modulus switching n = 1024; k = 1024; RandomLen(p, k); ZZ_p::init(p); ZZ_pInfo->check(); ZZ_pX j1, j2, j3; random(j1, n); random(j2, n); t = GetTime(); for (i = 0; i < 20; i++) mul(j3, j1, j2); t = GetTime()-t; cerr << "time to multiply degree 1023 polynomials\n modulo a 1024-bit number: "; cerr << (t/20) << "s"; if (_ntl_gmp_hack) { _ntl_gmp_hack = 0; ZZ_p::init(p); ZZ_pInfo->check(); t = GetTime(); for (i = 0; i < 20; i++) mul(j3, j1, j2); t = GetTime()-t; cerr << " (" << (t/20) << "s without GMP)"; _ntl_gmp_hack = 1; } cerr << "\n"; GF2X_time(); return 0; }
int main() { #if (defined(NTL_CRT_ALTCODE) && !(defined(NTL_HAVE_LL_TYPE) && NTL_ZZ_NBITS == NTL_BITS_PER_LONG)) { printf("999999999999999 "); print_flag(); return 0; } #endif SetSeed(ZZ(0)); long n, k; n = 1024; k = 30*NTL_SP_NBITS; ZZ p; RandomLen(p, k); if (!IsOdd(p)) p++; ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " SetCoeff(f, n); // Sets coefficient of X^n to 1 // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { printf("999999999999999 "); print_flag(); return 0; } else if (r1 != r3) { printf("999999999999999 "); print_flag(); return 0; } double t; long i; long iter; ZZ_pX a, b, c; random(a, n); random(b, n); long da = deg(a); long db = deg(b); long dc = da + db; long l = NextPowerOfTwo(dc+1); FFTRep arep, brep, crep; ToFFTRep(arep, a, l, 0, da); ToFFTRep(brep, b, l, 0, db); mul(crep, arep, brep); ZZ_pXModRep modrep; FromFFTRep(modrep, crep); FromZZ_pXModRep(c, modrep, 0, dc); iter = 1; do { t = GetTime(); for (i = 0; i < iter; i++) { FromZZ_pXModRep(c, modrep, 0, dc); } t = GetTime() - t; iter = 2*iter; } while(t < 1); iter = iter/2; iter = long((3/t)*iter) + 1; double tvec[5]; long w; for (w = 0; w < 5; w++) { t = GetTime(); for (i = 0; i < iter; i++) { FromZZ_pXModRep(c, modrep, 0, dc); } t = GetTime() - t; tvec[w] = t; } t = clean_data(tvec); t = floor((t/iter)*1e12); // The following is just to test some tuning Wizard logic -- // be sure to get rid of this!! #if (defined(NTL_CRT_ALTCODE)) // t *= 1.12; #endif if (t < 0 || t >= 1e15) printf("999999999999999 "); else printf("%015.0f ", t); printf(" [%ld] ", iter); print_flag(); return 0; }