/*
** This is based on the simple Fractal / Divide and Conquer / Karatsuba
** O(n^1.585) method.
**
** It's fairly simple.  a*b is: a1b1(B^2+B)+(a1-a2)(b2-b1)B+a2b2(B+1)
**
** You need 4*SIZE storage for the product and the working space.
**
** Parameters:
**   Prod  - destination.  The product is built in the first 2*Len digits;
**           the region starting at Prod+2*Len is used as working space
**           for the half-size products.
**   Num1  - first operand, Len digits.
**   Num2  - second operand, Len digits.  Passing the same pointer as Num1
**           selects the squaring path.
**   Len   - operand length in digits.
**
** Operands with Len <= FFTLimit are passed directly to FFTMul(); larger
** ones are split at HLen = Len/2 and handled by three recursive calls.
**
** The file-level flags Num1IsCached, Num2IsCached, SaveNum1FFT and
** SaveNum2FFT are cleared while the "difference" product is computed and
** restored to the caller's values before the other two products and again
** on exit.
**
** NOTE(review): Sub() is assumed to return non-zero when its result is
** negative, and RippleAdd()/RippleSub() are assumed to add/subtract Work
** at the given offset and propagate the carry/borrow back towards Prod.
** Neither is visible here -- confirm against their definitions.
*/
static void
FractalMul(BigInt Prod, BigInt Num1, BigInt Num2, size_t Len)
{size_t HLen;
int Sign1, Sign2;
BigInt offset;
BigInt Work=Prod+2*Len;   /* working space: second half of the 4*Len buffer */
int OldNum1IsCached, OldNum2IsCached;
int OldSaveNum1FFT, OldSaveNum2FFT;

/* Base case: short operands are multiplied directly by FFTMul(). */
if (Len <= FFTLimit)
  {FFTMul(Prod,Num1,Num2,Len,Len*2,0);return;}

HLen = Len/2;

/* Save the caller's caching flags, then clear all four for the product of
   the differences computed below. */
OldNum1IsCached=Num1IsCached;OldNum2IsCached=Num2IsCached;
OldSaveNum1FFT=SaveNum1FFT;OldSaveNum2FFT=SaveNum2FFT;
Num1IsCached=Num2IsCached=SaveNum1FFT=SaveNum2FFT=0;

if (Num1==Num2)
  {
   /* Squaring: only one difference is needed.  Its magnitude is formed in
      Prod, squared into Work, and the result is always subtracted. */
   Sign1 = Sub(Prod, Num1, Num1+HLen, HLen);
   if (Sign1) Negate(Prod, HLen);
   FractalMul(Work,Prod,Prod,HLen);
   ClearBigInt(Prod,Len*2);
   offset = Prod + HLen;
   RippleSub(Prod,offset,offset,Work,Len);
   /* square makes sign1 pos */
  }
else
  {
   /* Do x=Right(Num1)-Left(Num1)
         y=Left(Num2)-Right(Num2)
      The magnitudes of x and y are stored in the two halves of Prod's
      first Len digits; Sign1/Sign2 record what Sub() returned for each. */
   Sign1 = Sub(Prod, Num1+HLen, Num1, HLen);
   if (Sign1) Negate(Prod, HLen);
   Sign2 = Sub(Prod+HLen, Num2, Num2+HLen, HLen);
   if (Sign2) Negate(Prod+HLen, HLen);
   FractalMul(Work, Prod, Prod+HLen, HLen);
   ClearBigInt(Prod,Len*2);
   offset = Prod + HLen;
   /* Equal sign flags: |x|*|y| is added; otherwise it is subtracted. */
   if (Sign1 == Sign2) RippleAdd(Prod,offset,offset,Work,Len);
   else                RippleSub(Prod,offset,offset,Work,Len);
  }

#if 1
/* Turn the FFT/NTT caching back on. */
Num1IsCached=OldNum1IsCached;Num2IsCached=OldNum2IsCached;
SaveNum1FFT=OldSaveNum1FFT;SaveNum2FFT=OldSaveNum2FFT;
#endif

/* Product of the halves at Num1 and Num2: accumulated at Prod+HLen and
   at Prod. */
FractalMul(Work, Num1, Num2, HLen);
offset = Prod + HLen;RippleAdd(Prod,offset,offset,Work,Len);
Add(Prod, Prod, Work, Len);

/* Product of the halves at Num1+HLen and Num2+HLen: accumulated at
   Prod+HLen and at Prod+Len. */
FractalMul(Work, Num1 + HLen, Num2 + HLen, HLen);
offset = Prod + HLen;RippleAdd(Prod,offset,offset,Work,Len);
offset = Prod + Len; RippleAdd(Prod,offset,offset,Work,Len);

/* Leave the caching flags exactly as the caller had them. */
Num1IsCached=OldNum1IsCached;Num2IsCached=OldNum2IsCached;
SaveNum1FFT=OldSaveNum1FFT;SaveNum2FFT=OldSaveNum2FFT;
}
NTL_CLIENT

// Two-level stringification: make_string(X) expands X first and then
// turns the expansion into a string literal.
#define make_string_aux(x) #x
#define make_string(x) make_string_aux(x)

// Cross-checks FFTMul against PlainMul for zz_pX polynomials modulo p.
//
//   p - modulus installed with zz_p::init for the duration of the test
//   n - length argument passed to random() for both operands
//
// The zz_p modulus that was current on entry is saved first and restored
// before returning.
//
// Returns non-zero if the two products differ, zero if they agree.
int SmallModulusTest(long p, long n)
{
   zz_pBak saved_modulus;
   saved_modulus.save();

   zz_p::init(p);

   zz_pX lhs, rhs, plain_product, fft_product;

   random(lhs, n);
   random(rhs, n);

   PlainMul(plain_product, lhs, rhs);
   FFTMul(fft_product, lhs, rhs);

   const int mismatch = (plain_product != fft_product);

   saved_modulus.restore();

   return mismatch;
}
int main() { _newntl_gmp_hack = 0; long n, k; n = 200; k = 10*newNTL_ZZ_NBITS; ZZ p; RandomLen(p, k); ZZ_p::init(p); // initialization ZZ_pX f, g, h, r1, r2, r3; random(g, n); // g = random polynomial of degree < n random(h, n); // h = " " random(f, n); // f = " " SetCoeff(f, n); // Sets coefficient of X^n to 1 // For doing arithmetic mod f quickly, one must pre-compute // some information. ZZ_pXModulus F; build(F, f); PlainMul(r1, g, h); // this uses classical arithmetic PlainRem(r1, r1, f); MulMod(r2, g, h, F); // this uses the FFT MulMod(r3, g, h, f); // uses FFT, but slower // compare the results... if (r1 != r2) { printf("999999999999999 "); print_flag(); return 0; } else if (r1 != r3) { printf("999999999999999 "); print_flag(); return 0; } double t; long i; long iter; n = 1024; k = 1024; RandomLen(p, k); ZZ_p::init(p); ZZ_pX j1, j2, j3; random(j1, n); random(j2, n); mul(j3, j1, j2); iter = 1; do { t = GetTime(); for (i = 0; i < iter; i++) { FFTMul(j3, j1, j2); } t = GetTime() - t; iter = 2*iter; } while(t < 1); iter = iter/2; iter = long((2/t)*iter) + 1; double tvec[5]; long w; for (w = 0; w < 5; w++) { t = GetTime(); for (i = 0; i < iter; i++) { FFTMul(j3, j1, j2); } t = GetTime() - t; tvec[w] = t; } t = clean_data(tvec); t = floor((t/iter)*1e12); if (t < 0 || t >= 1e15) printf("999999999999999 "); else printf("%015.0f ", t); printf(" [%ld] ", iter); print_flag(); return 0; }
NTL_CLIENT

/*------------------------------------------------------------*/
/* Exercises CTFT_negacyclic_convolution for lengths          */
/* 2^1 .. 2^14 on random zz_p inputs.                         */
/*                                                            */
/* if opt = 1, runs a check: the convolution output is        */
/*   compared, coefficient by coefficient, with               */
/*   (A*B) mod (x^len + 1) computed via zz_pX; the exponent   */
/*   is printed after each size passes.                       */
/* else, runs timings: prints the exponent, the time for      */
/*   100000 convolutions, and the time for 100000 FFTMul      */
/*   calls on polynomials built from the first len/2          */
/*   coefficients of each input.                              */
/*                                                            */
/* NOTE(review): random_zz_p() is used, so a zz_p modulus is  */
/* presumably expected to be installed by the caller.         */
/*------------------------------------------------------------*/
void check(int opt){

   CTFT_multipliers mult = CTFT_multipliers(0);

   for (long i = 1; i < 15; i++){
      long len = 1L << i;

      // Inputs, output, and a 2*len work area handed to the convolution
      // as two halves.  (The former a2/b2/c2 buffers were never read and
      // have been dropped.)
      long *a = new long[len];
      long *b = new long[len];
      long *c = new long[len];
      long *wk = new long[2*len];

      CTFT_init_multipliers(mult, i);

      for (long j = 0; j < len; j++){
         a[j] = random_zz_p().LoopHole();
         b[j] = random_zz_p().LoopHole();
      }

      if (opt == 1){
         zz_pX A, B, C, M;
         for (long j = 0; j < len; j++){
            SetCoeff(A, j, a[j]);
            SetCoeff(B, j, b[j]);
         }

         // M = x^len + 1
         SetCoeff(M, 0, 1);
         SetCoeff(M, len, 1);

         CTFT_negacyclic_convolution(c, a, b, i, mult, wk, wk+len);
         C = (A*B) % M;

         for (long j = 0; j < len; j++)
            assert (c[j] == coeff(C, j));

         cout << i << endl;
      }
      else{
         cout << i;

         double t = GetTime();
         for (long j = 0; j < 100000; j++)
            CTFT_negacyclic_convolution(c, a, b, i, mult, wk, wk+len);
         t = GetTime()-t;
         cout << " " << t;

         // Reference timing: only len/2 coefficients per operand are set.
         zz_pX A, B, C;
         for (long j = 0; j < len/2; j++){
            SetCoeff(A, j, a[j]);
            SetCoeff(B, j, b[j]);
         }

         double v = GetTime();
         for (long j = 0; j < 100000; j++)
            FFTMul(C, A, B);
         v = GetTime()-v;
         cout << " " << v;

         cout << endl;
      }

      delete[] a;
      delete[] b;
      delete[] c;
      delete[] wk;
   }
}
int main() { SetSeed(ZZ(0)); cerr << "This is NTL version " << NTL_VERSION << "\n"; cerr << "Hardware charactersitics:\n"; cerr << "NTL_BITS_PER_LONG = " << NTL_BITS_PER_LONG << "\n"; cerr << "NTL_ZZ_NBITS = " << NTL_ZZ_NBITS << "\n"; cerr << "NTL_SP_NBITS = " << NTL_SP_NBITS << "\n"; #ifdef NTL_HAVE_LL_TYPE cerr << "NTL_HAVE_LL_TYPE\n"; #endif #ifdef NTL_LONGDOUBLE_SP_MULMOD cerr << "NTL_LONGDOUBLE_SP_MULMOD\n"; #endif #ifdef NTL_LONGLONG_SP_MULMOD cerr << "NTL_LONGLONG_SP_MULMOD\n"; #endif cerr << "\n"; cerr << "Basic Configuration Options:\n"; #ifdef NTL_LEGACY_NO_NAMESPACE cerr << "NTL_LEGACY_NO_NAMESPACE\n"; #endif #ifdef NTL_LEGACY_INPUT_ERROR cerr << "NTL_LEGACY_INPUT_ERROR\n"; #endif #ifdef NTL_THREADS cerr << "NTL_THREADS\n"; #endif #ifdef NTL_EXCEPTIONS cerr << "NTL_EXCEPTIONS\n"; #endif #ifdef NTL_THREAD_BOOST cerr << "NTL_THREAD_BOOST\n"; #endif #ifdef NTL_LEGACY_SP_MULMOD cout << "NTL_LEGACY_SP_MULMOD\n"; #endif #ifdef NTL_DISABLE_LONGDOUBLE cout << "NTL_DISABLE_LONGDOUBLE\n"; #endif #ifdef NTL_DISABLE_LONGLONG cout << "NTL_DISABLE_LONGLONG\n"; #endif #ifdef NTL_MAXIMIZE_SP_NBITS cout << "NTL_MAXIMIZE_SP_NBITS\n"; #endif #ifdef NTL_GMP_LIP cerr << "NTL_GMP_LIP\n"; #endif #ifdef NTL_GF2X_LIB cerr << "NTL_GF2X_LIB\n"; #endif #ifdef NTL_PCLMUL cerr << "NTL_PCLMUL\n"; #endif #ifdef NTL_LONG_LONG_TYPE cerr << "NTL_LONG_LONG_TYPE: "; cerr << make_string(NTL_LONG_LONG_TYPE) << "\n"; #endif #ifdef NTL_UNSIGNED_LONG_LONG_TYPE cerr << "NTL_UNSIGNED_LONG_LONG_TYPE: "; cerr << make_string(NTL_UNSIGNED_LONG_LONG_TYPE) << "\n"; #endif #ifdef NTL_X86_FIX cerr << "NTL_X86_FIX\n"; #endif #ifdef NTL_NO_X86_FIX cerr << "NTL_NO_X86_FIX\n"; #endif #ifdef NTL_NO_INIT_TRANS cerr << "NTL_NO_INIT_TRANS\n"; #endif #ifdef NTL_CLEAN_INT cerr << "NTL_CLEAN_INT\n"; #endif #ifdef NTL_CLEAN_PTR cerr << "NTL_CLEAN_PTR\n"; #endif #ifdef NTL_RANGE_CHECK cerr << "NTL_RANGE_CHECK\n"; #endif cerr << "\n"; cerr << "Resolution of double-word types:\n"; cerr << make_string(NTL_LL_TYPE) << 
"\n"; cerr << make_string(NTL_ULL_TYPE) << "\n"; cerr << "\n"; cerr << "Performance Options:\n"; #ifdef NTL_LONG_LONG cerr << "NTL_LONG_LONG\n"; #endif #ifdef NTL_AVOID_FLOAT cerr << "NTL_AVOID_FLOAT\n"; #endif #ifdef NTL_SPMM_ULL cerr << "NTL_SPMM_ULL\n"; #endif #ifdef NTL_SPMM_ASM cerr << "NTL_SPMM_ASM\n"; #endif #ifdef NTL_AVOID_BRANCHING cerr << "NTL_AVOID_BRANCHING\n"; #endif #ifdef NTL_FFT_BIGTAB cout << "NTL_FFT_BIGTAB\n"; #endif #ifdef NTL_FFT_LAZYMUL cout << "NTL_FFT_LAZYMUL\n"; #endif #ifdef NTL_TBL_REM cerr << "NTL_TBL_REM\n"; #endif #ifdef NTL_TBL_REM_LL cerr << "NTL_TBL_REM_LL\n"; #endif #ifdef NTL_CRT_ALTCODE cerr << "NTL_CRT_ALTCODE\n"; #endif #ifdef NTL_CRT_ALTCODE_SMALL cerr << "NTL_CRT_ALTCODE_SMALL\n"; #endif #ifdef NTL_GF2X_ALTCODE cerr << "NTL_GF2X_ALTCODE\n"; #endif #ifdef NTL_GF2X_ALTCODE1 cerr << "NTL_GF2X_ALTCODE1\n"; #endif #ifdef NTL_GF2X_NOINLINE cerr << "NTL_GF2X_NOINLINE\n"; #endif cerr << "\n\n"; cerr << "running tests"; long n, k, i; n = 250; k = 16000; ZZ p; for (i = 0; i < 15; i++) { // cerr << n << "/" << k; cerr << "."; RandomLen(p, k); ZZ_p::init(p); ZZ_pX a, b, c, c1; random(a, n); random(b, n); FFTMul(c, a, b); //cerr << ZZ_pInfo->FFTInfo->NumPrimes; c1 = conv<ZZ_pX>( KarMul( conv<ZZX>(a), conv<ZZX>(b) ) ); if (c1 != c) { cerr << "ZZ_pX mul failed!\n"; return 1; } n = long(n * 1.35); k = long(k / 1.414); } // small prime tests...I've made some changes in v5.3 // that should be checked on various platforms, so // we might as well check them here. if (SmallModulusTest(17, 1000)) { cerr << "first SmallModulusTest failed!!\n"; return 1; } if (SmallModulusTest((1L << (NTL_SP_NBITS))-1, 1000)) { cerr << "second SmallModulusTest failed!!\n"; return 1; } // Test gf2x code.... 
if (GF2X_test()) { cerr << "GF2X test failed!\n"; return 1; } cerr << "OK\n"; ZZ x1, x2, x3, x4; double t; RandomLen(x1, 1024); RandomBnd(x2, x1); RandomBnd(x3, x1); mul(x4, x2, x3); t = GetTime(); for (i = 0; i < 100000; i++) mul(x4, x2, x3); t = GetTime()-t; cerr << "time for 1024-bit mul: " << t*10 << "us"; cerr << "\n"; rem(x2, x4, x1); t = GetTime(); for (i = 0; i < 100000; i++) rem(x2, x4, x1); t = GetTime()-t; cerr << "time for 2048/1024-bit rem: " << t*10 << "us"; cerr << "\n"; GenPrime(p, 1024); RandomBnd(x1, p); if (IsZero(x1)) set(x1); InvMod(x2, x1, p); t = GetTime(); for (i = 0; i < 1000; i++) InvMod(x2, x1, p); t = GetTime()-t; cerr << "time for 1024-bit modular inverse: " << t*1000 << "us"; cerr << "\n"; // test modulus switching n = 1024; k = 1024; RandomLen(p, k); ZZ_p::init(p); if (!IsOdd(p)) p++; ZZ_pX j1, j2, j3; random(j1, n); random(j2, n); mul(j3, j1, j2); t = GetTime(); for (i = 0; i < 200; i++) mul(j3, j1, j2); t = GetTime()-t; cerr << "time to multiply degree 1023 polynomials\n modulo a 1024-bit number: "; cerr << (t/200) << "s"; cerr << "\n"; GF2X_time(); return 0; }