// Squares the polynomial a and stores the result in c (c may alias a).
//
// A zero input yields a zero result.  Inputs with fewer than two
// coefficients go through PlainSqr; everything else is handed to the
// pointer-level KarSqr together with a scratch stack sized for the full
// recursion.
void KarSqr(ZZX& c, const ZZX& a)
{
   if (IsZero(a)) {
      clear(c);
      return;
   }

   long len = a.rep.length();

   // When the output aliases the input, work from a private copy of the
   // coefficients so that resizing c below cannot disturb them.
   vec_ZZ scratch;
   const bool aliased = (&a == &c);
   if (aliased)
      scratch = a.rep;
   const ZZ *src = aliased ? scratch.elts() : a.rep.elts();

   c.rep.SetLength(2*len - 1);
   ZZ *dst = c.rep.elts();

   long coeff_bits = MaxBits(a);
   const long crossover = 2;

   if (len >= crossover) {
      /* karatsuba */

      // Walk the recursion once to learn how many scratch slots it needs
      // (3*half - 1 per level) and how deep it goes.
      long slots = 0;
      long levels = 0;
      long m = len;
      do {
         long half = (m + 1) >> 1;
         slots += 3*half - 1;
         m = half;
         ++levels;
      } while (m >= crossover);

      // Bit size reserved for every scratch entry, rounded up to whole
      // NTL_ZZ_NBITS units.
      long entry_bits = 2*coeff_bits + NumBits(len) + 2*levels + 10;

      ZZVec stk;
      stk.SetSize(slots, (entry_bits + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS);

      KarSqr(dst, src, len, stk.elts());
   }
   else {
      PlainSqr(dst, src, len);
   }

   c.normalize();
}
// solve1 (older variant; this definition is TRUNCATED in the visible source --
// it stops after the double-precision copy of A is built, and the remainder
// of the body is not shown here).
//
// Visible steps:
//   * Rejects a non-square A or a b whose length differs from n via Error().
//   * n == 0: set(d_out), x_out gets length 0, return.
//   * hadamard() produces num_bound / den_bound; den_bound == 0 clears d_out
//     and returns.
//   * The zz_p modulus is saved in zbak, then FFT primes 0, 1, 2, ... are
//     tried until inv() reports a non-zero dd; the transpose of the inverse
//     is kept in B.  If the product of the primes tried so far exceeds
//     den_bound first, d_out is set to 0 and the function returns.
//   * use_double_mul1/2 and use_long_mul1/2 are chosen from MaxBits(A),
//     NTL_SP_NBITS and NumBits(n); at most one of the four stays set.
//   * When a double variant is selected, double_h and double_A are allocated
//     with NTL_NEW_OP and double_A is filled with the transpose of A.
//
// NOTE(review): no release of double_h / double_A is visible in this
// fragment -- confirm that the missing tail of the function frees them.
void solve1(ZZ& d_out, vec_ZZ& x_out, const mat_ZZ& A, const vec_ZZ& b) { long n = A.NumRows(); if (A.NumCols() != n) Error("solve1: nonsquare matrix"); if (b.length() != n) Error("solve1: dimension mismatch"); if (n == 0) { set(d_out); x_out.SetLength(0); return; } ZZ num_bound, den_bound; hadamard(num_bound, den_bound, A, b); if (den_bound == 0) { clear(d_out); return; } zz_pBak zbak; zbak.save(); long i; long j; ZZ prod; prod = 1; mat_zz_p B; for (i = 0; ; i++) { zz_p::FFTInit(i); mat_zz_p AA, BB; zz_p dd; conv(AA, A); inv(dd, BB, AA); if (dd != 0) { transpose(B, BB); break; } mul(prod, prod, zz_p::modulus()); if (prod > den_bound) { d_out = 0; return; } } long max_A_len = MaxBits(A); long use_double_mul1 = 0; long use_double_mul2 = 0; long double_limit = 0; if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_DOUBLE_PRECISION-1) use_double_mul1 = 1; if (!use_double_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_DOUBLE_PRECISION-1) { use_double_mul2 = 1; double_limit = (1L << (NTL_DOUBLE_PRECISION-1-max_A_len-NTL_SP_NBITS)); } long use_long_mul1 = 0; long use_long_mul2 = 0; long long_limit = 0; if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_BITS_PER_LONG-1) use_long_mul1 = 1; if (!use_long_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_BITS_PER_LONG-1) { use_long_mul2 = 1; long_limit = (1L << (NTL_BITS_PER_LONG-1-max_A_len-NTL_SP_NBITS)); } if (use_double_mul1 && use_long_mul1) use_long_mul1 = 0; else if (use_double_mul1 && use_long_mul2) use_long_mul2 = 0; else if (use_double_mul2 && use_long_mul1) use_double_mul2 = 0; else if (use_double_mul2 && use_long_mul2) { if (long_limit > double_limit) use_double_mul2 = 0; else use_long_mul2 = 0; } double **double_A; double *double_h; typedef double *double_ptr; if (use_double_mul1 || use_double_mul2) { double_h = NTL_NEW_OP double[n]; double_A = NTL_NEW_OP double_ptr[n]; if (!double_h || !double_A) Error("solve1: out of mem"); for (i = 0; i < n; i++) { double_A[i] = NTL_NEW_OP double[n]; if (!double_A[i]) Error("solve1: out of mem"); 
} for (i = 0; i < n; i++) for (j = 0; j < n; j++) double_A[j][i] = to_double(A[i][j]); }
// Computes the characteristic polynomial of the square integer matrix a
// and stores it in gg.
//
// The polynomial is assembled by CRT from images modulo successive FFT
// primes until the accumulated modulus exceeds a bit bound derived from
// CharPolyBound(a).  Unless `deterministic` is non-zero, a stable partial
// result may be confirmed early against one image modulo a freshly
// generated large prime.
//
// Raises LogicError if a is not square.  The zz_p and ZZ_p moduli in
// effect on entry are restored before a normal return.
void CharPoly(ZZX& gg, const mat_ZZ& a, long deterministic)
{
   long dim = a.NumRows();

   if (a.NumCols() != dim)
      LogicError("CharPoly: nonsquare matrix");

   if (dim == 0) {
      set(gg);
      return;
   }

   if (dim == 1) {
      // 1x1 case: the result is X - a(1,1).
      ZZ neg_entry;
      SetX(gg);
      negate(neg_entry, a(1, 1));
      SetCoeff(gg, 0, neg_entry);
      return;
   }

   long bound = 2 + CharPolyBound(a);

   zz_pBak small_ctx;
   small_ctx.save();

   ZZ_pBak big_ctx;
   big_ctx.save();

   ZZX acc;
   clear(acc);

   ZZ modulus;
   set(modulus);

   long changed = 1;       // did the most recent CRT step alter acc?
   long big_prime_cnt = 0; // number of large-prime checks made so far

   for (long idx = 0; NumBits(modulus) <= bound; idx++) {
      const bool try_early_exit =
         !deterministic && !changed && bound > 1000 &&
         NumBits(modulus) < 0.25*bound;

      if (try_early_exit) {
         long plen = 90 + NumBits(max(bound, MaxBits(acc)));
         long err = 90 + 2*NumBits(big_prime_cnt);
         big_prime_cnt++;

         ZZ P;
         GenPrime(P, plen, err);
         ZZ_p::init(P);

         mat_ZZ_p a_big;
         ZZ_pX cp_big;
         conv(a_big, a);
         CharPoly(cp_big, a_big);

         // An unchanged accumulator after this image means we are done.
         if (!CRT(acc, modulus, cp_big))
            break;

         changed = 1;
      }

      zz_p::FFTInit(idx);

      mat_zz_p a_small;
      zz_pX cp_small;
      conv(a_small, a);
      CharPoly(cp_small, a_small);
      changed = CRT(acc, modulus, cp_small);
   }

   gg = acc;

   small_ctx.restore();
   big_ctx.restore();
}
NTL_START_IMPL

// Computes into gg the characteristic polynomial of a modulo f.
//
// Requirements (otherwise Error("CharPolyMod: bad args") is raised):
// f has leading coefficient 1, deg(f) >= 1 and deg(a) < deg(f).
// For a == 0 the result is X^deg(f).
//
// The result is assembled by CRT from images modulo successive FFT primes
// until the accumulated modulus exceeds a bit bound derived from
// CharPolyBound(a, f).  Unless `deterministic` is non-zero, a stable
// partial result may be confirmed early against one image modulo a
// freshly generated large prime.  The zz_p and ZZ_p moduli in effect on
// entry are restored before the normal return at the end.
void CharPolyMod(ZZX& gg, const ZZX& a, const ZZX& f, long deterministic)
{
   const bool args_ok =
      IsOne(LeadCoeff(f)) && deg(f) >= 1 && deg(a) < deg(f);

   if (!args_ok)
      Error("CharPolyMod: bad args");

   if (IsZero(a)) {
      clear(gg);
      SetCoeff(gg, deg(f));
      return;
   }

   long bound = 2 + CharPolyBound(a, f);

   long big_prime_cnt = 0; // number of large-prime checks made so far

   zz_pBak small_ctx;
   small_ctx.save();

   ZZ_pBak big_ctx;
   big_ctx.save();

   ZZX acc;
   clear(acc);

   ZZ modulus;
   set(modulus);

   long changed = 1; // did the most recent CRT step alter acc?

   for (long idx = 0; NumBits(modulus) <= bound; idx++) {
      const bool try_early_exit =
         !deterministic && !changed && bound > 1000 &&
         NumBits(modulus) < 0.25*bound;

      if (try_early_exit) {
         long plen = 90 + NumBits(max(bound, MaxBits(acc)));
         long err = 90 + 2*NumBits(big_prime_cnt);
         big_prime_cnt++;

         ZZ P;
         GenPrime(P, plen, err);
         ZZ_p::init(P);

         ZZ_pX cp_big, a_big, f_big;
         conv(a_big, a);
         conv(f_big, f);
         CharPolyMod(cp_big, a_big, f_big);

         // An unchanged accumulator after this image means we are done.
         if (!CRT(acc, modulus, cp_big))
            break;

         changed = 1;
      }

      zz_p::FFTInit(idx);

      zz_pX cp_small, a_small, f_small;
      conv(a_small, a);
      conv(f_small, f);
      CharPolyMod(cp_small, a_small, f_small);
      changed = CRT(acc, modulus, cp_small);
   }

   gg = acc;

   small_ctx.restore();
   big_ctx.restore();
}
void solve1(ZZ& d_out, vec_ZZ& x_out, const mat_ZZ& A, const vec_ZZ& b) { long n = A.NumRows(); if (A.NumCols() != n) LogicError("solve1: nonsquare matrix"); if (b.length() != n) LogicError("solve1: dimension mismatch"); if (n == 0) { set(d_out); x_out.SetLength(0); return; } ZZ num_bound, den_bound; hadamard(num_bound, den_bound, A, b); if (den_bound == 0) { clear(d_out); return; } zz_pBak zbak; zbak.save(); long i; long j; ZZ prod; prod = 1; mat_zz_p B; for (i = 0; ; i++) { zz_p::FFTInit(i); mat_zz_p AA, BB; zz_p dd; conv(AA, A); inv(dd, BB, AA); if (dd != 0) { transpose(B, BB); break; } mul(prod, prod, zz_p::modulus()); if (prod > den_bound) { d_out = 0; return; } } long max_A_len = MaxBits(A); long use_double_mul1 = 0; long use_double_mul2 = 0; long double_limit = 0; if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_DOUBLE_PRECISION-1) use_double_mul1 = 1; if (!use_double_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_DOUBLE_PRECISION-1) { use_double_mul2 = 1; double_limit = (1L << (NTL_DOUBLE_PRECISION-1-max_A_len-NTL_SP_NBITS)); } long use_long_mul1 = 0; long use_long_mul2 = 0; long long_limit = 0; if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_BITS_PER_LONG-1) use_long_mul1 = 1; if (!use_long_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_BITS_PER_LONG-1) { use_long_mul2 = 1; long_limit = (1L << (NTL_BITS_PER_LONG-1-max_A_len-NTL_SP_NBITS)); } if (use_double_mul1 && use_long_mul1) use_long_mul1 = 0; else if (use_double_mul1 && use_long_mul2) use_long_mul2 = 0; else if (use_double_mul2 && use_long_mul1) use_double_mul2 = 0; else if (use_double_mul2 && use_long_mul2) { if (long_limit > double_limit) use_double_mul2 = 0; else use_long_mul2 = 0; } double **double_A=0; double *double_h=0; Unique2DArray<double> double_A_store; UniqueArray<double> double_h_store; if (use_double_mul1 || use_double_mul2) { double_h_store.SetLength(n); double_h = double_h_store.get(); double_A_store.SetDims(n, n); double_A = double_A_store.get(); for (i = 0; i < n; i++) for (j = 0; j < n; j++) 
double_A[j][i] = to_double(A[i][j]); } long **long_A=0; long *long_h=0; Unique2DArray<long> long_A_store; UniqueArray<long> long_h_store; if (use_long_mul1 || use_long_mul2) { long_h_store.SetLength(n); long_h = long_h_store.get(); long_A_store.SetDims(n, n); long_A = long_A_store.get(); for (i = 0; i < n; i++) for (j = 0; j < n; j++) long_A[j][i] = to_long(A[i][j]); } vec_ZZ x; x.SetLength(n); vec_zz_p h; h.SetLength(n); vec_ZZ e; e = b; vec_zz_p ee; vec_ZZ t; t.SetLength(n); prod = 1; ZZ bound1; mul(bound1, num_bound, den_bound); mul(bound1, bound1, 2); while (prod <= bound1) { conv(ee, e); mul(h, B, ee); if (use_double_mul1) { for (i = 0; i < n; i++) double_h[i] = to_double(rep(h[i])); double_MixedMul1(t, double_h, double_A, n); } else if (use_double_mul2) { for (i = 0; i < n; i++) double_h[i] = to_double(rep(h[i])); double_MixedMul2(t, double_h, double_A, n, double_limit); } else if (use_long_mul1) { for (i = 0; i < n; i++) long_h[i] = to_long(rep(h[i])); long_MixedMul1(t, long_h, long_A, n); } else if (use_long_mul2) { for (i = 0; i < n; i++) long_h[i] = to_long(rep(h[i])); long_MixedMul2(t, long_h, long_A, n, long_limit); } else MixedMul(t, h, A); // t = h*A SubDiv(e, t, zz_p::modulus()); // e = (e-t)/p MulAdd(x, prod, h); // x = x + prod*h mul(prod, prod, zz_p::modulus()); } vec_ZZ num, denom; ZZ d, d_mod_prod, tmp1; num.SetLength(n); denom.SetLength(n); d = 1; d_mod_prod = 1; for (i = 0; i < n; i++) { rem(x[i], x[i], prod); MulMod(x[i], x[i], d_mod_prod, prod); if (!ReconstructRational(num[i], denom[i], x[i], prod, num_bound, den_bound)) LogicError("solve1 internal error: rat recon failed!"); mul(d, d, denom[i]); if (i != n-1) { if (denom[i] != 1) { div(den_bound, den_bound, denom[i]); mul(bound1, num_bound, den_bound); mul(bound1, bound1, 2); div(tmp1, prod, zz_p::modulus()); while (tmp1 > bound1) { prod = tmp1; div(tmp1, prod, zz_p::modulus()); } rem(tmp1, denom[i], prod); rem(d_mod_prod, d_mod_prod, prod); MulMod(d_mod_prod, d_mod_prod, tmp1, prod); } } 
} tmp1 = 1; for (i = n-1; i >= 0; i--) { mul(num[i], num[i], tmp1); mul(tmp1, tmp1, denom[i]); } x_out.SetLength(n); for (i = 0; i < n; i++) { x_out[i] = num[i]; } d_out = d; }
// Multiplies the polynomials a and b and stores the product in c
// (c may alias either operand).
//
// A zero operand yields a zero product, and a == b (same object) is
// forwarded to KarSqr.  If either operand has fewer than two coefficients
// PlainMul is used; otherwise the pointer-level KarMul is called with a
// scratch stack sized for the full recursion.
void KarMul(ZZX& c, const ZZX& a, const ZZX& b)
{
   if (IsZero(a) || IsZero(b)) {
      clear(c);
      return;
   }

   if (&a == &b) {
      KarSqr(c, a);
      return;
   }

   long len_a = a.rep.length();
   long len_b = b.rep.length();

   // At most one operand can alias c here (a and b are distinct objects),
   // so a single private copy is enough to protect the input while c is
   // resized below.
   vec_ZZ scratch;
   const ZZ *lhs;
   const ZZ *rhs;

   if (&a != &c)
      lhs = a.rep.elts();
   else {
      scratch = a.rep;
      lhs = scratch.elts();
   }

   if (&b != &c)
      rhs = b.rep.elts();
   else {
      scratch = b.rep;
      rhs = scratch.elts();
   }

   c.rep.SetLength(len_a + len_b - 1);
   ZZ *out = c.rep.elts();

   long bits_a = MaxBits(a);
   long bits_b = MaxBits(b);
   const long crossover = 2;

   if (len_a < crossover || len_b < crossover) {
      PlainMul(out, lhs, len_a, rhs, len_b);
   }
   else {
      /* karatsuba */

      // Walk the recursion once to learn how many scratch slots it needs
      // (4*half - 1 per level) and how deep it goes.
      long slots = 0;
      long levels = 0;
      long m = max(len_a, len_b);
      do {
         long half = (m + 1) >> 1;
         slots += (half << 2) - 1;
         m = half;
         ++levels;
      } while (m >= crossover);

      // Bit size reserved for every scratch entry, rounded up to whole
      // NTL_ZZ_NBITS units.
      long entry_bits =
         bits_a + bits_b + NumBits(min(len_a, len_b)) + 2*levels + 10;

      ZZVec stk;
      stk.SetSize(slots, (entry_bits + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS);

      KarMul(out, lhs, len_a, rhs, len_b, stk.elts());
   }

   c.normalize();
}
void mymult(){ ZZX mya, myb, c0, c1, x; ZZ q; int k = to_long(euler_toient(to_ZZ(Modulus_M))); GenPrime(q, Max_Prime); RandomPolyGen(mya, k, 1, q); RandomPolyGen(myb, k, Max_Prime, q); long da = deg(mya); long db = deg(myb); long bound = 2 + NumBits(min(da, db)+1) + MaxBits(mya) + MaxBits(myb); ZZ prod; set(prod); int prime_num = GetPrimeNumber(bound, prod); cout << prime_num << endl; long mk = NextPowerOfTwo(2*da+1); zz_p::FFTInit(0); long p = zz_p::modulus(); fftRep R1[prime_num]; fftRep R2[prime_num]; fftRep R3[prime_num]; fftRep R4[prime_num]; int size = 256; fftRep Rm[prime_num][size]; for(int i=0; i<prime_num; i++) for(int j=0; j<size; j++) Rm[i][j].SetSize(mk); for(int i=0; i<prime_num; i++){ zz_p::FFTInit(i); R1[i].SetSize(mk); R2[i].SetSize(mk); R3[i].SetSize(mk); R4[i].SetSize(mk); } myTimer tm; tm.Start(); CalculateFFTValues(R1, mya, prime_num, db); tm.Stop(); tm.ShowTime("My FFT:\t"); CalculateFFTValues(R2, myb, prime_num, db); tm.Start(); for(int i=0; i<prime_num; i++) for(int j=0; j<size; j++) Rm[i][j] = R2[i]; for(int j=0; j<size; j++){ CalculateFFTValues(R1, mya, prime_num, db); for(int i=0; i<prime_num; i++){ zz_p::FFTInit(i); mul(R3[i], R1[i], Rm[i][j]); add(R4[i], R4[i], R3[i]); } } CalculateFFTValues(R4, myb, prime_num, db); tm.Stop(); tm.ShowTime("My FFT:\t"); }