bool ProvePrime(const ZZ& _n) { ZZ n(_n); if (n<0) abs(n,n); if (n<=1) return 0; if (n<=1000000) { // n is small so use trial division to check primality long ln = to_long(n); long end = to_long(SqrRoot(n)); PrimeSeq s; for (long p=s.next(); p<=end; p=s.next()) if ((ln%p)==0) return 0; return 1; } // check small primes PrimeSeq s; for (long p=s.next(); p<1000; p=s.next()) if (divide(n,p)) return 0; // obviously, something is missing here! return ProbPrime(n); }
void OperandStack::lcmp(){ Operand op1H, op1L, op2H, op2L; op2L = pop(); op2H = pop(); op1L = pop(); op1H = pop(); if( (op1H.type != TYPE_LONG) || (op2H.type != TYPE_LONG) ) { printf("Error type not long: :op_stack.lcmp\n"); exit(0); } int64_t val1, val2; val1 = to_long(op1H.bytes, op1L.bytes); val2 = to_long(op2H.bytes, op2L.bytes); if(val1 == val2) iconst(0); else if(val1 > val2) iconst(1); else if(val1 < val2) iconst(-1); }
// Builds the auxiliary data attached to the current zz_pE field.
//
// Each precompute_* flag selects whether the corresponding table (inverses,
// square roots, Legendre character, Frobenius map) is allocated for the field
// cardinality q; a table that is not requested is left as NULL.
// Independently of the flags, a random non-square element and the images of
// the basis 1, x, ..., x^{d-1} under the p-th power map are always computed.
//
// NOTE(review): the search for a non-square only terminates if
// legendre_char() reports something other than 1 for some non-zero element;
// in characteristic 2 every element is a square -- confirm callers never
// construct this object for p = 2.
zz_pEExtraInfoT::zz_pEExtraInfoT(int precompute_inverses, int precompute_square_roots, int precompute_legendre_char, int precompute_pth_frobenius_map)
{
    // zz_p::modulus() yields a long; keep the full width so that a modulus
    // wider than int is not truncated before being used as an exponent below.
    const long p = zz_p::modulus();
    q = to_long(zz_pE::cardinality());
    ref_count = 1;

    inv_table = precompute_inverses ? new invTable(q) : NULL;
    root_table = precompute_square_roots ? new rootTable(q) : NULL;
    legendre_table = precompute_legendre_char ? new legendreChar(q) : NULL;
    frob_map = precompute_pth_frobenius_map ? new frobeniusMap(q) : NULL;

    // precompute a non-square in F_q
    zz_pE x, y;
    do {
        x = random_zz_pE();
    } while (x == 0 || legendre_char(x) == 1);
    non_square = x;

    // precompute image of basis 1,x^1,...,x^{d-1} under Frobenius
    const long d = zz_pE::degree();
    frob_of_basis = new zz_pE[d];
    x = 0;
    SetCoeff(x.LoopHole(), 1, 1);   // x now holds the polynomial X
    frob_of_basis[0] = 1;
    for (long i = 1; i < d; i++) {
        power(y, x, i);                   // y = X^i
        power(frob_of_basis[i], y, p);    // (X^i)^p
    }
}
// Converts an unsigned long to a quad_float (a hi/lo pair of doubles).
//
// hi is n converted to double; lo is the signed difference between n and that
// converted value, obtained in integer arithmetic so that it is exact.  The
// pair is renormalised before being returned.  The whole computation runs
// between START_FIX and END_FIX.
quad_float to_quad_float(unsigned long n)
{
START_FIX
   DOUBLE hi, lo, whole;
   DOUBLE sum, err;

   // 2^NTL_BITS_PER_LONG, built without overflowing a long
   const double two_pow_bits = double(1L << (NTL_BITS_PER_LONG-2))*4.0;

   hi = double(n);

   // If n converted up to 2^NTL_BITS_PER_LONG, the value does not fit an
   // unsigned long any more; subtract the bound before converting back.
   whole = (hi >= two_pow_bits) ? hi - two_pow_bits : hi;

   // we use the "to_long" function here to be as portable as possible.
   long residue = to_long(n - (unsigned long)(whole));
   lo = double(residue);

   // renormalize...just to be safe
   sum = hi + lo;
   err = hi - sum;
   err = err + lo;
END_FIX

   return quad_float(sum, err);
}
void OperandStack::lmul(){ if(size < 4) { printf("Error :op_stack.lmul\n"); exit(0); } Operand opL, opH; opL = pop(); opH = pop();//multiplicador if((opH.type != TYPE_LONG) || ((top-1)->type != TYPE_LONG) ) { printf("Error type not int: :op_stack.lmul\n"); exit(0); } int64_t resultado = to_long( (top-1)->bytes, (top)->bytes ) * to_long(opH.bytes, opL.bytes); (top-1)->set_high(TYPE_LONG, &resultado); top->set_low(TYPE_LONG, &resultado); }
void cls_op::do_process(const tuple_ptr tup, int) { pre_process_hook(tup); std::vector<double> coordinates = std::vector<double>(m_value_d); for (int i = 0; i < m_value_d; i++) coordinates[i] = to_double((*tup)[i]); long timestamp = to_long(tup->op_arrival_time()); std::vector<cls_snapshot_microcluster*> *smc = m_clustream->process(coordinates, timestamp); if (smc != NULL) { cls_lbgu *lbgu = new cls_lbgu(smc, m_value_k); lbgu->start(); std::vector<cls_cluster*> result = lbgu->get_clusters(); std::vector<std::pair<int, cls_cluster*> > result_list(0); for (int i = 0; i < m_value_k; i++) result_list.push_back(std::pair<int, cls_cluster*>(i, result[i])); m_id++; if (m_first_output_done) { m_new_list = sort_clusters(m_old_list, result_list); m_difference = calculate_difference(m_old_list, m_new_list); } else { m_new_list = result_list; m_difference = result_list; m_first_output_done = true; } std::vector<std::pair<int, cls_cluster*> > output_list; if (m_output_version == 0) output_list = m_new_list; else if (m_output_version == 1) output_list = m_difference; else output_list = m_new_list; for (int i = 0; i < (int) output_list.size(); i++) { std::vector<boost::any> data(2 + m_value_d); data[0] = m_id; data[1] = output_list[i].first; for (int j = 0; j < m_value_d; j++) data[j+2] = output_list[i].second->get_coordinate(j); tuple_ptr n_tuple(new tuple(data)); publish_new(n_tuple); } m_old_list = m_new_list; } }
// Decomposes the monic polynomial ff into factors paired with multiplicities.
//
// u  - output; reset to length 0, then filled with (factor, multiplicity)
//      pairs built by cons(tmp1, j*m).
// ff - input polynomial; its leading coefficient must be one, otherwise
//      LogicError is raised.  A polynomial of degree 0 yields an empty u.
//
// Presumably this is the usual square-free decomposition over ZZ_pE: the
// inner loop peels off the factors found via repeated GCDs with the
// derivative, and whatever remains is treated as a p-th power.
void SquareFreeDecomp(vec_pair_ZZ_pEX_long& u, const ZZ_pEX& ff)
{
   ZZ_pEX f = ff;

   if (!IsOne(LeadCoeff(f)))
      LogicError("SquareFreeDecomp: bad args");

   ZZ_pEX r, t, v, tmp1;
   long m, j, finished, done;

   u.SetLength(0);

   if (deg(f) == 0)
      return;

   // m is the multiplicity scale; it is multiplied by p after each
   // p-th-root extraction below.
   m = 1;
   finished = 0;

   do {
      j = 1;
      diff(tmp1, f);       // tmp1 = f'
      GCD(r, f, tmp1);     // r = gcd(f, f')
      div(t, f, r);        // t = f / r

      if (deg(t) > 0) {
         done = 0;
         do {
            GCD(v, r, t);
            div(tmp1, t, v);
            // a non-constant quotient is recorded with multiplicity j*m
            if (deg(tmp1) > 0) append(u, cons(tmp1, j*m));
            if (deg(v) > 0) {
               div(r, r, v);
               t = v;
               j++;
            }
            else
               done = 1;
         } while (!done);
         if (deg(r) == 0) finished = 1;
      }

      if (!finished) {
         /* r is a p-th power */
         long k, d;
         long p = to_long(ZZ_p::modulus());
         d = deg(r)/p;
         f.rep.SetLength(d+1);
         // rebuild f from every p-th coefficient of r, each passed through
         // IterPower with ZZ_pE::degree()-1 iterations
         for (k = 0; k <= d; k++)
            IterPower(f.rep[k], r.rep[k*p], ZZ_pE::degree()-1);
         m = m*p;
      }
   } while (!finished);
}
// res = e^x, rounded to the precision in effect on entry.
//
// x is split into an integer part n and a fraction f with |f| <= 1/2;
// e^n is obtained with power() and e^f from its Taylor series, both at
// temporarily raised precision.  Raises Error("RR: overflow") when
// |x| >= NTL_OVFBND.
void exp(RR& res, const RR& x)
{
   if (x >= NTL_OVFBND || x <= -NTL_OVFBND)
      Error("RR: overflow");

   const long prec = RR::precision();

   // step 0: write x = n + f, n an integer and |f| <= 1/2
   // careful -- we want to compute f to > p bits of precision
   RR frac, int_part;
   RR::SetPrecision(NTL_BITS_PER_LONG);
   round(int_part, x);
   RR::SetPrecision(prec + 10);
   sub(frac, x, int_part);
   const long n = to_long(int_part);

   // step 1: calculate t1 = e^n by repeated squaring
   RR::SetPrecision(prec + NumBits(n) + 10);
   RR e;
   ComputeE(e);
   RR::SetPrecision(prec + 10);
   RR e_to_n;
   power(e_to_n, e, n);

   // step 2: calculate t2 = e^f using Taylor series expansion
   RR::SetPrecision(prec + NumBits(prec) + 10);
   RR e_to_frac, sum, next_sum, term;
   sum = 0;
   term = 1;

   long i = 1;
   while (true) {
      add(next_sum, sum, term);
      if (sum == next_sum)
         break;               // the term no longer changes the sum
      xcopy(sum, next_sum);
      mul(term, term, frac);
      div(term, term, i);     // term = f^i / i!
      i++;
   }
   xcopy(e_to_frac, sum);

   // combine at the caller's precision
   RR::SetPrecision(prec);
   mul(res, e_to_n, e_to_frac);
}
long ComputeMax10Power() { RRPush push; RR::SetPrecision(NTL_BITS_PER_LONG); RR ln2, ln10; ComputeLn2(ln2); ComputeLn10(ln10); long k = to_long( to_RR(NTL_OVFBND/2) * ln2 / ln10 ); return k; }
void OperandStack::lxor(){ Operand op1, op2, opL, opH; opL = pop(); opH = pop(); op1 = pop(); op2 = pop(); if((op1.type != TYPE_LONG) || (op2.type != TYPE_LONG) || (opL.type != TYPE_LONG) || (opH.type != TYPE_LONG)) { printf("Error type not long: :op_stack.lxor\n"); exit(0); } int64_t resultado = (int64_t)to_long( op2.bytes, op1.bytes ) ^ (int64_t)to_long(opH.bytes, opL.bytes); opH.bytes = (u4)(resultado>>32); opL.bytes = (u4)resultado; push(opH); push(opL); }
// Small driver: calls get_modulus() with the fixed arguments (5, 2, 2) and
// prints the cardinality of the current zz_pE field together with the three
// polynomials it filled in.  Command-line arguments are ignored.
int main(int argc, char **argv)
{
   zz_pX pi_1, pi_2, a;
   get_modulus(pi_1, pi_2, a, 5, 2, 2);

   const long q = to_long(zz_pE::cardinality());

   cout << "q = " << q << endl
        << "pi_1 = " << pi_1 << endl
        << "pi_2 = " << pi_2 << endl
        << "a = " << a << endl;

   return 0;
}
void OperandStack::lrem(){ if(size < 4) { printf("Error :op_stack.lrem\n"); exit(0); } Operand opL, opH; opL = pop(); opH = pop(); if((opL.type != TYPE_LONG) || (opH.type != TYPE_LONG) || ((top)->type != TYPE_LONG) || ((top-1)->type != TYPE_LONG) ) { printf("Error type not int: :op_stack.ldiv\n"); exit(0); } if( (opH.bytes == 0x0) && (opL.bytes==0x0) ) { exception("ArithmeticException: / by zero at OpStack.lrem"); } int64_t resultado = to_long( (top-1)->bytes, (top)->bytes ) % to_long(opH.bytes, opL.bytes); (top-1)->set_high(TYPE_LONG, &resultado); top->set_low(TYPE_LONG, &resultado); }
quad_float exp(const quad_float& x)
{
   // New version 97 Aug 05
   /*
   !  Calculate a quadruple-precision exponential
   !  Method:
   !    e^x = 2^(x.log2(e)) = 2^(nint[x.log2(e)] + frac[x.log2(e)])
   !        = 2^iy . 2^fy
   !  Then
   !    2^fy = e^(fy.loge(2))
   !
   !  Now fy.loge(2) will be less than 0.3466 in absolute value.
   !  This is halved and a Pade approximation is used to approximate e^x over
   !  the region (-0.1733, +0.1733).  This approximation is then squared.
   */

   // natural logarithm of 10, used to turn the decimal exponent limits of
   // double into limits on x
   const double ln10 = 2.302585092994045684017991;

   if (x.hi < DBL_MIN_10_EXP*ln10)
      return to_quad_float(0.0);
   if (x.hi > DBL_MAX_10_EXP*ln10) {
      Error("exp(quad_float): overflow");
   }

   // changed this from "const" to "static" in v5.3, since "const"
   // causes the initialization to be performed with *every* invocation.
   static quad_float Log2 =
      to_quad_float("0.6931471805599453094172321214581765680755");

   // split x/ln(2) into its nearest integer iy and the remainder
   quad_float y = x/Log2;
   const quad_float nearest = floor(y+0.5);
   const long iy = to_long(nearest);

   // remainder, back in natural units, then halved
   y = (y-nearest)*Log2;
   y = ldexp(y,-1L);

   const quad_float ysq = y*y;
   const quad_float sum1 =
      y*((((ysq+3960.0)*ysq+2162160.0)*ysq+302702400.0)*ysq+8821612800.0);
   const quad_float sum2 =
      (((90.0*ysq+110880.0)*ysq+30270240.0)*ysq+2075673600.0)*ysq+17643225600.0;

   /*
   !                       sum2 + sum1         2.sum1
   !  Now approximation = ----------- = 1 + ----------- = 1 + 2.temp
   !                       sum2 - sum1       sum2 - sum1
   !
   !  Then (1 + 2.temp)^2 = 4.temp.(1 + temp) + 1
   */
   const quad_float ratio = sum1/(sum2-sum1);
   y = ratio*(ratio+1);
   y = ldexp(y,2L);

   // scale by 2^iy
   return ldexp(y+1,iy);
}
// Size-reduction step: subtracts a multiple of row l from row k.
//
// The multiplier r is BalDiv(lam(k)(P(l)), D[P(l)]).  Nothing is done when
// P(l) == 0 or when |2*lam(k)(P(l))| <= D[P(l)].  Otherwise the same multiple
// is applied via MulSubFrom to B(k), to (*U)(k) when U is non-null, and to
// the lam(k) entries for every j < l with P(j) != 0, plus the entry at P(l).
//
// NOTE(review): t1 and r are function-local statics, so this routine is not
// reentrant.
static void reduce(long k, long l, mat_ZZ& B, vec_long& P, vec_ZZ& D, vec_vec_ZZ& lam, mat_ZZ* U)
{
   static ZZ t1;
   static ZZ r;

   if (P(l) == 0) return;

   // t1 = |2 * lam(k)(P(l))|
   add(t1, lam(k)(P(l)), lam(k)(P(l)));
   abs(t1, t1);
   if (t1 <= D[P(l)]) return;

   long j;
   long rr, small_r;

   BalDiv(r, lam(k)(P(l)), D[P(l)]);

   // When r passes WideSinglePrecision() it is converted to a long once and
   // the long overloads of MulSubFrom are used below.
   if (r.WideSinglePrecision()) {
      small_r = 1;
      rr = to_long(r);
   }
   else {
      small_r = 0;
   }

   if (small_r) {
      MulSubFrom(B(k), B(l), rr);
      if (U) MulSubFrom((*U)(k), (*U)(l), rr);

      for (j = 1; j <= l-1; j++)
         if (P(j) != 0)
            MulSubFrom(lam(k)(P(j)), lam(l)(P(j)), rr);
      MulSubFrom(lam(k)(P(l)), D[P(l)], rr);
   }
   else {
      // same updates with the full-size multiplier
      MulSubFrom(B(k), B(l), r);
      if (U) MulSubFrom((*U)(k), (*U)(l), r);

      for (j = 1; j <= l-1; j++)
         if (P(j) != 0)
            MulSubFrom(lam(k)(P(j)), lam(l)(P(j)), r);
      MulSubFrom(lam(k)(P(l)), D[P(l)], r);
   }
}
void OperandStack::l2f(){ Operand opL = pop(); Operand opH = pop(); if((opL.type != TYPE_LONG) || (opH.type != TYPE_LONG)) { printf("Error type not long: :op_stack.l2f\n"); exit(0); } int64_t l = to_long(opH.bytes, opL.bytes); float f = (float) l; Operand op; op.set_value(TYPE_FLOAT, &f); push(op); }
void OperandStack::l2d(){ Operand opL = pop(); Operand opH = pop(); if((opL.type != TYPE_LONG) || (opH.type != TYPE_LONG)) { printf("Error type not long: :op_stack.l2d\n"); exit(0); } int64_t l = to_long(opH.bytes, opL.bytes); double d = (double) l; opH.set_high(TYPE_DOUBLE, &d); opL.set_low(TYPE_DOUBLE, &d); push(opH); push(opL); }
// Converts an unsigned long to a quad_float (a hi/lo pair of doubles).
//
// hi is n converted with TrueDouble(); lo is the signed difference between n
// and that converted value, obtained in integer arithmetic so that it is
// exact.  normalize() combines the two parts into the result.
quad_float to_quad_float(unsigned long n)
{
   DOUBLE hi, lo, whole;

   // 2^NTL_BITS_PER_LONG, built without overflowing a long
   const double two_pow_bits = double(1L << (NTL_BITS_PER_LONG-2))*4.0;

   hi = TrueDouble(n);

   // If n converted up to 2^NTL_BITS_PER_LONG, the value does not fit an
   // unsigned long any more; subtract the bound before converting back.
   whole = (hi >= two_pow_bits) ? hi - two_pow_bits : hi;

   // we use the "to_long" function here to be as portable as possible.
   long residue = to_long(n - (unsigned long)(whole));
   lo = TrueDouble(residue);

   quad_float result;
   normalize(result, hi, lo);
   return result;
}
void OperandStack::lushr(){ Operand op, opH, opL; op = pop(); opL = pop(); opH = pop(); if( (op.type != TYPE_INT) || (opH.type != TYPE_LONG) || (opL.type != TYPE_LONG)) { printf("Error type not long or shift amount not an int: op_stack.lushr\n"); exit(0); } uint64_t l = to_long(opH.bytes, opL.bytes); op.bytes &= 0x1F; l >>= op.bytes; opH.set_high(TYPE_LONG, &l); opL.set_low(TYPE_LONG, &l); push(opH); push(opL); }
// Builds the tables for arithmetic modulo p^r over the structure described
// by _zMStar: the factors of Phi_m(X), their CRT coefficients, the modulus
// object PhimXMod and the derived tables produced by genCrtTable() and
// genMaskTable().  p^r must fit in a single-precision integer (asserted).
PAlgebraModDerived<type>::PAlgebraModDerived(const PAlgebra& _zMStar, long _r)
  : zMStar(_zMStar), r(_r)
{
  const long p = zMStar.getP();
  long m = zMStar.getM();

  // For dry-run, use a tiny m value for the PAlgebra tables
  if (isDryRun())
    m = (p==3)? 4 : 3;

  assert(r > 0);

  ZZ BigPPowR = power_ZZ(p, r);
  assert(BigPPowR.SinglePrecision());
  pPowR = to_long(BigPPowR);

  const long nSlots = zMStar.getNSlots();

  RBak bak;
  bak.save();
  SetModulus(p);

  // Compute the factors Ft of Phi_m(X) mod p, for all t \in T

  RX phimxmod;
  conv(phimxmod, zMStar.getPhimX()); // Phi_m(X) mod p

  vec_RX localFactors;
  EDF(localFactors, phimxmod, zMStar.getOrdP()); // equal-degree factorization

  // We make the lexicographically smallest factor have index 0.
  // The remaining factors are ordered according to their representives.
  RX* const first = &localFactors[0];
  RX* const last = first + localFactors.length();
  RX* const smallest = min_element(first, last);
  swap(*first, *smallest);

  RXModulus F1(localFactors[0]);
  for (long slot = 1; slot < nSlots; slot++) {
    unsigned long t = zMStar.ith_rep(slot); // Ft is minimal polynomial of x^{1/t} mod F1
    unsigned long tInv = InvMod(t, m);      // tInv = t^{-1} mod m
    RX X2tInv = PowerXMod(tInv, F1);        // X2tInv = X^{1/t} mod F1
    IrredPolyMod(localFactors[slot], X2tInv, F1);
  }

  /* Debugging sanity-check #1: we should have Ft= GCD(F1(X^t),Phi_m(X))
  for (i=1; i<nSlots; i++) {
    unsigned long t = T[i];
    RX X2t = PowerXMod(t,phimxmod);  // X2t = X^t mod Phi_m(X)
    RX Ft = GCD(CompMod(F1,X2t,phimxmod),phimxmod);
    if (Ft != localFactors[i]) {
      cout << "Ft != F1(X^t) mod Phi_m(X), t=" << t << endl;
      exit(0);
    }
  }*******************************************************************/

  if (r != 1) {
    // r > 1: the factors and CRT coefficients come from PAlgebraLift()
    PAlgebraLift(zMStar.getPhimX(), localFactors, factors, crtCoeffs, r);
    RX phimxmod1;
    conv(phimxmod1, zMStar.getPhimX());
    build(PhimXMod, phimxmod1);
    pPowRContext.save();
  }
  else {
    build(PhimXMod, phimxmod);
    factors = localFactors;
    pPowRContext.save();

    // Compute the CRT coefficients for the Ft's
    crtCoeffs.SetLength(nSlots);
    for (long slot = 0; slot < nSlots; slot++) {
      RX te = phimxmod / factors[slot];              // \prod_{j\ne i} Fj
      te %= factors[slot];                           // \prod_{j\ne i} Fj mod Fi
      InvMod(crtCoeffs[slot], te, factors[slot]);    // \prod_{j\ne i} Fj^{-1} mod Fi
    }
  }

  // set factorsOverZZ
  factorsOverZZ.resize(nSlots);
  for (long slot = 0; slot < nSlots; slot++)
    conv(factorsOverZZ[slot], factors[slot]);

  genCrtTable();
  genMaskTable();
}
// general purpose factoring method void factor(vec_pair_ZZ_long& factors, const ZZ& _n, const ZZ& bnd, double failure_prob, bool verbose) { ZZ n(_n); if (n<=1) { abs(n,n); if (n<=1) { factors.SetLength(0); return; } } // upper bound on size of smallest prime factor ZZ upper_bound; SqrRoot(upper_bound,n); if (bnd>0 && bnd<upper_bound) upper_bound=bnd; // figure out appropriate lower_bound for trial division long B1,B2,D; double prob; ECM_parameters(B1,B2,prob,D,NumBits(upper_bound),NumBits(n)); ZZ lower_bound; conv(lower_bound,max(B2,1<<14)); if (lower_bound>upper_bound) lower_bound=upper_bound; // start factoring with trial division TrialDivision(factors,n,n,to_long(lower_bound)); if (IsOne(n)) return; if (upper_bound<=lower_bound || ProbPrime_notd(n)) { addFactor(factors,n); return; } /* n is composite and smallest prime factor is assumed to be such that * lower_bound < factor <= upper_bound * * Ramp-up to searching for factors of size upper_bound. This is a good * idea in cases where we have no idea what size factors N might have, * but we don't want to spend too much time doing this. */ for(lower_bound<<=4; lower_bound<upper_bound; lower_bound<<=4) { ZZ q; ECM(q,n,lower_bound,1,verbose); // one curve only if (!IsOne(q)) { div(n,n,q); if (n<q) swap(n,q); // q is small factor, n is large factor if (ProbPrime_notd(q)) addFactor(factors,q); else factor_r(factors,q,bnd,failure_prob,verbose); if (ProbPrime_notd(n)) { addFactor(factors,n); return; } // new upper_bound SqrRoot(upper_bound,n); if (bnd>0 && bnd<upper_bound) upper_bound=bnd; } } // search for factors of size bnd factor_r(factors,n,bnd,failure_prob,verbose); }
NTL_CLIENT

// quad_float smoke test: reads two values from standard input, prints the
// results of the basic arithmetic operations (each computed once with the
// binary operator and once with the compound assignment), then checks that
// conversions between long / unsigned long and quad_float round-trip.
int main()
{
   quad_float a, b, c, d;

   quad_float::SetOutputPrecision(25);

   cout << (PrecisionOK() ? "Precision OK\n" : "Precision not OK\n");

   cin >> a;
   cout << a << "\n";

   cin >> b;
   cout << b << "\n";

   c = a + b;
   d = a;
   d += b;
   cout << c << "\n" << d << "\n";

   c = a - b;
   d = a;
   d -= b;
   cout << c << "\n" << d << "\n";

   c = a * b;
   d = a;
   d *= b;
   cout << c << "\n" << d << "\n";

   c = a / b;
   d = a;
   d /= b;
   cout << c << "\n" << d << "\n";

   c = -a;
   cout << c << "\n";

   c = sqrt(a);
   cout << c << "\n";

   power(c, to_quad_float(10), 20);
   cout << c << "\n";

   {
      // signed round trip: 2^(shamt-1) - 1, then 2^(shamt-1)
      const int shamt = min(NTL_BITS_PER_LONG,2*NTL_DOUBLE_PRECISION);

      long n = to_long((1UL << (shamt-1)) - 1UL);
      c = to_quad_float(n);
      long n1 = to_long(c);
      cout << ((n1 == n) ? "long conversion OK\n" : "long conversion not OK\n");

      n = to_long(1UL << (shamt-1));
      c = to_quad_float(n);
      n1 = to_long(c);
      cout << ((n1 == n) ? "long conversion OK\n" : "long conversion not OK\n");
   }

   {
      // unsigned round trip through ZZ, same two values
      const int shamt = min(NTL_BITS_PER_LONG,2*NTL_DOUBLE_PRECISION);

      unsigned long n = (1UL << (shamt-1)) - 1UL;
      c = to_quad_float(n);
      ZZ n1;
      n1 = to_ZZ(c);
      cout << ((n1 == to_ZZ(n)) ? "ulong conversion OK\n" : "ulong conversion not OK\n");

      n = 1UL << (shamt-1);
      c = to_quad_float(n);
      n1 = to_ZZ(c);
      cout << ((n1 == to_ZZ(n)) ? "ulong conversion OK\n" : "ulong conversion not OK\n");
   }
}
void solve1(ZZ& d_out, vec_ZZ& x_out, const mat_ZZ& A, const vec_ZZ& b) { long n = A.NumRows(); if (A.NumCols() != n) LogicError("solve1: nonsquare matrix"); if (b.length() != n) LogicError("solve1: dimension mismatch"); if (n == 0) { set(d_out); x_out.SetLength(0); return; } ZZ num_bound, den_bound; hadamard(num_bound, den_bound, A, b); if (den_bound == 0) { clear(d_out); return; } zz_pBak zbak; zbak.save(); long i; long j; ZZ prod; prod = 1; mat_zz_p B; for (i = 0; ; i++) { zz_p::FFTInit(i); mat_zz_p AA, BB; zz_p dd; conv(AA, A); inv(dd, BB, AA); if (dd != 0) { transpose(B, BB); break; } mul(prod, prod, zz_p::modulus()); if (prod > den_bound) { d_out = 0; return; } } long max_A_len = MaxBits(A); long use_double_mul1 = 0; long use_double_mul2 = 0; long double_limit = 0; if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_DOUBLE_PRECISION-1) use_double_mul1 = 1; if (!use_double_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_DOUBLE_PRECISION-1) { use_double_mul2 = 1; double_limit = (1L << (NTL_DOUBLE_PRECISION-1-max_A_len-NTL_SP_NBITS)); } long use_long_mul1 = 0; long use_long_mul2 = 0; long long_limit = 0; if (max_A_len + NTL_SP_NBITS + NumBits(n) <= NTL_BITS_PER_LONG-1) use_long_mul1 = 1; if (!use_long_mul1 && max_A_len+NTL_SP_NBITS+2 <= NTL_BITS_PER_LONG-1) { use_long_mul2 = 1; long_limit = (1L << (NTL_BITS_PER_LONG-1-max_A_len-NTL_SP_NBITS)); } if (use_double_mul1 && use_long_mul1) use_long_mul1 = 0; else if (use_double_mul1 && use_long_mul2) use_long_mul2 = 0; else if (use_double_mul2 && use_long_mul1) use_double_mul2 = 0; else if (use_double_mul2 && use_long_mul2) { if (long_limit > double_limit) use_double_mul2 = 0; else use_long_mul2 = 0; } double **double_A=0; double *double_h=0; Unique2DArray<double> double_A_store; UniqueArray<double> double_h_store; if (use_double_mul1 || use_double_mul2) { double_h_store.SetLength(n); double_h = double_h_store.get(); double_A_store.SetDims(n, n); double_A = double_A_store.get(); for (i = 0; i < n; i++) for (j = 0; j < n; j++) 
double_A[j][i] = to_double(A[i][j]); } long **long_A=0; long *long_h=0; Unique2DArray<long> long_A_store; UniqueArray<long> long_h_store; if (use_long_mul1 || use_long_mul2) { long_h_store.SetLength(n); long_h = long_h_store.get(); long_A_store.SetDims(n, n); long_A = long_A_store.get(); for (i = 0; i < n; i++) for (j = 0; j < n; j++) long_A[j][i] = to_long(A[i][j]); } vec_ZZ x; x.SetLength(n); vec_zz_p h; h.SetLength(n); vec_ZZ e; e = b; vec_zz_p ee; vec_ZZ t; t.SetLength(n); prod = 1; ZZ bound1; mul(bound1, num_bound, den_bound); mul(bound1, bound1, 2); while (prod <= bound1) { conv(ee, e); mul(h, B, ee); if (use_double_mul1) { for (i = 0; i < n; i++) double_h[i] = to_double(rep(h[i])); double_MixedMul1(t, double_h, double_A, n); } else if (use_double_mul2) { for (i = 0; i < n; i++) double_h[i] = to_double(rep(h[i])); double_MixedMul2(t, double_h, double_A, n, double_limit); } else if (use_long_mul1) { for (i = 0; i < n; i++) long_h[i] = to_long(rep(h[i])); long_MixedMul1(t, long_h, long_A, n); } else if (use_long_mul2) { for (i = 0; i < n; i++) long_h[i] = to_long(rep(h[i])); long_MixedMul2(t, long_h, long_A, n, long_limit); } else MixedMul(t, h, A); // t = h*A SubDiv(e, t, zz_p::modulus()); // e = (e-t)/p MulAdd(x, prod, h); // x = x + prod*h mul(prod, prod, zz_p::modulus()); } vec_ZZ num, denom; ZZ d, d_mod_prod, tmp1; num.SetLength(n); denom.SetLength(n); d = 1; d_mod_prod = 1; for (i = 0; i < n; i++) { rem(x[i], x[i], prod); MulMod(x[i], x[i], d_mod_prod, prod); if (!ReconstructRational(num[i], denom[i], x[i], prod, num_bound, den_bound)) LogicError("solve1 internal error: rat recon failed!"); mul(d, d, denom[i]); if (i != n-1) { if (denom[i] != 1) { div(den_bound, den_bound, denom[i]); mul(bound1, num_bound, den_bound); mul(bound1, bound1, 2); div(tmp1, prod, zz_p::modulus()); while (tmp1 > bound1) { prod = tmp1; div(tmp1, prod, zz_p::modulus()); } rem(tmp1, denom[i], prod); rem(d_mod_prod, d_mod_prod, prod); MulMod(d_mod_prod, d_mod_prod, tmp1, prod); } } 
} tmp1 = 1; for (i = n-1; i >= 0; i--) { mul(num[i], num[i], tmp1); mul(tmp1, tmp1, denom[i]); } x_out.SetLength(n); for (i = 0; i < n; i++) { x_out[i] = num[i]; } d_out = d; }
// Note: poly is passed by value, not by reference, so the calling routine // keeps its original polynomial long evalPolyTopLevel(ZZX poly, long x, long p, long k=0) { if (verbose) cerr << "\n* evalPolyTopLevel: p="<<p<<", x="<<x<<", poly="<<poly; if (deg(poly)<=2) { // nothing to optimize here if (deg(poly)<1) return to_long(coeff(poly, 0)); DynamicPtxtPowers babyStep(x, p, deg(poly)); long ret = simplePolyEval(poly, babyStep, p); totalDepth = babyStep.getDepth(deg(poly)); return ret; } // How many baby steps: set k~sqrt(n/2), rounded up/down to a power of two // FIXME: There may be some room for optimization here: it may be possible // to choose k as something other than a power of two and still maintain // optimal depth, in principle we can try all possible values of k between // the two powers of two and choose the one that goves the least number // of multiplies, conditioned on minimum depth. if (k<=0) { long kk = (long) sqrt(deg(poly)/2.0); k = 1L << NextPowerOfTwo(kk); // heuristic: if k>>kk then use a smaler power of two if ((k==16 && deg(poly)>167) || (k>16 && k>(1.44*kk))) k /= 2; } cerr << ", k="<<k; long n = divc(deg(poly),k); // deg(p) = k*n +delta if (verbose) cerr << ", n="<<n<<endl; DynamicPtxtPowers babyStep(x, p, k); long x2k = babyStep.getPower(k); // Special case when deg(p)>k*(2^e -1) if (n==(1L << NextPowerOfTwo(n))) { // n is a power of two DynamicPtxtPowers giantStep(x2k, p, n/2, babyStep.getDepth(k)); if (verbose) cerr << "babyStep="<<babyStep<<", giantStep="<<giantStep<<endl; long ret = degPowerOfTwo(poly, k, babyStep, giantStep, p, totalDepth); if (verbose) { cerr << " degPowerOfTwo("<<poly<<") returns "<<ret<<", depth="<<totalDepth<<endl; if (ret != polyEvalMod(poly,babyStep[0], p)) { cerr << " ## recursive call failed, ret="<<ret<<"!=" << polyEvalMod(poly,babyStep[0], p)<<endl; exit(0); } // cerr << " babyStep depth=["; // for (long i=0; i<babyStep.size(); i++) // cerr << babyStep.getDepth(i+1)<<" "; // cerr << "]\n"; // cerr << " 
giantStep depth=["; // for (long i=0; i<giantStep.size(); i++) // cerr<<giantStep.getDepth(i+1)<<" "; // cerr << "]\n"; } return ret; } // If n is not a power of two, ensure that poly is monic and that // its degree is divisible by k, then call the recursive procedure ZZ topInv; // the inverse mod p of the top coefficient of poly (if any) bool divisible = (n*k == deg(poly)); // is the degree divisible by k? long nonInvertibe = InvModStatus(topInv, LeadCoeff(poly), to_ZZ(p)); // 0 if invertible, 1 if not // FIXME: There may be some room for optimization below: instead of // adding a term X^{n*k} we can add X^{n'*k} for some n'>n, so long // as n' is smaller than the next power of two. We could save a few // multiplications since giantStep[n'] may be easier to compute than // giantStep[n] when n' has fewer 1's than n in its binary expansion. long extra = 0; // extra!=0 denotes an added term extra*X^{n*k} if (!divisible || nonInvertibe) { // need to add a term // set extra = 1 - current-coeff-of-X^{n*k} extra = SubMod(1, to_long(coeff(poly,n*k)), p); SetCoeff(poly, n*k); // set the top coefficient of X^{n*k} to one topInv = to_ZZ(1); // inverse of new top coefficient is one } long t = (extra==0)? 
divc(n,2) : n; DynamicPtxtPowers giantStep(x2k, p, t, babyStep.getDepth(k)); if (verbose) cerr << "babyStep="<<babyStep<<", giantStep="<<giantStep<<endl; long y; // the value to return long subDepth1 =0; if (!IsOne(topInv)) { long top = to_long(poly[n*k]); // record the current top coefficient // cerr << ", top-coeff="<<top; // Multiply by topInv modulo p to make into a monic polynomial poly *= topInv; for (long i=0; i<=n*k; i++) rem(poly[i], poly[i], to_ZZ(p)); poly.normalize(); y = recursivePolyEval(poly, k, babyStep, giantStep, p, subDepth1); if (verbose) { cerr << " recursivePolyEval("<<poly<<") returns "<<y<<", depth="<<subDepth1<<endl; if (y != polyEvalMod(poly,babyStep[0], p)) { cerr << "## recursive call failed, ret="<<y<<"!=" << polyEvalMod(poly,babyStep[0], p)<<endl; exit(0); } } y = MulMod(y, top, p); // multiply by the original top coefficient } else { y = recursivePolyEval(poly, k, babyStep, giantStep, p, subDepth1); if (verbose) { cerr << " recursivePolyEval("<<poly<<") returns "<<y<<", depth="<<subDepth1<<endl; if (y != polyEvalMod(poly,babyStep[0], p)) { cerr << "## recursive call failed, ret="<<y<<"!=" << polyEvalMod(poly,babyStep[0], p)<<endl; exit(0); } } } if (extra != 0) { // if we added a term, now is the time to subtract back if (verbose) cerr << ", subtracting "<<extra<<"*X^"<<k*n; extra = MulMod(extra, giantStep.getPower(n), p); totalDepth = max(subDepth1, giantStep.getDepth(n)); y = SubMod(y, extra, p); } else totalDepth = subDepth1; if (verbose) cerr << endl; return y; }
// Converts the textual value in input_value to a long by forwarding to
// to_long().
//
// The exception specification lists invalid_argument and out_of_range;
// presumably those are what to_long() raises for non-numeric and
// out-of-range input -- confirm against its definition.
//
// NOTE(review): dynamic exception specifications were removed in C++17; this
// signature only builds in earlier language modes and has to stay in step
// with its declaration.
long from_string(const string & input_value) throw(invalid_argument, out_of_range)
{
  return to_long(input_value);
}
/** * Usage: ./hw1.x <filename> <nthreads> * * <filename> (don't include the angle brackets) is the name of * a data file in the current directory containing the parameters * for the Black-Scholes simulation. It has exactly six lines * with no white space. Put each parameter one to a line, with * an endline after it. Here are the parameters: * * S * E * r * sigma * T * M * * <nthreads> (don't include the angle brackets) is the number of * worker threads to use at a time in the benchmark. The sequential * code which we supply to you doesn't use this argument; your code * will. */ int main (int argc, char* argv[]) { confidence_interval_t interval; double S, E, r, sigma, T; long M = 0; char* filename = NULL; int nthreads = 1; double t1, t2; int i; int debug_mode = 0; if (argc < 5) { fprintf (stderr, "Usage: ./hw1.x <filename> <trials:M> <nthreads> [rnd_mode] [debug_mode]\n\n"); exit (EXIT_FAILURE); } filename = argv[1]; nthreads = to_int (argv[3]); rnd_mode = to_int(argv[4]); if (argv[5]!= NULL) { debug_mode = to_int(argv[5]); } parse_parameters (&S, &E, &r, &sigma, &T, &M, filename); M = to_long (argv[2]); /* rearrange nthread and M(trials) * for the further use, we arrange M based on the number of threads * if M is not, or less than nthreads. 
* */ if (M < 256) { printf("Trials(M) is less than minimum requirement, 256,\n" "So, we increase M to 256\n"); M = 256; } if (nthreads > M) { printf("The number of threads is exceed to M\n" "So, we set it to M\n"); nthreads = M; } if (M % nthreads) { M = (M/nthreads+1)*nthreads; printf("nthreads and M is not balanced\n" "So, we rebalance M to muliple of nthreads\n" "M: %ld, nthreads: %d\n", M, nthreads); } /* * generate pre-generated random numbers * */ double* preRands = (double*)malloc (sizeof (double) * M); if (preRands == NULL) { printf("ERROR: Cannot allocate size of memory: %ld\n" "Begin with smaller size of M.\n", sizeof(double)*M); exit(1); } for (i = 0; i < M; i++) { preRands[i] = i /(double)M; if (debug_mode > 0 && (i < 10 || i > M-10)) printf("RND%d: %.6lf, ", i, preRands[i]); } /* * Make sure init_timer() is only called by one thread, * before all the other threads run! */ init_timer (); /* Same goes for initializing the PRNG */ init_prng (random_seed ()); /* * Run the benchmark and time it. */ t1 = get_seconds (); void** prng_stream = (void**)malloc(sizeof(void*)*nthreads); for( i = 0; i < nthreads; i++) { prng_stream[i] = spawn_prng_stream (i); } double prng_stream_spawn_time = get_seconds() - t1; /* * In the parallel case, you may want to set prng_stream_spawn_time to * the max of all the prng_stream_spawn_times, or just take a representative * sample... */ bs_return_t ret = black_scholes (&interval, S, E, r, sigma, T, M, nthreads, preRands, prng_stream, debug_mode); t2 = get_seconds (); /* * A fun fact about C string literals (i.e., strings enclosed in * double quotes) is that the C preprocessor automatically * concatenates them if they are separated only by whitespace. */ if (nthreads == 1) { printf ("Black-Scholes (Ver. Sequential) benchmark:\n"); } else if (nthreads > 1) { printf ("Black-Scholes (Ver. 
Threads: %d) benchmark:\n", nthreads); } printf( "--------------------------------------------\n" "Trials %ld\n" "Confidence interval:(%g, %g)\n" "Average Trials(BS) : %10lf\n" "Standard Deviation : %10lf\n" "--------------------------------------------\n" "Total simulation time (sec) : %10lf\n" "PRNG stream spawn time (sec): %10lf\n" "BS computation time (sec) : %10lf\n\n" //"S %g\n" //"E %g\n" //"r %g\n" //"sigma %g\n" //"T %g\n" //S, E, r, sigma, T , M , interval.min, interval.max , ret.mean , ret.stddev , t2 - t1 , prng_stream_spawn_time , (t2 - t1) - prng_stream_spawn_time); free(preRands); free(prng_stream); return 0; }
int64_t OperandStack::pop_l() { Operand opL = pop(); Operand opH = pop(); return to_long(opH.bytes, opL.bytes); }
// Writes an xdouble in decimal with xdouble::OutputPrecision() significant
// digits.  Zero prints as "0".  Depending on the decimal exponent the value
// is written either in "0.<digits>e<exp>" form or as plain positional
// digits; trailing zero digits of the mantissa are dropped in both cases.
ostream& operator<<(ostream& s, const xdouble& a)
{
   if (a == 0) {
      s << "0";
      return s;
   }

   RRPush push;

   // working precision for the logarithm estimate below
   long temp_p = long(log(fabs(log(fabs(a))) + 1.0)/log(2.0)) + 10;
   RR::SetPrecision(temp_p);

   RR ln2, ln10, log_2_10;
   ComputeLn2(ln2);
   ComputeLn10(ln10);
   log_2_10 = ln10/ln2;

   // estimate of log10(|a|)
   ZZ log_10_a = to_ZZ(
      (to_RR(a.e)*to_RR(2*NTL_XD_HBOUND_LOG) + log(fabs(a.x))/log(2.0))/log_2_10);

   // work on the magnitude and remember the sign
   xdouble mag;
   long neg;
   if (a < 0) {
      mag = -a;
      neg = 1;
   }
   else {
      mag = a;
      neg = 0;
   }

   ZZ k = xdouble::OutputPrecision() - log_10_a;

   xdouble scale, est;
   scale = PowerOf10(to_ZZ(xdouble::OutputPrecision()));
   est = PowerOf10(log_10_a);

   mag = mag / est;
   mag = mag * scale;

   // adjust so that scale/10 <= mag < scale
   while (mag < scale) {
      mag = mag * 10.0;
      k++;
   }

   while (mag >= scale) {
      mag = mag / 10.0;
      k--;
   }

   mag = mag + 0.5;   // round to nearest on conversion
   k = -k;

   ZZ B;
   conv(B, mag);

   long bp_len = xdouble::OutputPrecision()+10;

   UniqueArray<char> bp_store;
   bp_store.SetLength(bp_len);
   char *bp = bp_store.get();

   // peel off the decimal digits, least significant first
   long len = 0;
   do {
      if (len >= bp_len) LogicError("xdouble output: buffer overflow");
      bp[len] = IntValToChar(DivRem(B, B, 10));
      len++;
   } while (B > 0);

   // reverse into most-significant-first order
   for (long lo = 0, hi = len-1; lo < hi; lo++, hi--) {
      char tmp = bp[lo];
      bp[lo] = bp[hi];
      bp[hi] = tmp;
   }

   // drop trailing zeros, compensating in the exponent
   long i = len-1;
   while (bp[i] == '0') i--;

   k += (len-1-i);
   len = i+1;

   bp[len] = '\0';

   // every output form starts with the sign
   if (neg) s << "-";

   if (k > 3 || k < -len - 3) {
      // use scientific notation
      s << "0." << bp << "e" << (k + len);
   }
   else {
      long kk = to_long(k);

      if (kk >= 0) {
         // integer: the digits followed by kk zeros
         s << bp;
         for (i = 0; i < kk; i++)
            s << "0";
      }
      else if (kk <= -len) {
         // pure fraction: leading zeros after the point
         s << "0.";
         for (i = 0; i < -len-kk; i++)
            s << "0";
         s << bp;
      }
      else {
         // decimal point falls inside the digit string
         for (i = 0; i < len+kk; i++)
            s << bp[i];

         s << ".";

         for (i = len+kk; i < len; i++)
            s << bp[i];
      }
   }

   return s;
}
void mymult(){ ZZX mya, myb, c0, c1, x; ZZ q; int k = to_long(euler_toient(to_ZZ(Modulus_M))); GenPrime(q, Max_Prime); RandomPolyGen(mya, k, 1, q); RandomPolyGen(myb, k, Max_Prime, q); long da = deg(mya); long db = deg(myb); long bound = 2 + NumBits(min(da, db)+1) + MaxBits(mya) + MaxBits(myb); ZZ prod; set(prod); int prime_num = GetPrimeNumber(bound, prod); cout << prime_num << endl; long mk = NextPowerOfTwo(2*da+1); zz_p::FFTInit(0); long p = zz_p::modulus(); fftRep R1[prime_num]; fftRep R2[prime_num]; fftRep R3[prime_num]; fftRep R4[prime_num]; int size = 256; fftRep Rm[prime_num][size]; for(int i=0; i<prime_num; i++) for(int j=0; j<size; j++) Rm[i][j].SetSize(mk); for(int i=0; i<prime_num; i++){ zz_p::FFTInit(i); R1[i].SetSize(mk); R2[i].SetSize(mk); R3[i].SetSize(mk); R4[i].SetSize(mk); } myTimer tm; tm.Start(); CalculateFFTValues(R1, mya, prime_num, db); tm.Stop(); tm.ShowTime("My FFT:\t"); CalculateFFTValues(R2, myb, prime_num, db); tm.Start(); for(int i=0; i<prime_num; i++) for(int j=0; j<size; j++) Rm[i][j] = R2[i]; for(int j=0; j<size; j++){ CalculateFFTValues(R1, mya, prime_num, db); for(int i=0; i<prime_num; i++){ zz_p::FFTInit(i); mul(R3[i], R1[i], Rm[i][j]); add(R4[i], R4[i], R3[i]); } } CalculateFFTValues(R4, myb, prime_num, db); tm.Stop(); tm.ShowTime("My FFT:\t"); }