// Check for identically zero polynomials using randomized polynomial identity testing template<class R,class PerturbedT> static void assert_last_nonzero(void(*const polynomial)(R,RawArray<const Vector<Exact<1>,PerturbedT::m>>), R result, RawArray<const PerturbedT> X, const char* message) { typedef Vector<Exact<1>,PerturbedT::m> EV; const int n = X.size(); const auto Z = GEODE_RAW_ALLOCA(n,EV); for (const int k : range(20)) { for (int i=0;i<n;i++) Z[i] = EV(perturbation<PerturbedT::m>(k<<10,X[i].seed())); polynomial(result,Z); if (last_nonzero(result)) // Even a single nonzero means we're all good return; } // If we reach this point, the chance of a nonzero nonmalicious polynomial is something like (1e-5)^20 = 1e-100. Thus, we can safely assume that for the lifetime // of this code, we will never treat a nonzero polynomial as zero. If this comes up, we can easily bump the threshold a bit further. throw AssertionError(format("%s (there is likely a bug in the calling code), X = %s",message,str(X))); }
// Finite-difference Hessian of the quadrature energy (apparently work in progress).
//
// NOTE(review): this body looks unfinished or spliced together from gradient():
//   * `hess` is only used in the size assertion; nothing below writes to it.
//   * The accumulation at the end writes into `grad`, which is not a parameter or local here.
//   * `d4` and `q2shape` are not declared in this block; presumably members or leftovers - verify.
//   * The stencil writes advance j to 1+8*d+6*d*(d-1) per component, while the arrays are sized
//     with e = 1+8*d+8*d*(d-1), and U is called with d4 entries per quadrature point.
// Confirm the intended stencil layout before relying on this function.
void hessian(RawArray<const T,2> x, RawArray<T,4> hess) const {
  // Temporary arrays and views
  GEODE_ASSERT(x.sizes()==vec(n+3,d) && hess.sizes()==vec(n+3,4,d,d));
  const auto sx = smallx.flat.raw(), sv = smallv.flat.raw();

  // Collect quadrature points
  // e stencil entries per quadrature point; entry 0 is left at the unshifted (x,v)
  const int e = 1+8*d+8*d*(d-1);
  Array<T,3> tq(    n,quads,e,uninit);
  Array<T,4> xq(vec(n,quads,e,d),uninit);
  Array<T,4> vq(vec(n,quads,e,d),uninit);
  for (int i=0;i<n;i++) {
    T_INFO(i) // presumably declares t1 and dt for interval i - TODO confirm against the macro
    for (int q=0;q<quads;q++) {
      const T s = samples[q], t = t1+dt*s;
      for (int j=0;j<e;j++)
        tq(i,q,j) = t;
      SPLINE_INFO(s) // presumably declares the weights a0..a3 and b0..b3 for parameter s - TODO confirm
      for (int a=0;a<d;a++) {
        X_INFO(i,a) // presumably declares x0..x3 from the parameter x - TODO confirm
        // Note: this local x shadows the parameter x for the rest of the loop body
        const T x = a0*x0+a1*x1+a2*x2+a3*x3,
                v = b0*x0+b1*x1+b2*x2+b3*x3;
        // Start every stencil entry at the unshifted value, then shift entries 1.. below
        for (int j=0;j<e;j++) {
          xq(i,q,j,a) = x;
          vq(i,q,j,a) = v;
        }
        int j = 1;
        // NOTE(review): the shifts for index b are applied to component a for every (a,b) pair,
        // unlike gradient(), which only shifts component a by sx[a]/sv[a] - confirm intent.
        for (int b=0;b<d;b++) {
          const T xb = sx[b], vb = sv[b];
          // Four single shifts: x-, x+, v-, v+
          xq(i,q,j++,a) -= xb;
          xq(i,q,j++,a) += xb;
          vq(i,q,j++,a) -= vb;
          vq(i,q,j++,a) += vb;
          // Four combined x/v shifts: (-,-), (-,+), (+,-), (+,+)
          xq(i,q,j  ,a) -= xb; vq(i,q,j++,a) -= vb;
          xq(i,q,j  ,a) -= xb; vq(i,q,j++,a) += vb;
          xq(i,q,j  ,a) += xb; vq(i,q,j++,a) -= vb;
          xq(i,q,j  ,a) += xb; vq(i,q,j++,a) += vb;
          for (int c=b+1;c<d;c++) {
            const T xc = sx[c], vc = sv[c];
            // Eight entries combining the b and c step sizes
            xq(i,q,j++,a) -= xb+xc;
            xq(i,q,j++,a) -= xb-xc;
            xq(i,q,j++,a) += xb-xc;
            xq(i,q,j++,a) += xb+xc;
            vq(i,q,j++,a) -= vb+vc;
            vq(i,q,j++,a) -= vb-vc;
            vq(i,q,j++,a) += vb-vc;
            vq(i,q,j++,a) += vb+vc;
            // NOTE(review): the next four entries repeat the b-only shifts and the first one
            // has no matching xq update; this looks like leftover code - verify.
            vq(i,q,j++,a) -= sv[b];
            xq(i,q,j  ,a) -= sx[b]; vq(i,q,j++,a) += sv[b];
            xq(i,q,j  ,a) += sx[b]; vq(i,q,j++,a) -= sv[b];
            xq(i,q,j  ,a) += sx[b]; vq(i,q,j++,a) += sv[b];
          }
        }
      }
    }
  }

  // Compute energies
  // NOTE(review): the arrays above hold e entries per quadrature point, but d4 is used here
  const auto Uq_ = U(tq.reshape_own(n*quads*d4),NdArray<const T>(q2shape,xq.flat),NdArray<const T>(q2shape,vq.flat));
  GEODE_ASSERT(Uq_.size()==n*quads*d4);
  const auto Uq = Uq_.reshape(n,quads,d4);

  // Accumulate
  // NOTE(review): everything below matches the accumulation in gradient() (with b in place of a)
  // and writes first-derivative estimates into `grad`; no second derivatives reach `hess`.
  grad.fill(0);
  const auto inv_2s = GEODE_RAW_ALLOCA(d,Vector<T,2>);
  for (int a=0;a<d;a++)
    inv_2s[a] = vec(.5/sx[a],.5/sv[a]);
  for (int i=0;i<n;i++) {
    T_INFO(i)
    for (int q=0;q<quads;q++) {
      const T s = samples[q], w = dt*weights[q];
      SPLINE_INFO(s)
      for (int b=0;b<d;b++) {
        const T wx = w*inv_2s[b].x*(Uq(i,q,4*b+1)-Uq(i,q,4*b  )),
                wv = w*inv_2s[b].y*(Uq(i,q,4*b+3)-Uq(i,q,4*b+2));
        grad(i  ,b) += a0*wx+b0*wv;
        grad(i+1,b) += a1*wx+b1*wv;
        grad(i+2,b) += a2*wx+b2*wv;
        grad(i+3,b) += a3*wx+b3*wv;
      }
    }
  }
}
void gradient(RawArray<const T,2> x, RawArray<T,2> grad) const { // Temporary arrays and views GEODE_ASSERT(x.sizes()==vec(n+3,d) && grad.sizes()==x.sizes()); const auto sx = smallx.flat.raw(), sv = smallv.flat.raw(); // Collect quadrature points const int e = 4*d; Array<T,3> tq( n,quads,e,uninit); Array<T,4> xq(vec(n,quads,e,d),uninit); Array<T,4> vq(vec(n,quads,e,d),uninit); for (int i=0;i<n;i++) { T_INFO(i) for (int q=0;q<quads;q++) { const T s = samples[q], t = t1+dt*s; for (int j=0;j<e;j++) tq(i,q,j) = t; SPLINE_INFO(s) for (int a=0;a<d;a++) { X_INFO(i,a) const T x = a0*x0+a1*x1+a2*x2+a3*x3, v = b0*x0+b1*x1+b2*x2+b3*x3; for (int j=0;j<e;j++) { xq(i,q,j,a) = x; vq(i,q,j,a) = v; } } for (int a=0;a<d;a++) { xq(i,q,4*a ,a) -= sx[a]; xq(i,q,4*a+1,a) += sx[a]; vq(i,q,4*a+2,a) -= sv[a]; vq(i,q,4*a+3,a) += sv[a]; } } } // Compute energies const auto Uq_ = U(tq.reshape_own(n*quads*e),NdArray<const T>(q2shape,xq.flat),NdArray<const T>(q2shape,vq.flat)); GEODE_ASSERT(Uq_.size()==n*quads*e); const auto Uq = Uq_.reshape(n,quads,e); // Accumulate grad.fill(0); const auto inv_2s = GEODE_RAW_ALLOCA(d,Vector<T,2>); for (int a=0;a<d;a++) inv_2s[a] = vec(.5/sx[a],.5/sv[a]); for (int i=0;i<n;i++) { T_INFO(i) for (int q=0;q<quads;q++) { const T s = samples[q], w = dt*weights[q]; SPLINE_INFO(s) for (int a=0;a<d;a++) { const T wx = w*inv_2s[a].x*(Uq(i,q,4*a+1)-Uq(i,q,4*a )), wv = w*inv_2s[a].y*(Uq(i,q,4*a+3)-Uq(i,q,4*a+2)); grad(i ,a) += a0*wx+b0*wv; grad(i+1,a) += a1*wx+b1*wv; grad(i+2,a) += a2*wx+b2*wv; grad(i+3,a) += a3*wx+b3*wv; } } } }
template<class PerturbedT> bool perturbed_ratio(RawArray<Quantized> result, void(*const ratio)(RawArray<mp_limb_t,2>,RawArray<const Vector<Exact<1>,PerturbedT::m>>), const int degree, RawArray<const PerturbedT> X, const bool take_sqrt) { const int m = PerturbedT::m; typedef Vector<Exact<1>,m> EV; const int n = X.size(); const int r = result.size(); if (verbose) cout << "perturbed_ratio:\n degree = "<<degree<<"\n X = "<<X<<endl; // Check if the ratio is nonsingular before perturbation const auto Z = GEODE_RAW_ALLOCA(n,EV); const int precision = degree*Exact<1>::ratio; { for (int i=0;i<n;i++) Z[i] = EV(to_exact(X[i].value())); const auto R = GEODE_RAW_ALLOCA((r+1)*precision,mp_limb_t).reshape(r+1,precision); ratio(R,Z); if (const int sign = mpz_sign(R[r])) { snap_divs(result,R,take_sqrt); return sign>0; } } // Check the first perturbation level with specialized code vector<Vector<ExactInt,m>> Y(n); // perturbations { // Compute the first level of perturbations for (int i=0;i<n;i++) Y[i] = perturbation<m>(1,X[i].seed()); if (verbose) cout << " Y = "<<Y<<endl; // Evaluate polynomial at epsilon = 1, ..., degree const int scaled_precision = precision+factorial_limbs(degree); const auto values = GEODE_RAW_ALLOCA(degree*(r+1)*scaled_precision,mp_limb_t).reshape(degree,r+1,scaled_precision); for (int j=0;j<degree;j++) { for (int i=0;i<n;i++) Z[i] = EV(to_exact(X[i].value())+(j+1)*Y[i]); ratio(values[j],Z); if (verbose) cout << " ratio("<<Z<<") = "<<mpz_str(values[j])<<endl; } // Find interpolating polynomials, overriding the input with the result. for (int k=0;k<=r;k++) { scaled_univariate_in_place_interpolating_polynomial(values.sub<1>(k)); if (verbose) cout << " coefs "<<k<<" = "<<mpz_str(values.sub<1>(k))<<endl; } // Find the largest (lowest degree) nonzero denominator coefficient. If we detect an infinity during this process, explode. 
for (int j=0;j<degree;j++) { if (const int sign = mpz_sign(values(j,r))) { // We found a nonzero, now compute the rounded ratio snap_divs(result,values[j],take_sqrt); return sign>0; } else for (int k=0;k<r;k++) if (mpz_nonzero(values(j,k))) throw OverflowError(format("perturbed_ratio: infinite result in l'Hopital expansion: %s/0",mpz_str(values(j,k)))); } } { // Add one perturbation level after another until we hit a nonzero denominator. Our current implementation duplicates // work from one iteration to the next for simplicity, which is fine since the first interation suffices almost always. for (int d=2;;d++) { // Compute the next level of perturbations Y.resize(d*n); for (int i=0;i<n;i++) Y[(d-1)*n+i] = perturbation<m>(d,X[i].seed()); // Evaluate polynomial at every point in an "easy corner" const auto lambda = monomials(degree,d); const Array<mp_limb_t,3> values(lambda.m,r+1,precision,uninit); for (int j=0;j<lambda.m;j++) { for (int i=0;i<n;i++) Z[i] = EV(to_exact(X[i].value())+lambda(j,0)*Y[i]); for (int v=1;v<d;v++) for (int i=0;i<n;i++) Z[i] += EV(lambda(j,v)*Y[v*n+i]); ratio(values[j],Z); } // Find interpolating polynomials, overriding the input with the result. 
for (int k=0;k<=r;k++) in_place_interpolating_polynomial(degree,lambda,values.sub<1>(k)); // Find the largest nonzero denominator coefficient int sign = 0; int nonzero = -1; for (int j=0;j<lambda.m;j++) if (const int s = mpz_sign(values(j,r))) { if (check) // Verify that a term which used to be zero doesn't become nonzero GEODE_ASSERT(lambda(j,d-1)); if (nonzero<0 || monomial_less(lambda[nonzero],lambda[j])) { sign = s; nonzero = j; } } // Verify that numerator coefficients are zero for all large monomials for (int j=0;j<lambda.m;j++) if (nonzero<0 || monomial_less(lambda[nonzero],lambda[j])) for (int k=0;k<r;k++) if (mpz_nonzero(values(j,k))) throw OverflowError(format("perturbed_ratio: infinite result in l'Hopital expansion: %s/0",str(values(j,k)))); // If we found a nonzero, compute the result if (nonzero >= 0) { snap_divs(result,values[nonzero],take_sqrt); return sign>0; } // If we get through two levels without fixing the degeneracy, run a fast, strict identity test to make sure we weren't handed an impossible problem. if (d==2) assert_last_nonzero(ratio,values[0],X,"perturbed_ratio: identically zero denominator"); } } }
// Cast num/den to an int, rounding towards nearest. All inputs are destroyed. Take a sqrt if desired. // The values array must consist of r numerators followed by one denominator. void snap_divs(RawArray<Quantized> result, RawArray<mp_limb_t,2> values, const bool take_sqrt) { assert(result.size()+1==values.m); // For division, we seek x s.t. // x-1/2 <= num/den <= x+1/2 // 2x-1 <= 2num/den <= 2x+1 // 2x-1 <= floor(2num/den) <= 2x+1 // 2x <= 1+floor(2num/den) <= 2x+2 // x <= (1+floor(2num/den))//2 <= x+1 // x = (1+floor(2num/den))//2 // In the sqrt case, we seek a nonnegative integer x s.t. // x-1/2 <= sqrt(num/den) < x+1/2 // 2x-1 <= sqrt(4num/den) < 2x+1 // Now the leftmost and rightmost expressions are integral, so we can take floors to get // 2x-1 <= floor(sqrt(4num/den)) < 2x+1 // Since sqrt is monotonic and maps integers to integers, floor(sqrt(floor(x))) = floor(sqrt(x)), so // 2x-1 <= floor(sqrt(floor(4num/den))) < 2x+1 // 2x <= 1+floor(sqrt(floor(4num/den))) < 2x+2 // x <= (1+floor(sqrt(floor(4num/den))))//2 < x+1 // x = (1+floor(sqrt(floor(4num/den))))//2 // Thus, both cases look like // x = (1+f(2**k*num/den))//2 // where k = 1 or 2 and f is some truncating integer op (division or division+sqrt). 
// Adjust denominator to be positive const auto raw_den = values[result.size()]; const bool den_negative = mp_limb_signed_t(raw_den.back())<0; if (den_negative) mpn_neg(raw_den.data(),raw_den.data(),raw_den.size()); const auto den = trim(raw_den); assert(den.size()); // Zero should be prevented by the caller // Prepare for divisions const auto q = GEODE_RAW_ALLOCA(values.n-den.size()+1,mp_limb_t), r = GEODE_RAW_ALLOCA(den.size(),mp_limb_t); // Compute each component of the result for (int i=0;i<result.size();i++) { // Adjust numerator to be positive const auto num = values[i]; const bool num_negative = mp_limb_signed_t(num.back())<0; if (take_sqrt && num_negative!=den_negative && !num.contains_only(0)) throw RuntimeError("perturbed_ratio: negative value in square root"); if (num_negative) mpn_neg(num.data(),num.data(),num.size()); // Add enough bits to allow round-to-nearest computation after performing truncating operations mpn_lshift(num.data(),num.data(),num.size(),take_sqrt?2:1); // Perform division mpn_tdiv_qr(q.data(),r.data(),0,num.data(),num.size(),den.data(),den.size()); const auto trim_q = trim(q); if (!trim_q.size()) { result[i] = 0; continue; } // Take sqrt if desired, reusing the num buffer const auto s = take_sqrt ? sqrt_helper(num,trim_q) : trim_q; // Verify that result lies in [-exact::bound,exact::bound]; const int ratio = sizeof(ExactInt)/sizeof(mp_limb_t); static_assert(ratio<=2,""); if (s.size() > ratio) goto overflow; const auto nn = ratio==2 && s.size()==2 ? s[0]|ExactInt(s[1])<<8*sizeof(mp_limb_t) : s[0], n = (1+nn)/2; if (uint64_t(n) > uint64_t(exact::bound)) goto overflow; // Done! result[i] = (num_negative==den_negative?1:-1)*Quantized(n); } return; overflow: throw OverflowError("perturbed_ratio: overflow in l'Hopital expansion"); }
template<class PerturbedT> bool perturbed_sign(void(*const predicate)(RawArray<mp_limb_t>,RawArray<const Vector<Exact<1>,PerturbedT::m>>), const int degree, RawArray<const PerturbedT> X) { const int m = PerturbedT::m; typedef Vector<Exact<1>,m> EV; if (check) GEODE_WARNING("Expensive consistency checking enabled"); const int n = X.size(); if (verbose) cout << "perturbed_sign:\n degree = "<<degree<<"\n X = "<<X<<endl; // Check if the predicate is nonsingular without perturbation const auto Z = GEODE_RAW_ALLOCA(n,EV); const int precision = degree*Exact<1>::ratio; { for (int i=0;i<n;i++) Z[i] = EV(to_exact(X[i].value())); const auto R = GEODE_RAW_ALLOCA(precision,mp_limb_t); predicate(R,Z); if (const int sign = mpz_sign(R)) return sign>0; } // Check the first perturbation level with specialized code vector<Vector<ExactInt,m>> Y(n); // perturbations { // Compute the first level of perturbations for (int i=0;i<n;i++) Y[i] = perturbation<m>(1,X[i].seed()); if (verbose) cout << " Y = "<<Y<<endl; // Evaluate polynomial at epsilon = 1, ..., degree const int scaled_precision = precision+factorial_limbs(degree); const auto values = GEODE_RAW_ALLOCA(degree*scaled_precision,mp_limb_t).reshape(degree,scaled_precision); memset(values.data(),0,sizeof(mp_limb_t)*values.flat.size()); for (int j=0;j<degree;j++) { for (int i=0;i<n;i++) Z[i] = EV(to_exact(X[i].value())+(j+1)*Y[i]); predicate(values[j],Z); if (verbose) cout << " predicate("<<Z<<") = "<<mpz_str(values[j])<<endl; } // Find an interpolating polynomial, overriding the input with the result. scaled_univariate_in_place_interpolating_polynomial(values); if (verbose) cout << " coefs = "<<mpz_str(values)<<endl; // Compute sign for (int j=0;j<degree;j++) if (const int sign = mpz_sign(values[j])) return sign>0; } { // Add one perturbation level after another until we hit a nonzero polynomial. 
Our current implementation duplicates // work from one iteration to the next for simplicity, which is fine since the first interation suffices almost always. for (int d=2;;d++) { if (verbose) cout << " level "<<d<<endl; // Compute the next level of perturbations Y.resize(d*n); for (int i=0;i<n;i++) Y[(d-1)*n+i] = perturbation<m>(d,X[i].seed()); // Evaluate polynomial at every point in an "easy corner" const auto lambda = monomials(degree,d); const Array<mp_limb_t,2> values(lambda.m,precision,uninit); for (int j=0;j<lambda.m;j++) { for (int i=0;i<n;i++) Z[i] = EV(to_exact(X[i].value())+lambda(j,0)*Y[i]); for (int v=1;v<d;v++) for (int i=0;i<n;i++) Z[i] += EV(lambda(j,v)*Y[v*n+i]); predicate(values[j],Z); } // Find an interpolating polynomial, overriding the input with the result. in_place_interpolating_polynomial(degree,lambda,values); // Compute sign int sign = 0; int sign_j = -1; for (int j=0;j<lambda.m;j++) if (const int s = mpz_sign(values[j])) { if (check) // Verify that a term which used to be zero doesn't become nonzero GEODE_ASSERT(lambda(j,d-1)); if (!sign || monomial_less(lambda[sign_j],lambda[j])) { sign = s; sign_j = j; } } // If we find a nonzero sign, we're done! if (sign) return sign>0; // If we get through two levels without fixing the degeneracy, run a fast, strict identity test to make sure we weren't handed an impossible problem. if (d==2) assert_last_nonzero(predicate,values[0],X,"perturbed_sign: identically zero predicate"); } } }