/**
 * Set the step-size safety factor and the values derived from it.
 *
 * @param safein Safety factor; must lie strictly between 0.001 and 1.
 *
 * Stores the factor itself, its reciprocal and its natural logarithm.
 */
void h_ode_set_safe(const real safein) {
  /* pre-condition */
  BI_ASSERT(safein > BI_REAL(0.001) && safein < BI_REAL(1.0));

  /* derived quantities first, then the factor itself */
  h_logsafe = bi::log(safein);
  h_safe1 = BI_REAL(1.0) / safein;
  h_safe = safein;
}
/**
 * Initialise all host ODE integrator settings to their default values.
 *
 * Each setting goes through its own setter so that any derived values
 * and pre-condition checks implemented there are applied.
 */
void h_ode_init() {
  /* initial step size */
  h_ode_set_h0(BI_REAL(1.0e-2));

  /* relative and absolute error tolerances */
  h_ode_set_rtoler(BI_REAL(1.0e-7));
  h_ode_set_atoler(BI_REAL(1.0e-7));

  /* rounding unit */
  h_ode_set_uround(BI_REAL(1.0e-16));

  /* step-size control: safety factor, lower/upper change bounds, beta */
  h_ode_set_safe(BI_REAL(0.9));
  h_ode_set_facl(BI_REAL(0.2));
  h_ode_set_facr(BI_REAL(10.0));
  h_ode_set_beta(BI_REAL(0.04));

  /* maximum number of steps per integration */
  h_ode_set_nsteps(1000);
}
void h_ode_set_beta(const real betain) { /* pre-condition */ BI_ASSERT(betain >= 0.0 && betain <= BI_REAL(0.2)); h_beta = betain; h_expo1 = BI_REAL(0.2) - betain*BI_REAL(0.75); h_expo = BI_REAL(0.5)*(BI_REAL(0.2) - betain*BI_REAL(0.75)); }
/**
 * Advance every trajectory held in @p s from time @p t1 to time @p t2
 * using adaptive step sizes.
 *
 * @param t1 Start time; must be strictly less than @p t2.
 * @param t2 End time.
 * @param[in,out] s State; trajectory @c p is loaded from it, and
 * intermediate and final stage results are stored back into it.
 *
 * Trajectories are distributed over OpenMP threads. For each one, steps
 * are attempted until @p t2 is reached or @c h_nsteps attempts have been
 * made; in the latter case the loop simply stops.
 */
void bi::RK43IntegratorHost<B,S,T1>::update(const T1 t1, const T1 t2, State<B,ON_HOST>& s) {
  /* pre-condition */
  BI_ASSERT(t1 < t2);

  typedef typename temp_host_vector<real>::type vector_type;
  typedef Pa<ON_HOST,B,host,host,host,host> PX;
  typedef RK43VisitorHost<B,S,S,real,PX,real> Visitor;

  static const int N = block_size<S>::value;  // variables per trajectory
  const int P = s.size();                     // number of trajectories

  #pragma omp parallel
  {
    /* per-thread working storage, reused across trajectories */
    vector_type r1(N), r2(N), err(N), old(N);
    PX pax;

    #pragma omp for
    for (int p = 0; p < P; ++p) {
      real t = t1;
      real h = h_h0;
      real logfacold = bi::log(BI_REAL(1.0e-4));

      /* old holds the last accepted state, r1 the working copy */
      host_load<B,S>(s, p, old);
      r1 = old;

      /* integrate; n counts attempted steps, accepted or not */
      for (int n = 0; t < t2 && n < h_nsteps; ++n) {
        if (BI_REAL(0.1)*bi::abs(h) <= bi::abs(t)*h_uround) {
          // step size too small; no action is taken
        }

        /* clip the step so that it does not run past t2 */
        if (t + BI_REAL(1.01)*h - t2 > BI_REAL(0.0)) {
          h = t2 - t;
          if (h <= BI_REAL(0.0)) {
            t = t2;
            break;
          }
        }

        /* stages; each result is written to the state before the next */
        Visitor::stage1(t, h, s, p, pax, r1.buf(), r2.buf(), err.buf());
        host_store<B,S>(s, p, r1);

        Visitor::stage2(t, h, s, p, pax, r1.buf(), r2.buf(), err.buf());
        host_store<B,S>(s, p, r2);

        Visitor::stage3(t, h, s, p, pax, r1.buf(), r2.buf(), err.buf());
        host_store<B,S>(s, p, r1);

        Visitor::stage4(t, h, s, p, pax, r1.buf(), r2.buf(), err.buf());
        host_store<B,S>(s, p, r2);

        Visitor::stage5(t, h, s, p, pax, r1.buf(), r2.buf(), err.buf());
        host_store<B,S>(s, p, r1);

        /* mean squared scaled error over the N variables */
        real e2 = BI_REAL(0.0);
        for (int id = 0; id < N; ++id) {
          const real e = err(id)*h/(h_atoler + h_rtoler*bi::max(bi::abs(old(id)), bi::abs(r1(id))));
          e2 += e*e;
        }
        e2 /= N;

        const bool accepted = (e2 <= BI_REAL(1.0));
        if (accepted) {
          /* accept: move forward; old is only needed if more steps follow */
          t += h;
          if (t < t2) {
            old = r1;
          }
        } else {
          /* reject: restore the last accepted state, locally and in s */
          r1 = old;
          host_store<B,S>(s, p, old);
        }

        /* compute next step size */
        if (t < t2) {
          const real logfac11 = h_expo*bi::log(e2);

          // NOTE(review): if e2 is NaN, neither e2 <= 1 nor e2 > 1 holds, so
          // the step is rejected above yet the "accepted" branch runs here.
          if (e2 > BI_REAL(1.0)) {
            /* step was rejected: shrink, but by no more than h_facl */
            h *= bi::max(h_facl, bi::exp(h_logsafe - logfac11));
          } else {
            /* step was accepted */
            real fac = bi::exp(h_beta*logfacold + h_logsafe - logfac11);  // Lund-stabilization
            fac = bi::min(h_facr, bi::max(h_facl, fac));  // bound
            h *= fac;
            logfacold = BI_REAL(0.5)*bi::log(bi::max(e2, BI_REAL(1.0e-8)));
          }
        }
      }
    }
  }
}
/**
 * Advance every trajectory held in @p s from time @p t1 to time @p t2
 * using adaptive step sizes.
 *
 * @param t1 Start time; must be strictly less than @p t2.
 * @param t2 End time.
 * @param[in,out] s State; trajectory @c p is loaded from it, and
 * intermediate and final stage results are stored back into it.
 *
 * Trajectories are distributed over OpenMP threads. For each one, steps
 * are attempted until @p t2 is reached or @c h_nsteps attempts have been
 * made; in the latter case the loop simply stops.
 */
void bi::DOPRI5IntegratorHost<B,S,T1>::update(const T1 t1, const T1 t2, State<B,ON_HOST>& s) {
  /* pre-condition */
  BI_ASSERT(t1 < t2);

  typedef host_vector_reference<real> vector_reference_type;
  typedef Pa<ON_HOST,B,host,host,host,host> PX;
  typedef DOPRI5VisitorHost<B,S,S,real,PX,real> Visitor;

  static const int N = block_size<S>::value;  // variables per trajectory
  const int P = s.size();                     // number of trajectories

  #pragma omp parallel
  {
    /* one automatic buffer per thread, carved into ten length-N vectors;
     * an automatic array is used in preference to heap allocation */
    real buf[10*N];
    vector_reference_type x0(buf, N);
    vector_reference_type x1(buf + N, N);
    vector_reference_type x2(buf + 2*N, N);
    vector_reference_type x3(buf + 3*N, N);
    vector_reference_type x4(buf + 4*N, N);
    vector_reference_type x5(buf + 5*N, N);
    vector_reference_type x6(buf + 6*N, N);
    vector_reference_type err(buf + 7*N, N);
    vector_reference_type k1(buf + 8*N, N);
    vector_reference_type k7(buf + 9*N, N);
    PX pax;

    #pragma omp for
    for (int p = 0; p < P; ++p) {
      real t = t1;
      real h = h_h0;
      real logfacold = bi::log(BI_REAL(1.0e-4));
      bool k1in = false;  // becomes true once stage1 has run for this trajectory

      host_load<B,S>(s, p, x0);

      /* integrate; n counts attempted steps, accepted or not */
      for (int n = 0; t < t2 && n < h_nsteps; ++n) {
        if (BI_REAL(0.1)*bi::abs(h) <= bi::abs(t)*h_uround) {
          // step size too small; no action is taken
        }

        /* clip the step so that it does not run past t2 */
        if (t + BI_REAL(1.01)*h - t2 > BI_REAL(0.0)) {
          h = t2 - t;
          if (h <= BI_REAL(0.0)) {
            t = t2;
            break;
          }
        }

        /* stages; each intermediate result is written to the state */
        Visitor::stage1(t, h, s, p, pax, x0.buf(), x1.buf(), x2.buf(), x3.buf(), x4.buf(), x5.buf(), x6.buf(), k1.buf(), err.buf(), k1in);
        k1in = true;  // can reuse from previous iteration in future
        host_store<B,S>(s, p, x1);

        Visitor::stage2(t, h, s, p, pax, x0.buf(), x2.buf(), x3.buf(), x4.buf(), x5.buf(), x6.buf(), err.buf());
        host_store<B,S>(s, p, x2);

        Visitor::stage3(t, h, s, p, pax, x0.buf(), x3.buf(), x4.buf(), x5.buf(), x6.buf(), err.buf());
        host_store<B,S>(s, p, x3);

        Visitor::stage4(t, h, s, p, pax, x0.buf(), x4.buf(), x5.buf(), x6.buf(), err.buf());
        host_store<B,S>(s, p, x4);

        Visitor::stage5(t, h, s, p, pax, x0.buf(), x5.buf(), x6.buf(), err.buf());
        host_store<B,S>(s, p, x5);

        Visitor::stage6(t, h, s, p, pax, x0.buf(), x6.buf(), err.buf());

        /* compute error: mean squared scaled error over the N variables */
        Visitor::stageErr(t, h, s, p, pax, x0.buf(), x6.buf(), k7.buf(), err.buf());

        real e2 = BI_REAL(0.0);
        for (int id = 0; id < N; ++id) {
          const real e = err(id)*h/(h_atoler + h_rtoler*bi::max(bi::abs(x0(id)), bi::abs(x6(id))));
          e2 += e*e;
        }
        e2 /= N;

        /* accept/reject */
        const bool accepted = (e2 <= BI_REAL(1.0));
        if (accepted) {
          /* accept: x6 becomes the current state and k7 the next k1 */
          t += h;
          x0.swap(x6);
          k1.swap(k7);
        }
        /* x0 is the new state on accept, the unchanged one on reject */
        host_store<B,S>(s, p, x0);

        /* compute next step size */
        if (t < t2) {
          const real logfac11 = h_expo*bi::log(e2);

          // NOTE(review): if e2 is NaN, neither e2 <= 1 nor e2 > 1 holds, so
          // the step is rejected above yet the "accepted" branch runs here.
          if (e2 > BI_REAL(1.0)) {
            /* step was rejected: shrink, but by no more than h_facl */
            h *= bi::max(h_facl, bi::exp(h_logsafe - logfac11));
          } else {
            /* step was accepted */
            real fac = bi::exp(h_beta*logfacold + h_logsafe - logfac11);  // Lund-stabilization
            fac = bi::min(h_facr, bi::max(h_facl, fac));  // bound
            h *= fac;
            logfacold = BI_REAL(0.5)*bi::log(bi::max(e2, BI_REAL(1.0e-8)));
          }
        }
      }
    }
  }
}
/**
 * Unary minus for packed reals.
 *
 * @param o Operand.
 *
 * @return The result of XOR-ing the packed value of @p o with a vector
 * whose every element is -0.0.
 */
BI_FORCE_INLINE inline const bi::sse_real bi::operator-(const bi::sse_real& o) {
  /* XOR is commutative, so operand order does not affect the result */
  return BI_SSE_XOR_P(o.packed, BI_SSE_SET1_P(BI_REAL(-0.0)));
}
/**
 * Set the upper bound on the factor by which the step size may change
 * after an accepted step, together with its reciprocal.
 *
 * @param facrin Upper bound. No range check is performed here.
 */
void h_ode_set_facr(const real facrin) {
  h_facc2 = BI_REAL(1.0) / facrin;  // reciprocal of the bound
  h_facr = facrin;
}
/**
 * Set the lower bound on the factor by which the step size may change
 * in one step, together with its reciprocal.
 *
 * @param faclin Lower bound. No range check is performed here.
 */
void h_ode_set_facl(const real faclin) {
  h_facc1 = BI_REAL(1.0) / faclin;  // reciprocal of the bound
  h_facl = faclin;
}
/**
 * Set the rounding unit used in the integrators' "step size too small"
 * comparison.
 *
 * @param uroundin Rounding unit; must lie strictly between 1.0e-19 and 1.
 */
void h_ode_set_uround(const real uroundin) {
  /* pre-condition */
  BI_ASSERT(uroundin > BI_REAL(1.0e-19) && uroundin < BI_REAL(1.0));

  h_uround = uroundin;
}