virtual stats local_run_l(int index, bool learn) {
    DAction* ac = fac;
    double total_reward = 0;
    DAction* b = new DAction(prob->getActions(), (int) false);

    do {
        prob->apply(*ac);
        total_reward += prob->reward();

        if(learn) {
            if(!prob->restrictedAction().restricted)
                ac = this->computeNextAction(getState(prob), prob->reward(), prob->goal());
            else {
                agent->had_choosed(getState(prob), *b, prob->reward(), false, prob->goal());
                ac = b;
            }
            // ac = this->computeNextAction(getState(prob), prob->reward(), prob->goal());
        } else {
            if(ac != fac)
                delete ac;
            ac = this->agent->decision(getState(prob), false);
        }
    } while(!prob->done());

    if(!learn)
        delete ac;
    delete b;

    return {prob->step, total_reward, prob->step, index};
}
void ompl::base::SO3StateSpace::enforceBounds(State *state) const
{
    // see http://stackoverflow.com/questions/11667783/quaternion-and-normalization/12934750#12934750
    StateType *qstate = static_cast<StateType*>(state);
    double nrmsq = quaternionNormSquared(*qstate);
    double error = std::abs(1.0 - nrmsq);
    const double epsilon = 2.107342e-08;
    if (error < epsilon)
    {
        // cheap renormalization: 2/(1 + nrmsq) approximates 1/sqrt(nrmsq)
        // when the squared norm is already close to 1 (see the link above)
        double scale = 2.0 / (1.0 + nrmsq);
        qstate->x *= scale;
        qstate->y *= scale;
        qstate->z *= scale;
        qstate->w *= scale;
    }
    else
    {
        if (nrmsq < 1e-6)
            qstate->setIdentity();
        else
        {
            double scale = 1.0 / std::sqrt(nrmsq);
            qstate->x *= scale;
            qstate->y *= scale;
            qstate->z *= scale;
            qstate->w *= scale;
        }
    }
}
// #include <QuantumStateITPP.h>

template <class StateType>
StateType StateTensor(const StateType& psi_1, const StateType& psi_2)
{
    // Tensor (Kronecker) product of two states: the coefficients of psi_2,
    // scaled by the i-th coefficient of psi_1, fill the i-th block of the result.
    StateType tmp(psi_1.size() * psi_2.size());
    for (int i = 0; i < psi_1.size(); i++)
    {
        tmp.coefficients.set_subvector(i * psi_2.size(),
                                       psi_1.coefficients(i) * psi_2.coefficients);
    }
    return tmp;
}
const size_t* ForwardDeadReckoningPropnetStateMachine::Simulate6(const StateType& s)
{
    StateType temp = StateType();
    MoveType m = MoveType();
    temp.Equate(s);

    // Pick the role whose alternating-move mask matches the current state.
    size_t role_id = 0;
    for(size_t i = 0; i < role_size; i++)
    {
        if(alt_role_masks[i] & temp)
        {
            role_id = i;
            break;
        }
    }

    // Play random legal moves, cycling through the roles, until a terminal state is reached.
    while(!role_propnets[role_id].Update(temp, *role_propnet_payloads[role_id]))
    {
        role_propnets[role_id].GetRandomLegalMove(*role_propnet_payloads[role_id], m);
        role_propnets[role_id].Update(m, *role_propnet_payloads[role_id]);
        temp.Equate(role_propnet_payloads[role_id]->GetState());
        // note: the original `role_id = ++role_id % role_size` has unspecified
        // evaluation order before C++17
        role_id = (role_id + 1) % role_size;
    }
    return GetGoals(temp);
}
stats local_run(int index, bool random_init)
{
    prob->init(random_init);
    agent->startEpisode(getState(prob), *fac);

    // if(no_learn_knowledge) {
    local_run_l(index, true);

    prob->init(random_init);
    agent->startEpisode(getState(prob), *fac);
    return local_run_l(index, false);
    // }
    // return local_run_l(index, true);
}
void ompl::base::SO3StateSpace::enforceBounds(State *state) const
{
    StateType *qstate = static_cast<StateType*>(state);
    double nrm = norm(qstate);
    if (fabs(nrm) < MAX_QUATERNION_NORM_ERROR)
        qstate->setIdentity();
    else if (fabs(nrm - 1.0) > MAX_QUATERNION_NORM_ERROR)
    {
        qstate->x /= nrm;
        qstate->y /= nrm;
        qstate->z /= nrm;
        qstate->w /= nrm;
    }
}
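// Illustrative sketch (not part of OMPL): the two enforceBounds variants above differ in how
// they renormalize a quaternion. The exact path divides by sqrt(x^2 + y^2 + z^2 + w^2); the
// first variant instead scales by 2/(1 + n^2) when the squared norm n^2 is already close to 1.
// The struct Quat and the helper names below are hypothetical, for demonstration only.
#include <cmath>
#include <cstdio>

struct Quat { double x, y, z, w; };

static double normSquared(const Quat& q) { return q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w; }

static void renormalizeExact(Quat& q)
{
    double s = 1.0 / std::sqrt(normSquared(q));
    q.x *= s; q.y *= s; q.z *= s; q.w *= s;
}

static void renormalizeApprox(Quat& q)
{
    // first-order approximation of 1/sqrt(n^2) around n^2 = 1; only valid for nearly unit quaternions
    double s = 2.0 / (1.0 + normSquared(q));
    q.x *= s; q.y *= s; q.z *= s; q.w *= s;
}

int main()
{
    Quat q{0.0, 0.0, 0.001, 1.0}; // slightly longer than a unit quaternion
    Quat a = q, b = q;
    renormalizeExact(a);
    renormalizeApprox(b);
    std::printf("exact norm^2 = %.12f, approx norm^2 = %.12f\n", normSquared(a), normSquared(b));
    return 0;
}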
static void handle_plus(StateType& state,
                        typename StateType::clause_type clause,
                        typename StateType::variable_type flip)
{
    // The flipped variable made one more literal of `clause` true.
    if(state.num_true_literals(clause) == 0)
    {
        // Clause goes from unsatisfied to satisfied by exactly one literal:
        // flipping `flip` back would break it again.
        state.inc_breakcount(flip, state.weight(clause));
        state.watcher1(clause, flip);
    }
    else if(state.num_true_literals(clause) == 1)
    {
        // Clause now has two true literals: its single satisfier no longer breaks it.
        state.dec_breakcount(state.watcher1(clause), state.weight(clause));
        state.watcher2(clause, flip);
    }
    state.inc_num_true_literals(clause);
}
void RigidBody::GetState(StateType& State)
{
    // Copy the elements of the member variables that describe the state of
    // the rigid body into the state vector.
    if (State.size() != m_CountStateVectors)
    {
        State.resize(m_CountStateVectors);
    }
    // Obtain the iterator only after the (potential) resize, since resizing
    // invalidates iterators taken beforehand.
    StateType::iterator it = State.begin();
    *it++ = m_Position;
    *it++ = bnu::column(m_Rotate, 0);
    *it++ = bnu::column(m_Rotate, 1);
    *it++ = bnu::column(m_Rotate, 2);
    *it++ = m_AngularMomentum;
    *it = m_LinearMomentum;
}
const size_t* ForwardDeadReckoningPropnetStateMachine::Simulate5(const StateType& s)
{
    StateType temp;
    MoveType m;
    temp.Equate(s);

    // Random playout: keep applying random legal moves until the propnet
    // reports a terminal state.
    while(!initial_pn.Update(temp, *initial_pn_payload))
    {
        initial_pn.GetRandomLegalMove(*initial_pn_payload, m);
        initial_pn.Update(m, *initial_pn_payload);
        temp.Equate(initial_pn_payload->GetState());
    }
    return GetGoals(temp);
}
void gram_schmidt( StateType &x , LyapType &lyap , size_t n )
{
    if( !num_of_lyap ) return;
    if( ptrdiff_t( ( num_of_lyap + 1 ) * n ) != std::distance( x.begin() , x.end() ) )
        throw std::domain_error( "gram_schmidt() : size of state does not match the number of lyapunov exponents." );

    typedef typename StateType::value_type value_type;
    typedef typename StateType::iterator iterator;

    value_type norm[num_of_lyap];
    value_type tmp[num_of_lyap];
    iterator first = x.begin() + n;
    iterator beg1 = first , end1 = first + n;

    std::fill( norm , norm + num_of_lyap , 0.0 );

    // normalize first vector
    norm[0] = sqrt( std::inner_product( beg1 , end1 , beg1 , 0.0 ) );
    normalize( beg1 , end1 , norm[0] );
    beg1 += n;
    end1 += n;

    // orthogonalize each remaining vector against all previous ones, then normalize it
    for( size_t j = 1 ; j < num_of_lyap ; ++j , beg1 += n , end1 += n )
    {
        for( size_t k = 0 ; k < j ; ++k )
            tmp[k] = std::inner_product( beg1 , end1 , first + k*n , 0.0 );
        for( size_t k = 0 ; k < j ; ++k )
            substract_vector( beg1 , end1 , first + k*n , tmp[k] );

        // normalize j-th vector
        norm[j] = sqrt( std::inner_product( beg1 , end1 , beg1 , 0.0 ) );
        normalize( beg1 , end1 , norm[j] );
    }

    // accumulate the logarithms of the norms for the Lyapunov exponents
    for( size_t j = 0 ; j < num_of_lyap ; j++ )
        lyap[j] += log( norm[j] );
}
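// Illustrative sketch (hypothetical, self-contained; not the code above): classical Gram-Schmidt
// on a flat buffer laid out as [ system state (n) | vec_1 (n) | ... | vec_m (n) ], which is the
// layout gram_schmidt() above expects. Shown here with n = 2 and m = 2; a Lyapunov computation
// would accumulate log(norm) for each vector, as the loop above does.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
    const std::size_t n = 2, m = 2;
    // system state (ignored here) followed by two tangent vectors
    std::vector<double> x = { 0.0, 0.0,   3.0, 4.0,   1.0, 0.0 };

    double* first = x.data() + n;
    for (std::size_t j = 0; j < m; ++j)
    {
        double* vj = first + j * n;
        // remove the components along the previously orthonormalized vectors
        for (std::size_t k = 0; k < j; ++k)
        {
            double* vk = first + k * n;
            double dot = std::inner_product(vj, vj + n, vk, 0.0);
            for (std::size_t i = 0; i < n; ++i) vj[i] -= dot * vk[i];
        }
        // normalize the j-th vector
        double norm = std::sqrt(std::inner_product(vj, vj + n, vj, 0.0));
        for (std::size_t i = 0; i < n; ++i) vj[i] /= norm;
        std::printf("norm[%zu] = %f\n", j, norm);
    }
    return 0;
}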
/**
 * Store the given experience.
 *
 * @param state Given state.
 * @param action Given action.
 * @param reward Given reward.
 * @param nextState Given next state.
 * @param isEnd Whether the next state is a terminal state.
 */
void Store(const StateType& state,
           ActionType action,
           double reward,
           const StateType& nextState,
           bool isEnd)
{
    states.col(position) = state.Encode();
    actions(position) = action;
    rewards(position) = reward;
    nextStates.col(position) = nextState.Encode();
    isTerminal(position) = isEnd;
    position++;
    if (position == capacity)
    {
        full = true;
        position = 0;
    }
}
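// Illustrative sketch (hypothetical types, not the buffer class above): the same wrap-around
// bookkeeping on a plain ring buffer, plus uniform sampling over the currently valid range
// (position entries before the buffer has wrapped, capacity entries afterwards).
#include <cstddef>
#include <random>
#include <vector>

struct Experience { std::vector<double> state, nextState; int action; double reward; bool isEnd; };

class SimpleReplay
{
 public:
  explicit SimpleReplay(std::size_t capacity) : data(capacity), position(0), full(false) {}

  void Store(const Experience& e)
  {
    data[position] = e;
    position++;
    if (position == data.size()) { full = true; position = 0; } // overwrite oldest entries next
  }

  // Assumes at least one experience has been stored.
  const Experience& SampleOne(std::mt19937& rng) const
  {
    std::size_t valid = full ? data.size() : position;
    std::uniform_int_distribution<std::size_t> pick(0, valid - 1);
    return data[pick(rng)];
  }

 private:
  std::vector<Experience> data;
  std::size_t position;
  bool full;
};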
void RigidBody::GetStateDerivative(StateType& State)
{
    // Work out the state derivatives and place them in State.
    if (State.size() != m_CountStateVectors)
    {
        State.resize(m_CountStateVectors);
    }
    // Obtain the iterator only after the (potential) resize.
    StateType::iterator it = State.begin();

    bnu::vector<double> Temp0;
    bnu::vector<double> Temp1;

    /* d(position)/dt = linear velocity = linear momentum / mass */
    bnu::vector<double> LinearVelocity(m_LinearMomentum / m_Mass);

    /* Inertia tensor inverse in world space: R * IBodyInv * RTranspose */
    bnu::matrix<double> InertiaTensorInv(bnu::prod(m_Rotate, m_BodySpaceInertiaTensorInv));
    InertiaTensorInv = bnu::prod(InertiaTensorInv, bnu::trans(m_Rotate));

    /* omega = IInv * angular momentum */
    bnu::vector<double> Omega(bnu::prod(InertiaTensorInv, m_AngularMomentum));

    *it++ = LinearVelocity;

    /* d(R)/dt: each column of R changes as omega x column
       (the angular velocity Omega, not the angular momentum, drives the rotation) */
    Temp0 = bnu::column(m_Rotate, 0);
    Cross(Omega, Temp0, Temp1);
    *it++ = Temp1;
    Temp0 = bnu::column(m_Rotate, 1);
    Cross(Omega, Temp0, Temp1);
    *it++ = Temp1;
    Temp0 = bnu::column(m_Rotate, 2);
    Cross(Omega, Temp0, Temp1);
    *it++ = Temp1;

    /* d(angular momentum)/dt = torque and d(linear momentum)/dt = force,
       in the same order GetState stores angular momentum then linear momentum */
    *it++ = m_Torque;
    *it = m_Force;
}
void VarianceDiffusionStrategy<StateType>::diffuse(const ParticleList &_src, ParticleList &_dest)
{
    if(_src.size() == 0)
        return;

    if(_dest.size() != _src.size())
        _dest = _src;

    const unsigned numDofs = _src[0].state.size();
    unsigned index = 0;
    StateType noise = _dest[0].state;

    if(mVariance.size() != noise.size())
    {
        TRACE("------------- variance has wrong size. (should not be possible) ---------- ABORTING");
        return;
    }
    if(mVarianceFactor.size() != noise.size())
    {
        TRACE("----------resizing variance factors. (should not happen) -------------- ");
        mVarianceFactor.resize(noise.size(), 1);
    }

    // iterate over all source particles
    for(ParticleListConstIterator it = _src.begin(); it != _src.end(); it++)
    {
        // for every dof
        for(unsigned i = 0; i < numDofs; i++)
        {
            noise[i] = this->mNormDistGenerator() * mVariance[i] * mVarianceFactor[i];
        }
        _dest[index].state = noise;
        _dest[index].state += _src[index].state;
        index++;
    }
}
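// Illustrative sketch (hypothetical types, not the VarianceDiffusionStrategy API): per-dimension
// Gaussian diffusion of a particle set, i.e. dest[p][d] = src[p][d] + N(0,1) * variance[d],
// which is essentially what the diffuse() method above does to its ParticleList.
#include <random>
#include <vector>

using Particle = std::vector<double>;

void diffuseParticles(const std::vector<Particle>& src,
                      std::vector<Particle>& dest,
                      const std::vector<double>& variance,
                      std::mt19937& rng)
{
    std::normal_distribution<double> gauss(0.0, 1.0);
    dest = src;
    for (std::size_t p = 0; p < src.size(); ++p)
        for (std::size_t d = 0; d < src[p].size(); ++d)
            dest[p][d] = src[p][d] + gauss(rng) * variance[d];
}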
stats run_best(int index)
{
    prob->init(false);
    DAction* ac = new DAction(*fac);
    double total_reward = 0;

    do {
        prob->apply(*ac);
        delete ac;
        total_reward += prob->reward();
        ac = best_policy->decision(getState(prob), false);
    } while(!prob->done());
    delete ac;

    return {prob->step, total_reward, prob->step, index};
}
inline int CashKarp54(const SystemType &dxdt, StateType &x, double &t, double &h,
                      double relTol, double absTol, double maxStepSize)
{
  // Constants from the Butcher tableau, see:
  // http://en.wikipedia.org/wiki/Cash-Karp_method and
  // http://en.wikipedia.org/wiki/Runge-Kutta_methods
  const double c2 = 1.0 / 5.0;
  const double c3 = 3.0 / 10.0;
  const double c4 = 3.0 / 5.0;
  const double c5 = 1.0;
  const double c6 = 7.0 / 8.0;

  const double b5th1 = 37.0 / 378.0;
  const double b5th2 = 0.0;
  const double b5th3 = 250.0 / 621.0;
  const double b5th4 = 125.0 / 594.0;
  const double b5th5 = 0.0;
  const double b5th6 = 512.0 / 1771.0;

  const double b4th1 = 2825.0 / 27648.0;
  const double b4th2 = 0.0;
  const double b4th3 = 18575.0 / 48384.0;
  const double b4th4 = 13525.0 / 55296.0;
  const double b4th5 = 277.0 / 14336.0;
  const double b4th6 = 1.0 / 4.0;

  const double bDiff1 = b5th1 - b4th1;
  const double bDiff2 = b5th2 - b4th2;
  const double bDiff3 = b5th3 - b4th3;
  const double bDiff4 = b5th4 - b4th4;
  const double bDiff5 = b5th5 - b4th5;
  const double bDiff6 = b5th6 - b4th6;

  const double a21 = 1.0 / 5.0;
  const double a31 = 3.0 / 40.0;
  const double a32 = 9.0 / 40.0;
  const double a41 = 3.0 / 10.0;
  const double a42 = -9.0 / 10.0;
  const double a43 = 6.0 / 5.0;
  const double a51 = -11.0 / 54.0;
  const double a52 = 5.0 / 2.0;
  const double a53 = -70.0 / 27.0;
  const double a54 = 35.0 / 27.0;
  const double a61 = 1631.0 / 55296.0;
  const double a62 = 175.0 / 512.0;
  const double a63 = 575.0 / 13824.0;
  const double a64 = 44275.0 / 110592.0;
  const double a65 = 253.0 / 4096.0;

  const std::size_t stateSize = x.size();

  StateType tempState; // used to store the state for the next k value and later reused
                       // for the error difference

  StateType k1;
  dxdt(x, k1, t); // fill k1
  for (std::size_t i = 0; i < stateSize; ++i)
    tempState[i] = x[i] + h * a21 * k1[i];

  StateType k2;
  dxdt(tempState, k2, t + c2 * h); // fill k2
  for (std::size_t i = 0; i < stateSize; ++i)
    tempState[i] = x[i] + h * (a31 * k1[i] + a32 * k2[i]);

  StateType k3;
  dxdt(tempState, k3, t + c3 * h); // fill k3
  for (std::size_t i = 0; i < stateSize; ++i)
    tempState[i] = x[i] + h * (a41 * k1[i] + a42 * k2[i] + a43 * k3[i]);

  StateType k4;
  dxdt(tempState, k4, t + c4 * h); // fill k4
  for (std::size_t i = 0; i < stateSize; ++i)
    tempState[i] = x[i] + h * (a51 * k1[i] + a52 * k2[i] + a53 * k3[i] + a54 * k4[i]);

  StateType k5;
  dxdt(tempState, k5, t + c5 * h); // fill k5
  for (std::size_t i = 0; i < stateSize; ++i)
    tempState[i] = x[i] + h * (a61 * k1[i] + a62 * k2[i] + a63 * k3[i] + a64 * k4[i] + a65 * k5[i]);

  StateType k6;
  dxdt(tempState, k6, t + c6 * h); // fill k6

  StateType order5Solution;
  for (std::size_t i = 0; i < stateSize; ++i)
    order5Solution[i] = h * (b5th1 * k1[i] + b5th2 * k2[i] + b5th3 * k3[i] +
                             b5th4 * k4[i] + b5th5 * k5[i] + b5th6 * k6[i]);

  // difference between the order 4 and order 5 solutions, used for the error check
  // (reusing the tempState variable)
  for (std::size_t i = 0; i < stateSize; ++i)
    tempState[i] = h * (bDiff1 * k1[i] + bDiff2 * k2[i] + bDiff3 * k3[i] +
                        bDiff4 * k4[i] + bDiff5 * k5[i] + bDiff6 * k6[i]);

  StateType potentialSolution;
  for (std::size_t i = 0; i < stateSize; ++i)
    potentialSolution[i] = x[i] + order5Solution[i];

  // boost odeint style error step sizing method
  StateType errorValueList;
  for (std::size_t i = 0; i < stateSize; ++i)
    errorValueList[i] = std::abs(tempState[i] / (absTol + relTol * (potentialSolution[i])));

  double maxErrorValue = *(std::max_element(errorValueList.begin(), errorValueList.end()));

  // reject the step and decrease the step size
  if (maxErrorValue > 1.0)
  {
    h = h * std::max(0.9 * std::pow(maxErrorValue, -0.25), 0.2);
    return 0;
  }

  // accept the step
  t += h;
  for (std::size_t i = 0; i < stateSize; ++i)
    x[i] = potentialSolution[i];

  // if the error is small enough then increase the step size
  if (maxErrorValue < 0.5)
  {
    h = std::min(h * std::min(0.9 * std::pow(maxErrorValue, -0.20), 5.0), maxStepSize);
  }
  return 1;
}
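// Illustrative usage sketch: assumes the enclosing scope instantiates CashKarp54 with
// StateType = std::array<double, 1> and SystemType = a callable taking
// (const StateType&, StateType&, double), since the template header is not shown above.
// Integrates dx/dt = -x from t = 0 to t = 5 with adaptive step-size control.
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>

int main()
{
  using State = std::array<double, 1>;
  auto decay = [](const State& x, State& dxdt, double /*t*/) { dxdt[0] = -x[0]; };

  State x{1.0};
  double t = 0.0, h = 0.1;
  const double tEnd = 5.0, relTol = 1e-8, absTol = 1e-8, maxStep = 0.5;

  while (t < tEnd)
  {
    h = std::min(h, tEnd - t);                           // do not step past tEnd
    CashKarp54(decay, x, t, h, relTol, absTol, maxStep); // returns 0 on a rejected step,
                                                         // in which case it is simply retried
  }
  std::printf("x(%g) = %.10f (exact %.10f)\n", t, x[0], std::exp(-t));
  return 0;
}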
void init()
{
    agent = this->createAgent(getState(prob), *fac);
    agentSet = true;
}
void ompl::base::DubinsStateSpace::interpolate(const State *from, const DubinsPath& path, double t, State *state) const
{
    StateType *s = allocState()->as<StateType>();
    double seg = t * path.length(), phi, v;

    s->setXY(0., 0.);
    s->setYaw(from->as<StateType>()->getYaw());
    if (!path.reverse_)
    {
        for (unsigned int i = 0; i < 3 && seg > 0; ++i)
        {
            v = std::min(seg, path.length_[i]);
            phi = s->getYaw();
            seg -= v;
            switch(path.type_[i])
            {
                case DUBINS_LEFT:
                    s->setXY(s->getX() + sin(phi+v) - sin(phi), s->getY() - cos(phi+v) + cos(phi));
                    s->setYaw(phi+v);
                    break;
                case DUBINS_RIGHT:
                    s->setXY(s->getX() - sin(phi-v) + sin(phi), s->getY() + cos(phi-v) - cos(phi));
                    s->setYaw(phi-v);
                    break;
                case DUBINS_STRAIGHT:
                    s->setXY(s->getX() + v * cos(phi), s->getY() + v * sin(phi));
                    break;
            }
        }
    }
    else
    {
        for (unsigned int i = 0; i < 3 && seg > 0; ++i)
        {
            v = std::min(seg, path.length_[2-i]);
            phi = s->getYaw();
            seg -= v;
            switch(path.type_[2-i])
            {
                case DUBINS_LEFT:
                    s->setXY(s->getX() + sin(phi-v) - sin(phi), s->getY() - cos(phi-v) + cos(phi));
                    s->setYaw(phi-v);
                    break;
                case DUBINS_RIGHT:
                    s->setXY(s->getX() - sin(phi+v) + sin(phi), s->getY() + cos(phi+v) - cos(phi));
                    s->setYaw(phi+v);
                    break;
                case DUBINS_STRAIGHT:
                    s->setXY(s->getX() - v * cos(phi), s->getY() - v * sin(phi));
                    break;
            }
        }
    }

    state->as<StateType>()->setX(s->getX() * rho_ + from->as<StateType>()->getX());
    state->as<StateType>()->setY(s->getY() * rho_ + from->as<StateType>()->getY());
    getSubSpace(1)->enforceBounds(s->as<SO2StateSpace::StateType>(1));
    state->as<StateType>()->setYaw(s->getYaw());
    freeState(s);
}
void ForwardDeadReckoningPropnetStateMachine::MetaGame_multi_player(size_t simulation_time)
{
    isZeroSumGame = true;
    isAlternatingMoves = true;

    size_t num_simulations = 0;
    size_t stop = util::Timer::microtimer() + simulation_time;

    // Start every role mask with all bits set; the masks are narrowed down below.
    StateType temp;
    for(size_t i = 0; i < core::RawAState::arr_size; i++)
        temp->s[i] = 255;

    alt_role_masks = new StateType[role_size];
    for(size_t i = 0; i < role_size; i++)
        alt_role_masks[i] = temp.Clone();

    while(util::Timer::microtimer() < stop)
    {
        StateType temp = InitState().Clone();
        MoveType m;

        while(!initial_pn.Update(temp, *initial_pn_payload))
        {
            if(isAlternatingMoves)
            {
                // Moves alternate only if at most one role has more than one legal move
                // in every encountered state.
                int current_control_r_id = -1;
                for(size_t i = 0; i < role_size; i++)
                {
                    if(initial_pn_payload->legal_size[i] > 1)
                    {
                        if(current_control_r_id > -1)
                        {
                            isAlternatingMoves = false;
                            break;
                        }
                        else
                            current_control_r_id = i;
                    }
                }

                if(current_control_r_id != -1)
                    alt_role_masks[current_control_r_id] = temp & std::move(alt_role_masks[current_control_r_id]);
            }

            initial_pn.GetRandomLegalMove(*initial_pn_payload, m);
            initial_pn.Update(m, *initial_pn_payload);
            temp.Equate(initial_pn_payload->top);
        }

        if(isZeroSumGame)
            CheckZeroSumGame();

        num_simulations++;
    }

    log.Info << "Performed " << num_simulations << " simulations in meta game simulations." << std::endl;

    if(isZeroSumGame)
        log.Info << "Game is a zero-sum game." << std::endl;

    if(isAlternatingMoves)
        log.Info << "Game with alternating moves." << std::endl;
    else
    {
        delete[] alt_role_masks;
        alt_role_masks = NULL;
    }
}
const size_t* ForwardDeadReckoningPropnetStateMachine::Simulate2(const StateType& s)
{
    StateType temp;
    temp.Equate(s);

    size_t state_index = 0;
    bool isLoop2 = true;

    StateType top = without_terminal_payload->top;
    MoveSet* m_set = without_terminal_payload->m_set;

    while(!without_terminal_payload->terminal)
    {
        without_terminal_pn->Update2(temp, *without_terminal_payload);

        // Start every role's move iterator at its first legal move.
        for(size_t i = 0; i < role_size; i++)
            simulate2_it[i] = m_set[i].begin();

        state_index = 0;
        isLoop2 = true;

        // Enumerate every joint move (the Cartesian product of the roles' legal moves).
        while(isLoop2)
        {
            without_terminal_pn->Update3(simulate2_it, *without_terminal_payload);

            if(IsTerminal_compiled(top, GetGoals_buff))
                return GetGoals(top);
            else
                simulate2_state_arr[state_index++].Equate(top);

            // Advance the joint move like an odometer over the per-role move sets.
            size_t index = 0;
            while(true)
            {
                simulate2_it[index]++;
                if(simulate2_it[index] == m_set[index].end())
                {
                    if(index == role_size - 1)
                    {
                        isLoop2 = false;
                        break;
                    }
                    simulate2_it[index] = m_set[index].begin();
                    index++;
                }
                else
                    break;
            }
        }

        // Continue the playout from one of the enumerated successor states, chosen at random.
        size_t rnd = rand() % state_index;
        temp.Equate(simulate2_state_arr[rnd]);
    }
    return GetGoals(temp);
}
static void handle_minus(StateType& state,
                         typename StateType::clause_type clause,
                         typename StateType::variable_type flip)
{
    // The flipped variable made one literal of `clause` false.
    if(state.num_true_literals(clause) == 1)
    {
        // Clause becomes unsatisfied: flipping any of its variables would make it true again,
        // and the flipped variable no longer breaks it.
        for(auto literal : clause)
            state.inc_makecount(literal.variable(), state.weight(clause));
        state.dec_breakcount(flip, state.weight(clause));
    }
    else if(state.num_true_literals(clause) == 2)
    {
        // Clause drops to a single true literal: that literal now breaks the clause if flipped.
        if(flip == state.watcher1(clause))
            state.watcher1(clause, state.watcher2(clause));
        state.inc_breakcount(state.watcher1(clause), state.weight(clause));
    }
    else
    {
        // Clause still has at least two true literals: just repair the watchers if needed.
        if(flip == state.watcher1(clause))
        {
            for(auto literal : clause)
                if(state.is_sat(literal) && literal.variable() != flip && literal.variable() != state.watcher2(clause))
                {
                    state.watcher1(clause, literal.variable());
                    break;
                }
        }
        else if(flip == state.watcher2(clause))
        {
            for(auto literal : clause)
                if(state.is_sat(literal) && literal.variable() != flip && literal.variable() != state.watcher1(clause))
                {
                    state.watcher2(clause, literal.variable());
                    break;
                }
        }
    }
    state.dec_num_true_literals(clause);
}