// Re-plans this piece's cached trajectory when a move intersects it.
//
// If either the vacated square (oldPos) or the newly occupied square (newPos)
// lies on the currently cached path, the path may now be blocked or opened,
// so it is rebuilt via makeTrajectory(); otherwise the cached path is kept.
//
// @param board   Board state used by makeTrajectory() to re-plan.
// @param oldPos  Square the moved piece left.
// @param newPos  Square the moved piece arrived at.
void Piece::updateTrajectory(const Board* board, const Position& oldPos, const Position& newPos) {
    Log::Debug::writeln("Updating", 1);
    for (const auto& square : trajectory) {
        if (square == oldPos || square == newPos) {
            // The move touches our path: re-plan once and stop scanning.
            makeTrajectory(board);
            return;
        }
    }
}
// Runs `numExperiments` episodes of a multi-target tracking problem, one POMCP
// solver per target, accumulating total / per-timestep rewards and periodically
// dumping a cumulative-reward plot via gnuplotCumulativeSave().
//
// @param numExperiments  Number of episodes to run.
// @param numTargets      Number of tracked targets (== solvers.size(), presumably — TODO confirm).
// @param modelHorizon    Number of timesteps simulated per episode.
// @param model           Ground-truth model used to sample target motion and observations.
// @param modelBelief     Belief used to sample each target's true initial state.
// @param solverHorizon   Planning horizon cap for the POMCP solvers.
// @param solvers         One solver per target; mutated (sampleAction advances their trees).
// @param solverBelief    Initial belief handed to each solver.
// @param outputFilename  Base filename for the gnuplot output.
// @param useTrajectory   If true, pre-sample each target's whole trajectory up front
//                        and replay it, instead of stepping the model online.
void makeMultiExperimentPOMCP( unsigned numExperiments, unsigned numTargets, unsigned modelHorizon,
                               const Model & model, const ap::Belief & modelBelief,
                               unsigned solverHorizon, std::vector<Solver> & solvers,
                               const ap::Belief & solverBelief,
                               const std::string & outputFilename, bool useTrajectory = false ) {
    // Shared RNG, seeded once for the whole run.
    static std::default_random_engine rand(AIToolbox::Impl::Seeder::getSeed());

    double totalReward = 0.0;
    // Reward summed per timestep index across all experiments (for the cumulative plot).
    std::vector<double> timestepTotalReward(modelHorizon, 0.0);
    double avgReward = 0.0;
    // Per-target latest observation and true state.
    std::vector<size_t> obs(numTargets), pos(numTargets);

    std::cout << numExperiments << " experiments with: Submodular POMCP! ";
    std::cout << "Initial Belief: " << printBelief(modelBelief) << '\n';
    std::cout << "Solver Belief: " << printBelief(solverBelief) << '\n';

    // `experiment` is declared outside the loop so the final save below can
    // tell how far we got if the run is interrupted early.
    unsigned experiment = 1;
    for ( ; experiment <= numExperiments; ++experiment ) {
        // Run pomcp, but don't get actions yet
        for ( unsigned p = 0; p < numTargets; ++p ) {
            // Sample each target's true initial state from the model belief.
            pos[p] = AIToolbox::sampleProbability(model.getS(), modelBelief, rand);
            solvers[p].sampleAction(solverBelief, std::min(solverHorizon, modelHorizon));
        }
        // Extract action
        size_t a = extractAction(solvers);

        // Optionally pre-sample full target trajectories (modelHorizon + 1 states,
        // index 0 being the initial state) and replay them during the episode.
        std::vector<std::vector<size_t>> trajectories;
        if ( useTrajectory ) {
            for ( unsigned p = 0; p < numTargets; ++p )
                trajectories.push_back( makeTrajectory(model, modelHorizon + 1, modelBelief) );
        }

        for ( unsigned i = 1; i <= modelHorizon; ++i ) {
            double rew = 0.0;
#ifdef VISUALIZE
            // Keep pre-step positions so the visualizer can show the transition.
            auto oldp = pos;
#endif
            // Extract observations and rewards, and update positions of targets.
            for ( unsigned p = 0; p < numTargets; ++p ) {
                // Reward: 1 per target whose solver's guess matches the target's
                // CURRENT true state — scored before pos[p] is advanced below.
                rew += ( model.getTrueState(solvers[p].getGuess()) == model.getTrueState(pos[p]) );
                if ( useTrajectory ) {
                    // Replay the pre-sampled trajectory; only the observation is sampled.
                    pos[p] = trajectories[p][i];
                    std::tie(obs[p], std::ignore) = model.sampleOR( trajectories[p][i-1], a, trajectories[p][i] );
                }
                else std::tie(pos[p], obs[p], std::ignore) = model.sampleSOR( pos[p], a );
            }
            totalReward += rew;
            timestepTotalReward[i-1] += rew;
            // Running average over "experiments completed", counting the current
            // one fractionally as i/modelHorizon.
            // NOTE(review): the experiment == 1 branch reports the raw total instead
            // of dividing by i/modelHorizon — looks inconsistent with the else
            // branch; confirm whether this is intentional (display-only value).
            if ( experiment == 1 ) avgReward = totalReward;
            else avgReward = totalReward / (experiment - 1 + ((double)i)/modelHorizon);

            std::cout // << "[S = " << s << "][A = " << a << "][S1 = " << s1 << "][ O = " << o << " ][ R = " << rew << " ]"
                << "EXPERIMENT " << std::setw(4) << experiment << ", TIMESTEP " << std::setw(4) << i
                << "\tTotal rew: " << std::setw(4) << totalReward << "\tAvg: " << std::setw(4) << avgReward;
#ifdef VISUALIZE
            std::cout << '\n';
            model.visualize(oldp, a);
#else
            // Without visualization, overwrite the same console line each step.
            std::cout << '\r' << std::flush;
#endif
            // Advance each solver's tree with the taken action and its target's
            // observation, then pick the next joint action.
            for ( unsigned p = 0; p < numTargets; ++p )
                solvers[p].sampleAction(a, obs[p], std::min(solverHorizon, modelHorizon - i));
            a = extractAction(solvers);
        }
        // processInterrupted is presumably a signal-handler flag set on Ctrl-C —
        // TODO confirm; checked only between episodes so an episode always completes.
        if ( processInterrupted ) break;
        // Checkpoint the cumulative plot every 100 experiments.
        if ( ! (experiment % 100) ) gnuplotCumulativeSave(timestepTotalReward, outputFilename, experiment);
    }
    // Final save; min() clamps `experiment` (numExperiments + 1 after a full run,
    // or the interrupted episode's index) to the number actually completed.
    gnuplotCumulativeSave(timestepTotalReward, outputFilename, std::min(experiment, numExperiments));
}