Exemplo n.º 1
0
        void Piece::updateTrajectory(const Board* board, const Position& oldPos, const Position& newPos)
        {
            Log::Debug::writeln("Updating", 1);

            bool foundPos = false;
            for (posList::iterator iter = trajectory.begin(); iter != trajectory.end(); iter++)
            {
                if (*iter == oldPos || *iter == newPos)
                {
                    foundPos = true;
                    break;
                }
            }
            if (foundPos) makeTrajectory(board);
        }
/**
 * Runs a batch of multi-target experiments driven by one solver per target
 * and records the reward collected at every timestep.
 *
 * Each experiment lasts modelHorizon timesteps. At every timestep the reward
 * is the number of targets p for which model.getTrueState() of solvers[p]'s
 * current guess equals model.getTrueState() of the target's position. A single
 * action, obtained from extractAction(solvers), is applied to all targets.
 *
 * Progress is printed to std::cout. The per-timestep cumulative rewards are
 * handed to gnuplotCumulativeSave() every 100 experiments and once more when
 * the function finishes. The global processInterrupted flag is checked after
 * each experiment and stops the batch early when set.
 *
 * @param numExperiments Number of experiments to run.
 * @param numTargets     Number of targets; solvers[0 .. numTargets-1] are
 *                       indexed, so solvers must hold at least that many.
 * @param modelHorizon   Number of timesteps in each experiment.
 * @param model          Model used to sample states/observations and to map
 *                       states through getTrueState().
 * @param modelBelief    Belief from which initial target positions are sampled
 *                       (and which is passed to makeTrajectory()).
 * @param solverHorizon  Upper bound on the horizon handed to the solvers; the
 *                       value actually used is capped by the remaining
 *                       timesteps of the experiment.
 * @param solvers        One solver per target; mutated by sampleAction().
 * @param solverBelief   Belief handed to each solver at the start of an
 *                       experiment.
 * @param outputFilename Forwarded to gnuplotCumulativeSave().
 * @param useTrajectory  When true, target states follow trajectories produced
 *                       up front by makeTrajectory() and only observations are
 *                       sampled (model.sampleOR); when false, next state and
 *                       observation are sampled together (model.sampleSOR).
 */
void makeMultiExperimentPOMCP(
                    unsigned numExperiments, unsigned numTargets,
                    unsigned modelHorizon,   const Model  & model,            const ap::Belief & modelBelief,
                    unsigned solverHorizon,  std::vector<Solver> & solvers,   const ap::Belief & solverBelief,
                    const std::string & outputFilename, bool useTrajectory = false )
{
    static std::default_random_engine engine(AIToolbox::Impl::Seeder::getSeed());

    double cumulativeReward = 0.0;
    std::vector<double> rewardPerTimestep(modelHorizon, 0.0);
    double runningAverage = 0.0;

    std::vector<size_t> observations(numTargets), positions(numTargets);

    // Horizon given to the solvers for the very first action of an experiment.
    const unsigned firstSearchHorizon = std::min(solverHorizon, modelHorizon);

    std::cout << numExperiments << " experiments with: Submodular POMCP! ";

    std::cout << "Initial Belief: " << printBelief(modelBelief)  << '\n';
    std::cout << "Solver  Belief: " << printBelief(solverBelief) << '\n';

    unsigned run = 1;
    while ( run <= numExperiments ) {
        // Place every target and let its solver search from the initial
        // belief; the joint action is only extracted once all have searched.
        for ( unsigned t = 0; t < numTargets; ++t ) {
            positions[t] = AIToolbox::sampleProbability(model.getS(), modelBelief, engine);
            solvers[t].sampleAction(solverBelief, firstSearchHorizon);
        }
        size_t action = extractAction(solvers);

        // NOTE(review): in trajectory mode the first timestep's reward is
        // still computed against the independently sampled positions[t]
        // above, not against paths[t][0] - confirm this is intended.
        std::vector<std::vector<size_t>> paths;
        if ( useTrajectory ) {
            for ( unsigned t = 0; t < numTargets; ++t )
                paths.push_back( makeTrajectory(model, modelHorizon + 1, modelBelief) );
        }

        for ( unsigned step = 1; step <= modelHorizon; ++step ) {
            double stepReward = 0.0;
#ifdef VISUALIZE
            auto previousPositions = positions;
#endif
            // Score the current guesses first, then advance every target and
            // collect the observation its solver will receive.
            for ( unsigned t = 0; t < numTargets; ++t ) {
                stepReward += ( model.getTrueState(solvers[t].getGuess()) == model.getTrueState(positions[t]) );

                if ( !useTrajectory ) {
                    std::tie(positions[t], observations[t], std::ignore) = model.sampleSOR( positions[t], action );
                }
                else {
                    positions[t] = paths[t][step];
                    std::tie(observations[t], std::ignore) = model.sampleOR( paths[t][step-1], action, paths[t][step] );
                }
            }

            cumulativeReward          += stepReward;
            rewardPerTimestep[step-1] += stepReward;

            // During the first experiment the raw total is shown; afterwards
            // the total is divided by the (fractional) number of experiments
            // completed so far.
            runningAverage = ( run == 1 )
                ? cumulativeReward
                : cumulativeReward / (run - 1 + static_cast<double>(step) / modelHorizon);

            std::cout << "EXPERIMENT "      << std::setw(4) << run
                      << ", TIMESTEP "      << std::setw(4) << step
                      << "\tTotal rew: "    << std::setw(4) << cumulativeReward
                      << "\tAvg: "          << std::setw(4) << runningAverage;
#ifdef VISUALIZE
            std::cout << '\n';
            model.visualize(previousPositions, action);
#else
            std::cout << '\r'               << std::flush;
#endif

            // Feed each solver the action taken and its own observation; the
            // search horizon shrinks with the timesteps left in the experiment.
            const unsigned remainingHorizon = std::min(solverHorizon, modelHorizon - step);
            for ( unsigned t = 0; t < numTargets; ++t )
                solvers[t].sampleAction(action, observations[t], remainingHorizon);

            action = extractAction(solvers);
        }

        if ( processInterrupted ) break;

        // Periodic checkpoint of the accumulated per-timestep rewards.
        if ( run % 100 == 0 )
            gnuplotCumulativeSave(rewardPerTimestep, outputFilename, run);

        ++run;
    }

    // After a full batch `run` is numExperiments + 1, hence the clamp; after
    // an interruption it is the experiment that was running.
    gnuplotCumulativeSave(rewardPerTimestep, outputFilename, std::min(run, numExperiments));
}