Esempio n. 1
0
 /**
  * Store the given experience.
  *
  * @param state Given state.
  * @param action Given action.
  * @param reward Given reward.
  * @param nextState Given next state.
  * @param isEnd Whether next state is terminal state.
  */
 void Store(const StateType& state,
            ActionType action,
            double reward,
            const StateType& nextState,
            bool isEnd)
 {
   // Slot that receives this transition: the current write cursor.
   const auto slot = position;

   // Encoded states are written as columns; the scalar fields of the
   // transition are written at the same index of their own containers.
   states.col(slot) = state.Encode();
   actions(slot) = action;
   rewards(slot) = reward;
   nextStates.col(slot) = nextState.Encode();
   isTerminal(slot) = isEnd;

   // Move the cursor forward. When it reaches capacity, wrap back to the
   // first slot and record that every slot has now been written once.
   if (++position == capacity)
   {
     position = 0;
     full = true;
   }
 }
Esempio n. 2
0
/**
 * Replays every evidence trajectory through the predictor and dumps, for each
 * time step, a rendered belief image and one line of prediction statistics.
 *
 * Command-line options: --map <filename>, --evidence <filename>,
 * --factor <int>.  For evidence i the statistics are written to
 * ../output/pppredict<i>.dat and the belief images to
 * ../compare/pp<i>-<elapsed>.bmp.
 *
 * Evidence entries with an empty trajectory or empty time list are skipped:
 * the code below needs their first and last elements.
 *
 * @param argc Argument count, forwarded to the option parser.
 * @param argv Argument vector, forwarded to the option parser.
 * @return 0 once every trajectory has been processed.
 */
int main(int argc, char **argv) {
   OptionParser opts;

   string mapFile, evidFile;
   int factor;

   opts.addOption(new StringOption("map",
            "--map <filename>                 : map file",
            "../input/grid.bmp", mapFile, false));

   opts.addOption(new StringOption("evidence",
            "--evidence <filename>            : evidence file",
            "", evidFile, true));

   opts.addOption(new IntOption("factor",
            "--factor <int>                   : scaling factor",
            1, factor, true));

   opts.parse(argc,argv);

   // Colours used when rendering the belief images.
   JetColorMap jet;
   RGBTRIPLE black = {0,0,0};

   RGBTRIPLE red;
   red.R = 255;
   red.G = 0;
   red.B = 0;

   RGBTRIPLE green;
   green.R = 0;
   green.G = 255;
   green.B = 0;

   BMPFile bmpFile(mapFile);

   Grid grid(bmpFile, black);

   Evidence testSet(evidFile, grid, factor);
 /*
   if (1) {
	   evid.split(trainSet, testSet, 0.8);
   }else{
	   evid.deterministicsplit(trainSet, testSet);
   }*/

#if 0
   cout << "Creating Markov Model"<<endl;
   MarkovModel markmodel(grid, trainSet);

   double totalObj = 0.0;

   for (int i=0; i < testSet.size(); i++) {
      vector<pair<int, int> > path = testSet.at(i);
      cout << "Calling eval"<<endl;
      double obj = markmodel.eval(path);
      cout << "OBJ: "<<i<<" "<<obj<<endl;

      totalObj += obj;
   }

   cout << "TOTAL OBJ: "<<totalObj<<endl;

   cout << "AVERAGE OBJ: "<<totalObj/testSet.size()<<endl;
   return 0;
#endif

   // Feature stack: constant, obstacle, then four obstacle-blur features
   // (constructed with 5, 10, 15 and 20) -- six features in total.
   vector<PosFeature> features;

   cout << "Constant Feature"<<endl;

   ConstantFeature constFeat(grid);
   features.push_back(constFeat);

   cout << "Obstacle Feature"<<endl;

   ObstacleFeature obsFeat(grid);
   features.push_back(obsFeat);

   for (int i=1; i < 5; i++) {
      cout << "Blur Feature "<<i<<endl;
      ObstacleBlurFeature blurFeat(grid, 5*i);
      features.push_back(blurFeat);
   }

   cout << "Creating feature array"<<endl;
   FeatureArray featArray2(features);

   cout << "Creating lower resolution feature array"<<endl;
   FeatureArray featArray(featArray2, factor);

   pair<int, int> dims = grid.dims();

   // Hard-coded weights, one per feature in the order pushed above.
   // at() throws if the feature list ever gets shorter than six entries.
   vector<double> weights(features.size(), -0.0);
   weights.at(0) = -2.23;
   weights.at(1) = -6.2;
   weights.at(2) = -0.35;
   weights.at(3) = -2.73;
   weights.at(4) = -0.92;
   weights.at(5) = -0.26;
   Parameters params(weights);

   OrderedWaveInferenceEngine engine(InferenceEngine::GRID8);

   // Prior with the same value (0.0) in every cell.
   vector<vector<double> > prior(dims.first,vector<double> (dims.second,0.0));
/*
   double divide = 1.0;
   vector<double> radiusWeight;
   for (int i=0; i < 20; i++) {
      radiusWeight.push_back(1.0/divide);
      divide*=2;
   }
   generatePrior(grid, trainSet, priorOrig, radiusWeight, factor);

   reducePrior(priorOrig, prior, factor);
*/

   BMPFile gridView(dims.first, dims.second);

   RewardMap rewards(featArray, params);

   vector<vector<double> > occupancy;

   Predictor predictor(grid, rewards, engine);

   predictor.setPrior(prior);

   cout << testSet.size() <<" Examples"<<endl;

   for (int i=0; i < testSet.size(); i++) {

      vector<pair<int, int> > traj = testSet.at(i);
      vector<double> times = testSet.getTimes(i);

      // front()/back() on an empty vector is undefined behaviour, and there
      // is nothing to predict for such an entry anyway.
      if (traj.empty() || times.empty()) {
         cout << "Evidence: "<<i<<" is empty, skipping"<<endl;
         continue;
      }

      pair<int, int> initial = traj.front();
      pair<int,int> & botinGrid = testSet.at_bot(i);
      // NOTE(review): assumes the raw trajectory is non-empty whenever the
      // grid trajectory is -- confirm against Evidence.
      pair<double,double>& end = testSet.at_raw(i).back();

      predictor.setStart(initial);

      double startTime = times.front();

      char buf[1024];
      snprintf(buf, sizeof(buf), "../output/pppredict%03d.dat", i);
      ofstream file(buf);
      if (!file) {
         // Keep going: the images are still produced, only the statistics
         // for this evidence entry are lost.
         cout << "WARNING: could not open "<<buf<<" for writing"<<endl;
      }

      // Walk the trajectory in steps of 0.4 time units; index is the first
      // sample whose timestamp is not before the current tick.
      size_t index = 0;
      for (double tick = startTime; index < traj.size(); tick+=0.4) {

         while (index < traj.size() && times.at(index) < tick)
            index++;

         if (index == traj.size() ) break;

         cout << "Evidence: "<<i<<"  timestep: "<<tick
            <<"   index: "<<index<<endl;
         predictor.predict(traj.at(index), occupancy);

         cout << "SIZE: "<<prior.size()<<endl;

         // Render the posterior together with obstacles, the label at
         // botinGrid, and the part of the path before sample `index`.
         gridView.addBelief(predictor.getPosterior(), -30.0, 0.0,jet);

         grid.addObstacles(gridView, black);
         gridView.addLabel(botinGrid,green);

         vector<pair<int, int> > subTraj(traj.begin(), traj.begin()+index);
         gridView.addVector(subTraj, red, factor);

         snprintf(buf, sizeof(buf), "../compare/pp%03d-%03f.bmp", i,
               tick-startTime);
         gridView.write(buf);

         // Sum of the reward map over the samples before `index`.
         //pair<double,double> values = predictor.check(traj.back());
         double cost = 0.0;
         for (size_t itr = 0; itr < index; itr++)
            cost += rewards.at(traj[itr].first,traj[itr].second);

         cout<<i<<" Normalizer: "<<predictor.getNormalizer(traj.back())<<
            " path cost: "<<cost<<" Probability:  "<<cost+predictor.getNormalizer(traj.back())<<endl;

         // NOTE(review): timeOcc is never read; it is kept only because it
         // is not visible here whether getTimeOccupancy() has side effects.
         vector<vector<vector<double> > > timeOcc
            = predictor.getTimeOccupancy();

         vector<vector<double > > posterior  = predictor.getPosterior();

         // Arg-max of the posterior: the predicted destination cell.
         double maxV = -HUGE_VAL;
         pair<int,int> predestGrid;
         for (int ii=0; ii< dims.first; ii++) {
            for (int jj=0; jj < dims.second; jj++) {
               const double belief = posterior.at(ii).at(jj);
               if (belief > maxV) {
                  maxV = belief;
                  predestGrid.first = ii;
                  predestGrid.second = jj;
               }
            }
         }

         // Euclidean distance between the predicted destination and the last
         // raw point of the trajectory.
         pair<double,double> predestPoint
            = grid.grid2Real(predestGrid.first,predestGrid.second);
         const double dx = end.first-predestPoint.first;
         const double dy = end.second-predestPoint.second;
         double dist = sqrt(dx*dx + dy*dy);

         double logloss = entropy(posterior);

         cout<<"final belief: "<<posterior.at(traj.back().first).at(traj.back().second)
            <<" max: "<<maxV
            <<" logloss: "<<logloss<<endl;
         cout<<botinGrid.first<<" "<<botinGrid.second
            <<" "<<predestGrid.first<<" "<<predestGrid.second<<endl;

         // One statistics line per tick: elapsed time, logloss, belief at
         // botinGrid, belief at the true end cell, max belief, distance.
         file<<tick-startTime
            <<" "<<logloss
            <<" "<<posterior.at(botinGrid.first).at(botinGrid.second)
            <<" "<<posterior.at(traj.back().first).at(traj.back().second)
            <<" "<<maxV<<" "<<dist<<endl;

      }
      file.close();
   }

   return 0;
}
Esempio n. 3
0
/**
 * Entry point of the island-model experiment.
 *
 * Reads the "Islands Model" parameters and the EO parameters from the command
 * line, then runs in one of two modes selected by the "smp" parameter:
 *  - smp disabled: this process is a single island identified by its MPI
 *    rank; rank 0 initialises the migration matrix and sends every other rank
 *    its own row before the island is started.
 *  - smp enabled: nislands islands are built inside this process and all of
 *    them are added to one ThreadsRunner.
 *
 * @param argc Argument count, forwarded to MPI and to the EO parser.
 * @param argv Argument vector, forwarded to MPI and to the EO parser.
 * @return 0 in both modes (the MPI branch calls world.abort(0) just before).
 */
int main (int argc, char *argv[])
{
    /**********************
     * MPI initialization *
     **********************/

    boost::mpi::environment env(argc, argv, MPI_THREAD_MULTIPLE, true);
    boost::mpi::communicator world;

    /*********************************
     * At least 4 nodes are required *
     *********************************/
    // NOTE(review): nothing in this function actually checks the node count.

    const size_t ALL = world.size();
    const size_t RANK = world.rank();

    /*********************
     * EO initialization *
     *********************/

    eoParser parser(argc, argv);
    eoState state;    // keeps all things allocated
    dim::core::State state_dim;    // keeps all things allocated

    /*************************
     * Parameter definitions *
     *************************/

    bool sync = parser.createParam(bool(true), "sync", "sync", 0, "Islands Model").value();
    bool smp = parser.createParam(bool(true), "smp", "smp", 0, "Islands Model").value();
    unsigned nislands = parser.createParam(unsigned(4), "nislands", "Number of islands (see --smp)", 0, "Islands Model").value();
    // a
    double alphaP = parser.createParam(double(0.2), "alpha", "Alpha Probability", 'a', "Islands Model").value();
    double alphaF = parser.createParam(double(0.01), "alphaF", "Alpha Fitness", 'A', "Islands Model").value();
    // b
    double betaP = parser.createParam(double(0.01), "beta", "Beta Probability", 'b', "Islands Model").value();
    // d
    // Default is 100 divided by the number of islands (nislands in smp mode,
    // the MPI world size otherwise).
    double probaSame = parser.createParam(double(100./(smp ? nislands : ALL)), "probaSame", "Probability for an individual to stay in the same island", 'd', "Islands Model").value();
    // I
    bool initG = parser.createParam(bool(true), "initG", "initG", 'I', "Islands Model").value();

    bool update = parser.createParam(bool(true), "update", "update", 'U', "Islands Model").value();
    bool feedback = parser.createParam(bool(true), "feedback", "feedback", 'F', "Islands Model").value();
    bool migrate = parser.createParam(bool(true), "migrate", "migrate", 'M', "Islands Model").value();
    unsigned nmigrations = parser.createParam(unsigned(1), "nmigrations", "Number of migrations to do at each generation (0=all individuals are migrated)", 0, "Islands Model").value();
    unsigned stepTimer = parser.createParam(unsigned(1000), "stepTimer", "stepTimer", 0, "Islands Model").value();
    bool deltaUpdate = parser.createParam(bool(true), "deltaUpdate", "deltaUpdate", 0, "Islands Model").value();
    bool deltaFeedback = parser.createParam(bool(true), "deltaFeedback", "deltaFeedback", 0, "Islands Model").value();
    // Stored as the reciprocal of the value given on the command line.
    double sensitivity = 1 / parser.createParam(double(1.), "sensitivity", "sensitivity of delta{t} (1/sensitivity)", 0, "Islands Model").value();
    std::string rewardStrategy = parser.createParam(std::string("avg"), "rewardStrategy", "Strategy of rewarding: best or avg", 0, "Islands Model").value();

    // One reward and one timeout per island, each overridable through its own
    // "reward<i>" / "timeout<i>" parameter (default 1).
    std::vector<double> rewards(smp ? nislands : ALL, 1.);
    std::vector<double> timeouts(smp ? nislands : ALL, 1.);

    for (size_t i = 0; i < (smp ? nislands : ALL); ++i)
    {
        std::ostringstream ss;
        ss << "reward" << i;
        rewards[i] = parser.createParam(double(1.), ss.str(), ss.str(), 0, "Islands Model").value();
        ss.str("");
        ss << "timeout" << i;
        timeouts[i] = parser.createParam(double(1.), ss.str(), ss.str(), 0, "Islands Model").value();
    }

    /********************************
     * Declaration of EO components *
     ********************************/

    unsigned chromSize = parser.getORcreateParam(unsigned(0), "chromSize", "The length of the bitstrings", 'n',"Problem").value();
    eoInit<EOT>& init = dim::do_make::genotype(parser, state, EOT(), 0);

    // NOTE(review): rewards has nislands entries in smp mode but is indexed
    // by the MPI rank here -- confirm RANK < nislands always holds in smp mode.
    // Presumably `state` takes ownership of the functors stored in it (see the
    // "keeps all things allocated" remark above) -- TODO confirm.
    eoEvalFunc<EOT>* ptEval = NULL;
    ptEval = new SimulatedEval( rewards[RANK] );
    state.storeFunctor(ptEval);

    eoEvalFuncCounter<EOT> eval(*ptEval);

    unsigned popSize = parser.getORcreateParam(unsigned(100), "popSize", "Population Size", 'P', "Evolution Engine").value();
    dim::core::Pop<EOT>& pop = dim::do_make::detail::pop(parser, state, init);

    double targetFitness = parser.getORcreateParam(double(1000), "targetFitness", "Stop when fitness reaches",'T', "Stopping criterion").value();
    unsigned maxGen = parser.getORcreateParam(unsigned(0), "maxGen", "Maximum number of generations () = none)",'G',"Stopping criterion").value();
    dim::continuator::Base<EOT>& continuator = dim::do_make::continuator<EOT>(parser, state, eval);

    // NOTE(review): the conditional mixes `unsigned` and `int`, so -1 is
    // converted to unsigned before being passed on -- confirm this matches
    // what the IslandData constructor expects.
    dim::core::IslandData<EOT> data(smp ? nislands : -1);

    std::string monitorPrefix = parser.getORcreateParam(std::string("result"), "monitorPrefix", "Monitor prefix filenames", '\0', "Output").value();
    dim::utils::CheckPoint<EOT>& checkpoint = dim::do_make::checkpoint<EOT>(parser, state, continuator, data, 1, stepTimer);

    /**************
     * EO routine *
     **************/

    make_parallel(parser);
    make_verbose(parser);
    make_help(parser);

    if (!smp) // no smp enabled use mpi instead
    {

        /********************************************
         * Distribution of operators to the islands *
         ********************************************/

        // Synchronous runs use DummyOp; asynchronous runs use SimulatedOp
        // built from this rank's timeout.
        eoMonOp<EOT>* ptMon = NULL;
        if (sync)
        {
            ptMon = new DummyOp;
        }
        else
        {
            ptMon = new SimulatedOp( timeouts[RANK] );
        }
        state.storeFunctor(ptMon);

        /*********************************
         * Declaration of DIM components *
         *********************************/

        dim::core::ThreadsRunner< EOT > tr;

        dim::evolver::Easy<EOT> evolver( /*eval*/*ptEval, *ptMon, false );

        // Each component below is picked from the feature toggle (feedback /
        // update / migrate) and from sync vs async; a disabled feature gets
        // the matching Dummy* implementation.
        dim::feedbacker::Base<EOT>* ptFeedbacker = NULL;
        if (feedback)
        {
            if (sync)
            {
                ptFeedbacker = new dim::feedbacker::sync::Easy<EOT>(alphaF);
            }
            else
            {
                ptFeedbacker = new dim::feedbacker::async::Easy<EOT>(alphaF, sensitivity, deltaFeedback);
            }
        }
        else
        {
            ptFeedbacker = new dim::algo::Easy<EOT>::DummyFeedbacker();
        }
        state_dim.storeFunctor(ptFeedbacker);

        dim::vectorupdater::Base<EOT>* ptUpdater = NULL;
        if (update)
        {
            // Any rewardStrategy other than "best" selects Average.
            dim::vectorupdater::Reward<EOT>* ptReward = NULL;
            if (rewardStrategy == "best")
            {
                ptReward = new dim::vectorupdater::Best<EOT>(alphaP, betaP);
            }
            else
            {
                ptReward = new dim::vectorupdater::Average<EOT>(alphaP, betaP, sensitivity, sync ? false : deltaUpdate);
            }
            state_dim.storeFunctor(ptReward);

            ptUpdater = new dim::vectorupdater::Easy<EOT>(*ptReward);
        }
        else
        {
            ptUpdater = new dim::algo::Easy<EOT>::DummyVectorUpdater();
        }
        state_dim.storeFunctor(ptUpdater);

        dim::memorizer::Easy<EOT> memorizer;

        dim::migrator::Base<EOT>* ptMigrator = NULL;
        if (migrate)
        {
            if (sync)
            {
                ptMigrator = new dim::migrator::sync::Easy<EOT>();
            }
            else
            {
                ptMigrator = new dim::migrator::async::Easy<EOT>(nmigrations);
            }
        }
        else
        {
            ptMigrator = new dim::algo::Easy<EOT>::DummyMigrator();
        }
        state_dim.storeFunctor(ptMigrator);

        dim::algo::Easy<EOT> island( evolver, *ptFeedbacker, *ptUpdater, memorizer, *ptMigrator, checkpoint, monitorPrefix );

        // Only the asynchronous mode goes through the threads runner; the
        // synchronous mode calls the island directly further down.
        if (!sync)
        {
            tr.addHandler(*ptFeedbacker).addHandler(*ptMigrator).add(island);
        }

        /***************
         * Rock & Roll *
         ***************/

        /*************************************************************************
         * Create the transition matrix and distribute its vectors to the islands *
         *************************************************************************/

        dim::core::MigrationMatrix probabilities( ALL );
        dim::core::InitMatrix initmatrix( initG, probaSame );

        // Rank 0 initialises the matrix, keeps its own row, sends row i to
        // rank i with message tag 100 and prints the parameter summary; every
        // other rank just receives its row from rank 0.
        if ( 0 == RANK )
        {
            initmatrix( probabilities );
            std::cout << probabilities;
            data.proba = probabilities(RANK);

            for (size_t i = 1; i < ALL; ++i)
            {
                world.send( i, 100, probabilities(i) );
            }

            std::cout << "Island Model Parameters:" << std::endl
                      << "alphaP: " << alphaP << std::endl
                      << "alphaF: " << alphaF << std::endl
                      << "betaP: " << betaP << std::endl
                      << "probaSame: " << probaSame << std::endl
                      << "initG: " << initG << std::endl
                      << "update: " << update << std::endl
                      << "feedback: " << feedback << std::endl
                      << "migrate: " << migrate << std::endl
                      << "sync: " << sync << std::endl
                      << "stepTimer: " << stepTimer << std::endl
                      << "deltaUpdate: " << deltaUpdate << std::endl
                      << "deltaFeedback: " << deltaFeedback << std::endl
                      << "sensitivity: " << sensitivity << std::endl
                      << "chromSize: " << chromSize << std::endl
                      << "popSize: " << popSize << std::endl
                      << "targetFitness: " << targetFitness << std::endl
                      << "maxGen: " << maxGen << std::endl
                      ;
        }
        else
        {
            world.recv( 0, 100, data.proba );
        }

        /******************************************
         * Get the population size of all islands *
         ******************************************/

        world.barrier();
        dim::utils::print_sum(pop);

        FitnessInit fitInit;

        apply<EOT>(fitInit, pop);

        if (sync)
        {
            island( pop, data );
        }
        else
        {
            tr( pop, data );
        }

        // NOTE(review): the run ends through abort(0) instead of falling out
        // of main -- presumably to stop ranks that are still running; confirm
        // this is intentional.
        world.abort(0);

        return 0 ;

    }

    // smp

    /*********************************
     * Declaration of DIM components *
     *********************************/

    dim::core::ThreadsRunner< EOT > tr;

    std::vector< dim::core::Pop<EOT> > islandPop(nislands);
    std::vector< dim::core::IslandData<EOT> > islandData(nislands);

    dim::core::MigrationMatrix probabilities( nislands );
    dim::core::InitMatrix initmatrix( initG, probaSame );

    initmatrix( probabilities );
    std::cout << probabilities;

    FitnessInit fitInit;

    // Build one complete island per iteration (population, data, operators,
    // algorithm) and register it with the threads runner.
    for (size_t i = 0; i < nislands; ++i)
    {
        std::cout << "island " << i << std::endl;

        islandPop[i].append(popSize, init);

        apply<EOT>(fitInit, islandPop[i]);

        islandData[i] = dim::core::IslandData<EOT>(nislands, i);

        std::cout << islandData[i].size() << " " << islandData[i].rank() << std::endl;

        islandData[i].proba = probabilities(i);
        apply<EOT>(eval, islandPop[i]);

        /********************************************
         * Distribution of operators to the islands *
         ********************************************/

        // Unlike the MPI branch, SimulatedOp is used here whatever `sync` is.
        eoMonOp<EOT>* ptMon = NULL;
        ptMon = new SimulatedOp( timeouts[islandData[i].rank()] );
        state.storeFunctor(ptMon);

        eoEvalFunc<EOT>* __ptEval = NULL;
        __ptEval = new SimulatedEval( rewards[islandData[i].rank()] );
        state.storeFunctor(__ptEval);

        dim::evolver::Base<EOT>* ptEvolver = new dim::evolver::Easy<EOT>( /*eval*/*__ptEval, *ptMon, false );
        state_dim.storeFunctor(ptEvolver);

        dim::feedbacker::Base<EOT>* ptFeedbacker = new dim::feedbacker::smp::Easy<EOT>(islandPop, islandData, alphaF);
        state_dim.storeFunctor(ptFeedbacker);

        dim::vectorupdater::Reward<EOT>* ptReward = NULL;
        if (rewardStrategy == "best")
        {
            ptReward = new dim::vectorupdater::Best<EOT>(alphaP, betaP);
        }
        else
        {
            ptReward = new dim::vectorupdater::Average<EOT>(alphaP, betaP, sensitivity, sync ? false : deltaUpdate);
        }
        state_dim.storeFunctor(ptReward);

        dim::vectorupdater::Base<EOT>* ptUpdater = new dim::vectorupdater::Easy<EOT>(*ptReward);
        state_dim.storeFunctor(ptUpdater);

        dim::memorizer::Base<EOT>* ptMemorizer = new dim::memorizer::Easy<EOT>();
        state_dim.storeFunctor(ptMemorizer);

        dim::migrator::Base<EOT>* ptMigrator = new dim::migrator::smp::Easy<EOT>(islandPop, islandData, monitorPrefix);
        state_dim.storeFunctor(ptMigrator);

        // Per-island checkpoint; this declaration shadows the function-level
        // `checkpoint` that was built from `data`.
        dim::utils::CheckPoint<EOT>& checkpoint = dim::do_make::checkpoint<EOT>(parser, state, continuator, islandData[i], 1, stepTimer);

        dim::algo::Base<EOT>* ptIsland = new dim::algo::smp::Easy<EOT>( *ptEvolver, *ptFeedbacker, *ptUpdater, *ptMemorizer, *ptMigrator, checkpoint, islandPop, islandData, monitorPrefix );
        state_dim.storeFunctor(ptIsland);

        // Give every component of this island the island count and its index.
        ptEvolver->size(nislands);
        ptFeedbacker->size(nislands);
        ptReward->size(nislands);
        ptUpdater->size(nislands);
        ptMemorizer->size(nislands);
        ptMigrator->size(nislands);
        ptIsland->size(nislands);

        ptEvolver->rank(i);
        ptFeedbacker->rank(i);
        ptReward->rank(i);
        ptUpdater->rank(i);
        ptMemorizer->rank(i);
        ptMigrator->rank(i);
        ptIsland->rank(i);

        tr.add(*ptIsland);
    }

    // NOTE(review): the runner is invoked with the function-level `pop` and
    // `data`, not with islandPop/islandData; the islands received those two
    // vectors through their constructors -- confirm how the runner uses its
    // arguments in smp mode.
    tr(pop, data);

    return 0 ;
}
    /**
     * Plays two checkers games between the given individuals and scores both.
     *
     * In the first game (testCases == 0) ind1 is black and ind2 is white; in
     * the second game the colours are swapped.  Black moves first in every
     * round, and a game lasts at most CHECKERS_MAX_ROUNDS rounds.
     *
     * Scoring, accumulated over both games:
     *  - each individual starts with 1.0;
     *  - the winner of a game gets 800 plus (CHECKERS_MAX_ROUNDS - rounds),
     *    where rounds is the round counter when the game loop stopped;
     *  - a game with no winner adds nothing;
     *  - after each game every individual gets 2 per man and 3 per king of
     *    its own colour left on the board.
     *
     * @param ind1 First individual; substrate 0 is populated from it.
     * @param ind2 Second individual; substrate 1 is populated from it.
     * @return Pair of total rewards: first for ind1, second for ind2.
     */
    pair<double,double> CoCheckersExperiment::playGame(
        shared_ptr<NEAT::GeneticIndividual> ind1,
        shared_ptr<NEAT::GeneticIndividual> ind2
    )
    {
        //You get 1 point just for entering the game, wahooo!
        pair<double,double> rewards(1.0,1.0);

#if DEBUG_GAME_ANNOUNCER
        cout << "Playing game\n";
#endif

        populateSubstrate(ind1,0);
        populateSubstrate(ind2,1);

        uchar b[8][8];

        //cout << "Playing games with HyperNEAT as black\n";
        //for (handCodedType=0;handCodedType<5;handCodedType++)

        // testCases is not declared in this function (presumably a member):
        // 0 = ind1 plays black, 1 = ind2 plays black.
        for (testCases=0;testCases<2;testCases++)
        {
            if (testCases==0)
            {
                individualBlack = ind1;
                individualWhite = ind2;
            }
            else //testCases==1
            {
                individualBlack = ind2;
                individualWhite = ind1;
            }

            resetBoard(b);

            // retval stays -1 while the game is undecided; otherwise it holds
            // the winning colour (BLACK or WHITE).
            int retval=-1;
            int rounds=0;

            for (rounds=0;rounds<CHECKERS_MAX_ROUNDS&&retval==-1;rounds++)
            {
                //cout << "Round: " << rounds << endl;
                moveToMake = CheckersMove();

                // Black's substrate: 0 when ind1 is black, 1 when ind2 is.
                if (testCases==0)
                {
                    currentSubstrateIndex=0;
                }
                else //testCases==1
                {
                    currentSubstrateIndex=1;
                }

                //cout << "Black is thinking...\n";
                // NOTE(review): the last argument is 2 here and 3 in the
                // evaluatemin call for white -- presumably a search depth;
                // confirm the asymmetry is intended.
                evaluatemax(b,CheckersNEATDatatype(INT_MAX/2),0,2);

                // NOTE(review): this guard tests CHECKERS_EXPERIMENT_DEBUG
                // while every other debug guard in this function tests
                // COCHECKERS_EXPERIMENT_DEBUG -- confirm which is intended.
#if CHECKERS_EXPERIMENT_DEBUG
                cout << "BLACK MAKING MOVE\n";

                printBoard(b);
#endif

                // from.x == 255 is treated as "no move was selected", which
                // counts as a loss for the side to move.
                if (moveToMake.from.x==255)
                {
                    //black loses
                    cout << "BLACK LOSES!\n";
                    retval = WHITE;
                }
                else
                {
                    makeMove(moveToMake,b);
                    retval = getWinner(b,WHITE);
                }

                // Board snapshot after black's half of the round.
#if CHECKERS_EXPERIMENT_LOG_EVALUATIONS
                memcpy(gameLog[rounds*2],b,sizeof(uchar)*8*8);
#endif

#if COCHECKERS_EXPERIMENT_DEBUG
                printBoard(b);
                CREATE_PAUSE("");
#endif

                // White only moves if black's move did not end the game.
                if (retval==-1)
                {
                    //printBoard(b);

                    moveToMake = CheckersMove();
                    {
                        //progress_timer t;
                        // White's substrate is the one black is not using.
                        if (testCases==0)
                        {
                            currentSubstrateIndex=1;
                        }
                        else //testCases==1
                        {
                            currentSubstrateIndex=0;
                        }

                        //cout << "White is thinking...\n";
                        evaluatemin(b,CheckersNEATDatatype(INT_MAX/2),0,3);
                        //cout << "SimpleCheckers time: ";
                    }

#if COCHECKERS_EXPERIMENT_DEBUG
                    cout << "WHITE MAKING MOVE\n";

                    printBoard(b);
#endif

                    if (moveToMake.from.x==255)
                    {
                        //white loses
                        cout << "WHITE LOSES BECAUSE THERE'S NO MOVES LEFT!\n";
                        retval = BLACK;
#if COCHECKERS_EXPERIMENT_DEBUG
                        printBoard(b);
                        CREATE_PAUSE("");
#endif
                    }
                    else
                    {
                        makeMove(moveToMake,b);
                        retval = getWinner(b,BLACK);
                    }

#if COCHECKERS_EXPERIMENT_DEBUG
                    printBoard(b);
                    CREATE_PAUSE("");
#endif
                }

                // Board snapshot at the end of the round (after white's half,
                // if white moved).
#if CHECKERS_EXPERIMENT_LOG_EVALUATIONS
                memcpy(gameLog[rounds*2+1],b,sizeof(uchar)*8*8);
#endif
            }

            // Win bonus: 800 plus the number of rounds left unused, credited
            // to whichever individual played the winning colour.
            if (retval==BLACK)
            {
#if DEBUG_GAME_ANNOUNCER
                cout << "BLACK WON!\n";
#endif
                if (ind1==individualBlack)
                {
                    rewards.first += 800;
                    rewards.first += (CHECKERS_MAX_ROUNDS-rounds);
                }
                else
                {
                    rewards.second += 800;
                    rewards.second += (CHECKERS_MAX_ROUNDS-rounds);
                }

            }
            else if (retval==-1) //draw
            {
#if DEBUG_GAME_ANNOUNCER
                cout << "WE TIED!\n";
#endif
                //rewards.first += 200;
                //rewards.second += 200;
            }
            else //White wins
            {
#if DEBUG_GAME_ANNOUNCER
                cout << "WHITE WON\n";
#endif
                if (ind1==individualWhite)
                {
                    rewards.first += 800;
                    rewards.first += (CHECKERS_MAX_ROUNDS-rounds);
                }
                else
                {
                    rewards.second += 800;
                    rewards.second += (CHECKERS_MAX_ROUNDS-rounds);
                }
            }

            int whiteMen,blackMen,whiteKings,blackKings;

            //countPieces(gi.board,whiteMen,blackMen,whiteKings,blackKings);
            countPieces(b,whiteMen,blackMen,whiteKings,blackKings);

            // Material bonus, whatever the outcome: 2 per man and 3 per king
            // of the colour each individual played in this game.
            if (ind1==individualWhite)
            {
                rewards.first += (2 * (whiteMen) );
                rewards.first += (3 * (whiteKings) );

                rewards.second += (2 * (blackMen) );
                rewards.second += (3 * (blackKings) );
            }
            else
            {
                rewards.first += (2 * (blackMen) );
                rewards.first += (3 * (blackKings) );

                rewards.second += (2 * (whiteMen) );
                rewards.second += (3 * (whiteKings) );
            }
        }

#if DEBUG_GAME_ANNOUNCER
        cout << "Fitness earned: " << rewards.first << " & " << rewards.second << endl;
        CREATE_PAUSE("");
#endif

        return rewards;
    }
Esempio n. 5
-1
/**
 * Evaluates the objective for the given weights over every evidence
 * trajectory and hands the gradient back through the same vector.
 *
 * For each trajectory the function sums the reward along the observed path
 * (cost), runs forward-backward inference with a prior that is 0 at the
 * trajectory's final state and -HUGE_VAL everywhere else (norm), and adds
 * (cost - norm) to the objective.  The gradient accumulates, per parameter,
 * the model feature count minus the path feature count.  As a side effect a
 * BMP per trajectory and velocity layer is written under ../figures/.
 *
 * @param params In: the weights; the first featArray.size() entries are used
 *               as position-feature weights and the remainder as
 *               sequence-feature weights.  Out: overwritten with the
 *               accumulated gradient.
 * @return The negated objective, i.e. -(sum over trajectories of cost - norm).
 */
double trajOptimizerplus::eval(vector<double> &params) {

    cout << "IN EVAL "<<itrcount++<<" "<<params.size()<<endl;



    for (int i=0; i < params.size(); i++)
        cout << "PARAMS IN: "<<i<<" "<<params.at(i)<<endl;


    int factor = evidence.getFactor();

    pair<int, int> dims = grid.dims();
    int v_dim = seqFeat.num_V();

    /*
    pair<int, int> lowDims((int)ceil((float)dims.first/factor),
          (int)ceil((float)dims.second/factor));
    */
    // Prior over (x, y, velocity index), -HUGE_VAL everywhere.  Inside the
    // loop exactly one entry is set to 0 per trajectory and reset afterwards.
    vector<vector<vector<double> > >
    prior(dims.first, vector<vector<double> >(dims.second,
            vector<double> (v_dim,-HUGE_VAL)));

    double obj = 0.0;
    vector<double> gradient(params.size(), 0.0);
    vector<vector<vector<double> > > occupancy;
    vector<vector<double> > layerOccupancy;
    layerOccupancy.resize(dims.first,vector<double>(dims.second,-HUGE_VAL));
    vector<double> modelFeats, pathFeats;

    for (int i=0; i < evidence.size(); i++) {
        for (int j=0; j < params.size(); j++) {
            cout << "  "<<j<<" "<<params.at(j);
        }
        cout<<endl;

        cout << "Evidence #"<<i<<endl;
        vector<pair<int, int> >&  trajectory = evidence.at(i);
        vector<double>& velocityseq = evidence.at_v(i);
        pair<int,int>&  bot = evidence.at_bot(i);

        //  robot local blurres features
        // These features are built from this trajectory's `bot` position, so
        // they are appended to posFeatures only while the feature array is
        // constructed and popped off again right after.
        for (int r=1; r <= NUMROBFEAT; r++) {
            cout << "Adding  Robot Feature "<<r<<endl;
            RobotLocalBlurFeature robblurFeat(grid,bot,10*r);
            //	RobotGlobalFeature robFeat(grid,bot);
            posFeatures.push_back(robblurFeat);
        }

        cout << "   Creating feature array"<<endl;
        FeatureArray featArray2(posFeatures);
        FeatureArray featArray(featArray2, factor);

        for (int rr=1; rr<= NUMROBFEAT; rr++)
            posFeatures.pop_back();

        // split different posfeatures and seqfeature weights
        // NOTE(review): params[itr] is unchecked; this assumes params has at
        // least featArray.size() entries.
        vector<double> p_weights,s_weights;
        int itr = 0;
        for (; itr<featArray.size(); itr++)
            p_weights.push_back(params[itr]);
        for (; itr<params.size(); itr++)
            s_weights.push_back(params[itr]);

        //cout<<"Params"<<endl;
        Parameters p_parameters(p_weights), s_parameters(s_weights);
        /*    cout<<featArray.size()<<endl;
        	  cout<<params.size()<<endl;
        	  cout<<p_weights.size()<<endl;
        	  cout<<s_weights.size()<<endl;
        	  cout<<p_parameters.size()<<endl;
        	  cout<<s_parameters.size()<<endl;
        */
        //cout<<"Reward"<<endl;
        RewardMap rewards(featArray,seqFeat,p_parameters,s_parameters);
        DisSeqPredictor predictor(grid, rewards, engine);

        // sum of reward along the trajectory
        double cost = 0.0;
        //cout<< trajectory.size()<<endl;
        for (int j=0; j < trajectory.size(); j++) {
            //cout<<j<<" "<<trajectory.at(j).first<<" "<< trajectory.at(j).second<< " "<< seqFeat.getFeat(velocityseq.at(j))<<endl;
            cost+=rewards.at(trajectory.at(j).first, trajectory.at(j).second, seqFeat.getFeat(velocityseq.at(j)));
        }
        // NOTE(review): front()/back() assume the trajectory and its velocity
        // sequence are non-empty.
        State initial(trajectory.front(),seqFeat.getFeat(velocityseq.front()));
        State destination(trajectory.back(),seqFeat.getFeat(velocityseq.back()));
        // Mark the observed final state as the only one with prior 0.
        //for (int k=0;k<v_dim;k++)
        prior.at(destination.x()).at(destination.y()).at(destination.disV) = 0.0;

        cout << "Initial: "<<initial.x()<<"  "<<initial.y()<<"  "<<initial.disV<<endl;
        cout << "Destination: "<<destination.x()<<"  "
             <<destination.y()<<" "<<destination.disV<<endl;
        predictor.setStart(initial);
        predictor.setPrior(prior);

        double norm = predictor.forwardBackwardInference(initial, occupancy);

        // Write one image per velocity layer l: the (x, y) slice of the
        // occupancy at that layer, plus the bot label and the trajectory.
        for (int l=0; l<v_dim; l++) {
            BMPFile gridView(dims.first, dims.second);
            for (int x= 0; x<dims.first; x++) {
                for(int y=0; y<dims.second; y++) {
                    layerOccupancy.at(x).at(y) = occupancy.at(x).at(y).at(l);
                }
            }

            char buf[1024];
            /*
            RobotGlobalFeature robblurFeat(grid,bot);
            gridView.addBelief(robblurFeat.getMap(), 0.0, 25, white, red);
            gridView.addVector(trajectory, blue, factor);
            gridView.addLabel(bot,green);
            sprintf(buf, "../figures/feat%04d_%d.bmp",i,l);
            gridView.write(buf);
            */

            gridView.addBelief(layerOccupancy, -300.0, 5.0, white, red);
            //grid.addObstacles(gridView, black);
            gridView.addLabel(bot,green);
            gridView.addVector(trajectory, blue, factor);

            sprintf(buf, "../figures/train%04d_%d.bmp",i,l);
            gridView.write(buf);
        }


        /*
        for (int i=0; i < occupancy.size(); i++)
           for (int j=0; j < occupancy.at(i).size(); j++)
              if (occupancy.at(i).at(j) > -10)
                 cout << i <<" "<<j<<"    "<<occupancy.at(i).at(j)<<endl;
        */
        // Feature counts under the model (from occupancy) and along the
        // observed path.  NOTE(review): the loop below indexes both vectors
        // up to params.size(), so the featureCounts_vec calls presumably
        // append the sequence-feature counts after the position-feature
        // counts -- confirm.
        featArray.featureCounts(occupancy, modelFeats);

        featArray.featureCounts(trajectory, pathFeats);


        seqFeat.featureCounts_vec(occupancy,modelFeats);
        seqFeat.featureCounts_vec(velocityseq,pathFeats);

        // gradient[k] accumulates (model count - path count) over trajectories.
        for (int k=0; k < params.size(); k++) {
            double diff = pathFeats.at(k) - modelFeats.at(k);
            gradient.at(k) -= diff;
            cout <<" Gradient ("<< k << " -grad: "<< gradient.at(k) <<" -path: "<<
                 pathFeats.at(k)<<" -model: "<< modelFeats.at(k)<<")";
        }
        cout<<endl;
        cout << "OBJ: "<<cost-norm<< "  "<<cost<<"  "<<norm<<endl;
        obj += (cost - norm);
        /* obj is the path probability
         * cost is the sum of rewards: sum f(s,a)
         * norm is V(s_1->G), since here s_T = G, V(s_T->G) = 0*/
        // Reset the prior entry so the next trajectory starts from an
        // all -HUGE_VAL prior again.
        prior.at(destination.x()).at(destination.y()).at(destination.disV)
            = -HUGE_VAL;
    }

    cout << "RETURN OBJ: "<<-obj<<endl;

    // The gradient is returned through the input vector.
    params = gradient;

    return -obj;
}
Esempio n. 6
-1
/**
 * Evaluate the negated training objective, and accumulate its gradient, for
 * the given parameter vector over every trajectory in `evidence`.
 *
 * For each evidence trajectory the function:
 *   - sums the reward of every visited cell (`cost`),
 *   - runs a Predictor from the trajectory's first cell with a prior that is
 *     0.0 at the trajectory's last cell and -HUGE_VAL everywhere else, and
 *     takes the value returned by predict() as `norm`,
 *   - writes a debug image to "../figures/train<NNNN>.bmp",
 *   - subtracts (path feature counts - model feature counts) from `gradient`,
 *   - adds (cost - norm) to the running objective.
 *
 * Empty trajectories are reported on stdout and skipped, since they have no
 * first/last cell to use as start and destination.
 *
 * `grid`, `features`, `evidence`, `engine` and the colour constants are
 * declared outside this function (NOTE(review): presumably class members or
 * file-level globals -- confirm against the class definition).
 *
 * @param params   Current parameter values; used to build the reward map.
 * @param gradient Output; cleared, resized to params.size() and filled with
 *                 the accumulated gradient of the returned value.
 * @return The negated sum of (cost - norm) over all processed trajectories.
 */
double trajectoryOptimizer::eval(vector<double> &params, vector<double> &gradient) {

   cout << "IN EVAL   "<<params.size()<<endl;

   // size_t index: avoids the signed/unsigned comparison with size().
   for (size_t p = 0; p < params.size(); p++)
      cout << "PARAMS IN: "<<p<<" "<<params.at(p)<<endl;

   int factor = evidence.getFactor();

   // Build the feature array at the evidence's scaling factor.
   FeatureArray fullResFeatures(features);
   FeatureArray featArray(fullResFeatures, factor);

   Parameters parameters(params);
   RewardMap rewards(featArray, parameters);

   pair<int, int> dims = grid.dims();
   BMPFile gridView(dims.first, dims.second);

   // Grid dimensions after dividing by `factor`, rounded up.
   pair<int, int> lowDims((int)ceil((float)dims.first/factor),
         (int)ceil((float)dims.second/factor));

   // Every cell starts at -HUGE_VAL; only the current destination is raised
   // to 0.0 inside the loop and reset afterwards.
   vector<vector<double> > prior(lowDims.first,
         vector<double>(lowDims.second, -HUGE_VAL));

   double obj = 0.0;
   gradient.clear();
   gradient.resize(params.size(), 0.0);

   // `i` stays an int because it is formatted with %04d below.
   for (int i=0; i < evidence.size(); i++) {

      Predictor predictor(grid, rewards, engine);

      cout << "Evidence #"<<i<<endl;
      vector<pair<int, int> > trajectory = evidence.at(i);

      // front()/back() on an empty vector is undefined behaviour, so an
      // empty trajectory cannot contribute a start or a destination.
      if (trajectory.empty()) {
         cout << "Skipping empty evidence #"<<i<<endl;
         continue;
      }

      // Sum of rewards along the demonstrated path.
      double cost = 0.0;
      for (size_t j = 0; j < trajectory.size(); j++) {
         cost += rewards.at(trajectory.at(j).first,
                            trajectory.at(j).second);
      }

      pair<int, int> initial = trajectory.front();
      pair<int, int> destination = trajectory.back();

      prior.at(destination.first).at(destination.second) = 0.0;
#if 0
      cout << "Initial: "<<initial.first<<"  "<<initial.second<<endl;
      cout << "Destination: "<<destination.first<<"  "
         <<destination.second<<endl;
#endif
      predictor.setStart(initial);
      predictor.setPrior(prior);

      vector<vector<double> > occupancy;
      double norm = predictor.predict(initial, occupancy);

      // Debug rendering: predicted occupancy with the demonstrated path.
      gridView.addBelief(occupancy, -300.0, 0.0, white, red);
      gridView.addVector(trajectory, blue, factor);

      char buf[1024];
      // Bounded formatting so the file name can never overrun `buf`.
      snprintf(buf, sizeof(buf), "../figures/train%04d.bmp", i);
      gridView.write(buf);

      vector<double> modelFeats, pathFeats;

      featArray.featureCounts(occupancy, modelFeats);
      featArray.featureCounts(trajectory, pathFeats);

      cout << "GRADIENT"<<endl;

      // The returned value is -obj, so the gradient accumulates
      // -(path counts - model counts).
      for (size_t k = 0; k < params.size(); k++) {
         double diff = pathFeats.at(k) - modelFeats.at(k);
         gradient.at(k) -= diff;
         cout << k << ": " << gradient.at(k) << "    " << pathFeats.at(k)
            << " " << modelFeats.at(k) <<endl;
      }

      cout << "OBJ: "<<cost-norm<<endl;
      cout << "     "<<cost<<"  "<<norm<<endl;
      obj += (cost - norm);

      // Restore the prior so the next trajectory starts from a clean slate.
      prior.at(destination.first).at(destination.second) = -HUGE_VAL;
   }

   cout << "RETURN OBJ: "<<-obj<<endl;

   return -obj;
}