Пример #1
0
int main(int argc, char **argv)
{
    DecPOMDPDiscreteInterface* decpomdp;
    try {
        ArgumentHandlers::Arguments args;
        argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args);

        Timing times;
        times.Start("Parsing");
        //DecPOMDPDiscreteInterface* 
        decpomdp = GetDecPOMDPDiscreteInterfaceFromArgs(args);
        TransitionObservationIndependentMADPDiscrete *toi=0;
        if((toi=dynamic_cast<TransitionObservationIndependentMADPDiscrete*>(decpomdp)) &&
           args.qheur==eQMDP &&
           !args.cache_flat_models /* otherwise
                                    * GetDecPOMDPDiscreteInterfaceFromArgs
                                    * already caches the flat
                                    * models */)
        {
            // we don't need a centralized obs model
            toi->CreateCentralizedSparseTransitionModel();
        }

        times.Stop("Parsing");

        if(!args.dryrun)
            directories::MADPCreateResultsDir("GMAA",*decpomdp);
        
        size_t horizon;
        if(args.infiniteHorizon)
            horizon=MAXHORIZON;
        else
            horizon=args.horizon;

        times.Start("Overall");

        PlanningUnitMADPDiscreteParameters params;
#if 0 // Caching doesn't seem worth the trouble if we're computing
      // just one thing (not to mention the memory savings)
        if(Qheur==eQMDP) // don't need any of this for solving the MDP
            params.SetComputeAll(false);
        else
        {
            params.SetComputeAll(true);
            params.SetUseSparseJointBeliefs(true);
        }
#else
        params.SetComputeAll(false);
        if(args.sparse)
            params.SetUseSparseJointBeliefs(true);
#endif

        times.Start("PlanningUnit");
        NullPlanner np(params,horizon,decpomdp);
        times.Stop("PlanningUnit");

        struct timeval tvStart, tvEnd;
        gettimeofday (&tvStart, NULL);

        QFunctionJAOHInterface* q=0;
        for(int restartI = 0; restartI < args.nrRestarts; restartI++)
        {
            // with hybrid heuristics already some computation is done
            // before Compute(), so start timing now
            times.Start("ComputeQ");
            q = GetQheuristicFromArgs(&np, args);
            q->Compute();
            times.Stop("ComputeQ");

            // we want to keep the last q computed
            if(restartI<(args.nrRestarts-1))
                delete q;
        }

        gettimeofday (&tvEnd, NULL);

        clock_t wallclockTime = 
            static_cast<clock_t>(((tvEnd.tv_sec - tvStart.tv_sec) +
                                  static_cast<double>(tvEnd.tv_usec-tvStart.tv_usec)/1e6) * sysconf(_SC_CLK_TCK));

        cout << "Wallclock: from "
             << tvStart.tv_sec << "." << tvStart.tv_usec
             << " until "
             << tvEnd.tv_sec << "." << tvEnd.tv_usec
             << " which took " << wallclockTime << " clock ticks"
             << endl;
        
        times.AddEvent("WallclockTime", wallclockTime);

        if(!args.dryrun)
        {
            times.Start("Save");
            q->Save();
            times.Stop("Save");
            if(args.verbose >= 0)
                cout << "Q saved to " << q->GetCacheFilename() << endl;
        }
        times.Stop("Overall");

        if(args.verbose >= 0)
            times.PrintSummary();

        if(!args.dryrun)
        {
            stringstream ss;
            ss << directories::MADPGetResultsDir("GMAA",*decpomdp)
               << "/calculateQheuristic" << q->SoftPrintBrief() << "_h"
               << horizon;
            if(decpomdp->GetDiscount()!=1)
                ss << "_g" << decpomdp->GetDiscount();
            ss << "_Timings";
            times.Save(ss.str());
            if(args.verbose >= 0)
                cout << "Timings saved to " << ss.str() << endl;
        }

        if(horizon!=MAXHORIZON)
        {
            double Vjb0=-DBL_MAX;
            for(Index a=0;a!=np.GetNrJointActions();++a)
                Vjb0=max(q->GetQ(Globals::INITIAL_JAOHI,a),Vjb0);
            cout << "Value of jaohI 0 = " << Vjb0 << endl;
        }

        delete q;
    }
    catch(E& e){ e.Print(); }

    cout << "cleanup..." << endl;
    delete decpomdp;
}
Пример #2
0
int main(int argc, char **argv)
{
    cout << "DICE: direct CE Policy Search"<<endl;
    cout << "-----------------------------"<<endl;
    // parse the command line arguments
    ArgumentHandlers::Arguments args;
    argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args);
    int restarts = args.nrCERestarts;

    srand(time(0));


    int horizon = args.horizon;
    cout << "Horizon = " << horizon << endl;


    try {
    //start timers
    Timing Time;    
    Time.Start("Overall");

    DecPOMDPDiscreteInterface & decpomdp = * GetDecPOMDPDiscreteInterfaceFromArgs(args);

    // setup the output file stream 
    string filename="/dev/null",timingsFilename="/dev/null";
    ofstream of;
    stringstream ss;
    ss << directories::MADPGetResultsFilename("DICEPS",decpomdp,args)
        << "h" << horizon;
    // add the CE parameters into the output file name
    ss  << "_CEr" << args.nrCERestarts 
        << "_i" << args.nrCEIterations
        << "_s" << args.nrCESamples 
        << "_sfu" << args.nrCESamplesForUpdate
        << "_a" << args.CE_alpha 
        << "_ht" << args.CE_use_hard_threshold
        << "_evals" << args.nrCEEvalutionRuns;
    if(!args.dryrun)
    {
        directories::MADPCreateResultsDir("DICEPS",decpomdp);
        filename=ss.str();
        timingsFilename=filename + "_Timings";
    }
    of.open(filename.c_str());
    if(!of)
    {
        cerr << "could not open " << filename << endl;
        return(1);
    }
    cout << "Computing " << ss.str() << endl;
    //write headers
    of << "#horiz."<<"\t";
    of << "value     " <<"\t";
    of << "wctime"<< "\t";
    of << "utime " <<"\t";
    of << "stime " <<"\t";
    of << "found jpol index\t(1tick=1/"<<sysconf(_SC_CLK_TCK)<<"s)\n";
    of.flush();
    
    //Initialization of the planner with typical options for JESP:
    Time.Start("PlanningUnit");
    PlanningUnitMADPDiscreteParameters params;
    params.SetComputeAll(true);
    params.SetComputeJointActionObservationHistories(false);
    params.SetComputeJointActionHistories(false);
    params.SetComputeIndividualActionObservationHistories(false);
    params.SetComputeIndividualActionHistories(false);
    //params.SetComputeIndividualObservationHistories(false);
    // joint observations histories are needed for
    // efficient computation of joint actions
    params.SetComputeJointObservationHistories(true);
    params.SetComputeJointBeliefs(false);
    if(args.sparse)
        params.SetUseSparseJointBeliefs(true);
    else
        params.SetUseSparseJointBeliefs(false);
    DICEPSPlanner* planner;
    planner = new DICEPSPlanner (params, &decpomdp,
        horizon,
        //CE params
        args.nrCERestarts,
        args.nrCEIterations,
        args.nrCESamples,
        args.nrCESamplesForUpdate, 
        args.CE_use_hard_threshold, //(gamma in CE papers)
        args.CE_alpha, //the learning rate
        args.nrCEEvalutionRuns //the number of evaluation runs
        , args.verbose
    );
    Time.Stop("PlanningUnit");
    cout << "DICEPSPlanner initialized" << endl;

    clock_t total_utime_diceps=0;
    double total_value=0;
    for(int restartI = 0; restartI < restarts; restartI++)
    {
        //start all timers:
        tms ts_before, ts_after;
        clock_t ticks_before, ticks_after;
        Time.Start("Plan");
        ticks_before = times(&ts_before);
        planner->Plan();
        //stop all timers
        ticks_after = times(&ts_after);
        Time.Stop("Plan");
        clock_t ticks =  ticks_after - ticks_before;
        clock_t utime =   ts_after.tms_utime - ts_before.tms_utime;
        clock_t stime =   ts_after.tms_stime - ts_before.tms_stime;

        total_utime_diceps+=utime;

        double V = planner->GetExpectedReward();
        if(args.verbose >= 0)
        {
            cout << "value="<< V << endl;
            if(args.verbose)        {
            planner->GetJointPolicyPureVector()->Print();
            cout <<  endl;
            }
        }
        total_value+=V;
        
        of << horizon<<"\t";
        char formvalue[10];
        sprintf(formvalue, "%.6f", V);
        of << formvalue <<"\t";
        of << ticks <<"\t";
        of << utime <<"\t";
        of << stime <<"\t";
        of << "-1\n";//Cannot get index of joint pol., since  "planner->GetJointPolicyPureVector()->GetIndex()" does not work
        of.flush();

        // output average statistics after completing the last restart
        if(restartI==(restarts-1))
            of << "# h " << args.horizon<<"\t"
               << " avg DICEPS time (s): "
               << (static_cast<double>(total_utime_diceps)/
                   sysconf(_SC_CLK_TCK))/restarts
               << " avg value: " << total_value/restarts
               << endl;
    }
    /* clean up */

    Time.Stop("Overall");

    if(args.verbose >= 0)
    {
        Time.PrintSummary();
        planner->PrintTimersSummary();
    }
#if 0
    if(//args.saveTimings && 
            !args.dryrun)
    {
        Time.Save(timingsFilename);
        planner->SaveTimers(timingsFilename);
    }
#endif
    delete planner;
    }
    catch(E& e){ e.Print(); }
}
Пример #3
0
int main(int argc, char **argv)
{
    ArgumentHandlers::Arguments args;
    argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args);
    int restarts = args.nrRestarts;

    srand(time(0));

    int horizon=args.horizon;

    try {
    //start timers
    Timing Time;    
    Time.Start("Overall");

    DecPOMDPDiscreteInterface & decpomdp = * GetDecPOMDPDiscreteInterfaceFromArgs(args);

    //set the filename etc.
    string filename="/dev/null",timingsFilename="/dev/null";
    ofstream of;
    stringstream ss;
    ss  << directories::MADPGetResultsFilename("JESP",decpomdp,args)
        << SoftPrint(args.jesp) //the jesp type
        << "_h" << horizon
        << "_JESPrestarts"<< restarts;
    //check the method specific arguments and add them to file name
    switch(args.jesp)
    {
    case JESPtype::JESPExhaustive:
        break;
    case JESPtype::JESPDP:
        break;
    }
    if(!args.dryrun)
    {
        filename=ss.str();
        timingsFilename=filename + "_Timings";

        of.open(filename.c_str());
        if(!of)
        {
            cout << "JESP: could not open " << filename << endl;
            cout << "Results will not be stored to disk." << endl;
            args.dryrun=true;
        }

        if(!args.dryrun)
        {
            cout << "Computing " << ss.str() << endl;
            //write headers
            of << "#horiz."<<"\t";
            of << "value     " <<"\t";
            of << "ticks"<< "\t";
            of << "utime" <<"\t";
            of << "found jpol index\t(1tick=1/"<<sysconf(_SC_CLK_TCK)<<"s)\n";
            of.flush();
        }
    }
    
    //Initialization of the planner with typical options for JESP:
    Time.Start("PlanningUnit");
    PlanningUnitMADPDiscreteParameters params;
    params.SetComputeAll(true);
    params.SetComputeJointActionObservationHistories(false);
    params.SetComputeJointObservationHistories(false);
    params.SetComputeJointBeliefs(false);
    if(args.sparse)
        params.SetUseSparseJointBeliefs(true);
    else
        params.SetUseSparseJointBeliefs(false);
    PlanningUnitDecPOMDPDiscrete* jesp = 0;
    if(args.jesp == JESPtype::JESPExhaustive)
    {
        jesp = new JESPExhaustivePlanner (params,horizon,&decpomdp);
        cout << "JESPExhaustivePlanner initialized" << endl;
    }
    else if(args.jesp == JESPtype::JESPDP)
    {
        jesp = new JESPDynamicProgrammingPlanner (params,horizon,&decpomdp);
        cout << "JESPDynamicProgrammingPlanner initialized" << endl;
    }
    Time.Stop("PlanningUnit");
    cout << "JESP Planner initialized" << endl;

    for(int restartI = 0; restartI < restarts; restartI++)
    {
        //start all timers:
        Time.Start("Plan");
        tms ts_before, ts_after;
        clock_t ticks_before, ticks_after;
        ticks_before = times(&ts_before);

        jesp->Plan();
        double V = jesp->GetExpectedReward();
        if(args.verbose >= 0)
        {
            cout << "value="<< V << endl;
            if(args.verbose)        {
            jesp->GetJointPolicyPureVector()->Print();
            cout <<  endl;
            }
        }

        //stop all timers
        ticks_after = times(&ts_after);
        clock_t ticks =  ticks_after - ticks_before;
        clock_t utime =   ts_after.tms_utime - ts_before.tms_utime;
        Time.Stop("Plan");

#if CHECK_RESULT
        ValueFunctionDecPOMDPDiscrete vf(jesp, jesp->GetJointPolicyPureVector());
        double v = vf.CalculateV(true);
        cout << "Validated value (exact/approx):="<<v;
        SimulationDecPOMDPDiscrete sim(*jesp, 1000);
        SimulationResult simres = 
            sim.RunSimulations( jesp->GetJointPolicyPureVector() );
        v = simres.GetAvgReward();
        cout << " / "<<v <<endl;
#endif
        if(!args.dryrun)
        {
            of << horizon<<"\t";
            char formvalue[10];
            sprintf(formvalue, "%.6f", V);
            of << formvalue <<"\t";
            of << ticks <<"\t";
            of << utime <<"\t";
            of << jesp->GetJointPolicyPureVector()->GetIndex() <<"\n";
            of.flush();
        }
    }

    delete jesp;
    
    Time.Stop("Overall");

    if(args.verbose >= 0)
    {
        Time.PrintSummary();
    }
    if(args.saveTimings && !args.dryrun)
        Time.Save(timingsFilename);
    }
    catch(E& e){ e.Print(); }
}
Пример #4
0
int main(int argc, char **argv)
{
    ArgumentHandlers::Arguments args;
    argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args);

    try
    {
        Timing Time;
        cout << "Instantiating the problem..."<<endl;
        DecPOMDPDiscreteInterface* decpomdp = GetDecPOMDPDiscreteInterfaceFromArgs(args);
        cout << "...done."<<endl;

        //set up output files
        string filename="/dev/null", timingsFilename="/dev/null";
        if(!args.dryrun)
        {
            stringstream ss;
            ss  << directories::MADPGetResultsFilename("VI", *decpomdp, args)
                << "_h" << args.horizon;
            filename=ss.str();
            timingsFilename=filename + "_Timings";
            if(!file_exists(filename))
            {
                cout << "VI: could not open " << filename <<endl;
                cout << "Results will not be stored to disk." <<endl;
                args.dryrun = true;
            }
        }

        //start VI
        PlanningUnitDecPOMDPDiscrete *np = new NullPlanner(args.horizon, decpomdp);
        MDPValueIteration vi(*np);
        cout << "Running value iteration..."<<endl;
        Time.Start("Plan");
        vi.Plan();
        Time.Stop("Plan");
        cout << "...done."<<endl;
        QTable q = vi.GetQTable(0); //<- infinite horizon, so get 1 value function of stage 0

        int nrRuns = args.nrRuns; //defaults to 1000, see argumentHandlers.h
        int seed = args.randomSeed; //defaults to 42
        cout << "Simulating policy with nrRuns: "
             << nrRuns << " and seed: " << seed <<endl;
        SimulationDecPOMDPDiscrete sim(*np, nrRuns, seed);

        //write intermediate simulation results to file
        if(!args.dryrun)
            sim.SaveIntermediateResults(filename);

        vector<double> avgRewards;
        double r = runOneSimulation(q, np, sim);
        cout << "...done"<<endl;

        avgRewards.push_back(r);
        cout << "Avg rewards: " << SoftPrintVector(avgRewards) << endl;

        //write VI timing information to file
        if(!args.dryrun)
            Time.Save(timingsFilename);
    }
    catch(E& e){ e.Print(); }

    return(0);
}