Exemplo n.º 1
0
/**
 * Runs the exhaustive JESP loop and stores the best joint policy found.
 *
 * Starting from a randomly initialized joint policy, the loop repeatedly
 * asks GetNextAgentIndex() for an agent and calls ExhaustiveBestResponse()
 * for that agent on the current joint policy. Whenever the returned value
 * exceeds the best value so far (by more than a small tolerance), the
 * current joint policy is copied into the best policy. The loop ends once
 * as many consecutive calls as there are agents failed to improve, or when
 * the iteration cap is reached.
 *
 * On return, _m_foundPolicy holds the best policy and
 * _m_expectedRewardFoundPolicy holds its value.
 */
void JESPExhaustivePlanner::Plan()
{
    if(DEBUG_EXJESP) {
        cout << "\n---------------------------------"<<endl;
        cout << "Exhaustive JESP  - Plan() started"<<endl;
        cout << "---------------------------------"<<endl;
    }

    // Hard cap on the number of best-response computations.
    const int maxNrIterations = 1000;
    // A value must beat the incumbent by more than this to count as improved.
    const double improvementTolerance = 1e-9;

    double v_best = -DBL_MAX;

    // The working policy is an automatic object so it is released on every
    // exit path (it used to be heap-allocated and never deleted).
    JointPolicyPureVector jpol(this);
    // The best policy is handed to the shared pointer right away, so it
    // cannot leak if a best-response computation throws.
    JPPV_sharedPtr best(new JointPolicyPureVector(this));
    jpol.RandomInitialization();

    if(DEBUG_EXJESP) {
        cout << "joint policy randomly initialized to:";
        jpol.Print();
    }

    int stop = 0;
    size_t nr_non_improving_agents = 0;
    while(nr_non_improving_agents < GetDPOMDPD()->GetNrAgents()
            && stop++ < maxNrIterations)
    {
        int agentI = GetNextAgentIndex();
        double v = ExhaustiveBestResponse(&jpol, agentI);
        if(v > v_best + improvementTolerance)
        {
            // Remember the improved joint policy and restart the count of
            // consecutive non-improving agents.
            (*best) = jpol;
            if(DEBUG_EXHBR)
            {
                cout << "Plan: new best policy:"<<endl;
                best->Print();
            }
            v_best = v;
            nr_non_improving_agents = 0;
        }
        else
            nr_non_improving_agents++;
    }
    _m_foundPolicy = best;
    _m_expectedRewardFoundPolicy=v_best;


    if(DEBUG_EXJESP) {
        cout << "Exhaustive JESP  - resulting policy:"<<endl;
        cout << "------------------------------------"<<endl;
        best->Print();
    }
}
/* this is the high-level pseudo-code for what happens:
    start with a horizon 0 joint policy - i.e. specifying 0 actions
    JPolValPool.push( <jpol,val=0.0> )    
    do
        ppi = <pol,val> = JPolValPool.GetNext()

        //poolOfNextPolicies     = {<pol,vals>} 
        //isLowerBound  = bool   - whether the vals are lower bounds to the 
        //                optimal value (i.e. value for the optimal policy)
        <poolOfNextPolicies, isLowerBound> = ConstructAndValuateNextPolicies(ppi)
        if(isLowerBound)
            Prune( JPolValPool, max(lowerBound) )

        poolOfNextPolicies = SelectPoliciesToProcessFurther(poolOfNextPolicies);
        JPolValPool.insert(poolOfNextPolicies)
      
    while !empty JPolValPool
*/
void GeneralizedMAAStarPlanner::Plan()    
{
    if( _m_foundPolicy != 0)
        delete _m_foundPolicy;

    StartTimer("GMAA::Plan");

    //stuff for timing (if used)
    tms ts_start;   //the time struct
    clock_t tck_start;  //ticks
    tck_start = times(&ts_start);
    //the intermediate timing stream
    ofstream & its = *_m_intermediateResultFile;

    //this counter maintains the maximum policy pool size.
    _m_maxJPolPoolSize = 0;
    double maxLB = -DBL_MAX;
    PartialJointPolicyDiscretePure * bestJPol = NewJPol();
    bestJPol->SetPastReward(-DBL_MAX);
#if DEBUG_GMAA_POLS    
    cout << "GMAA initialized with empty policy:"<<endl;
    bestJPol->Print();
#endif                 
    PartialPolicyPoolInterface * pp_p = NewPP(); 
    pp_p->Init( GetThisFromMostDerivedPU() ); //initialize with empty joint policy
    do
    {
        StartTimer("GMAA::Plan::iteration");
        if(_m_saveIntermediateTiming)
            SaveTimers(_m_intermediateTimingFilename);

        if(_m_verboseness >= 2) {
            cout << "\n---------------------------------------------------\n";
            cout << "-->>Start of new GMAA iteration, polpool size="<<
                pp_p->Size()<<"<<--"<<endl;
        }

        PartialPolicyPoolItemInterface* ppi = pp_p->Select();
        PartialJointPolicyDiscretePure * jpol_sel =  ppi->GetJPol();
        double v_sel = ppi->GetValue();
        size_t depth_sel = jpol_sel->GetDepth();
        if(_m_verboseness >= 3) {
            cout << "Select returned the following policy to expand:\n";
            ppi->GetJPol()->Print();
            cout << "of depth="<< depth_sel << " and heur. val="<<v_sel<<endl;
        }
        
        if( (v_sel + _m_slack) < maxLB) //the highest upperbound < the best lower
        {
            //TODO: 
            //  1)if JPolValPool is no priority queue, this should be changed.
            //  2)this is only necessary, because PRUNE (see todo below) is nyi
            if(_m_verboseness >= 0)
                cout<<"highest upper < best found lower bound, stopping\n";
            break;
        }

        //poolOfNextPolicies     = {<pol,vals>} 
        //isLowerBound  = bool   - whether the vals are lower bounds to the 
        //                optimal value (i.e. value for the optimal policy)
        //<poolOfNextPolicies,isLowerBound>=ConstructAndValuateNextPolicies(ppi)

        PartialPolicyPoolInterface * poolOfNextPolicies = NewPP();
        bool are_LBs = ConstructAndValuateNextPolicies(ppi, poolOfNextPolicies);

        //Clean up ppi
        if(pp_p->Size() > 0) //should always be true
        {
            pp_p->Pop();
            delete ppi;
        }
        else //should not happen
            throw E("GeneralizedMAAStarPlanner.cpp:policy pool empty? - should not happen?");

#if DEBUG_GMAA4        
        if(DEBUG_GMAA4){
            cout << "\n>>>The next policies found, poolOfNextPolicies:"<<endl;
            PartialPolicyPoolInterface* pp_copy = NewPP();
            *pp_copy = *poolOfNextPolicies;
            while(! pp_copy->Empty())
            {
                PartialPolicyPoolItemInterface* it = pp_copy->Select();
                it->Print();
                cout << endl;
                pp_copy->Pop();
            }
        }
#endif        

        //if(isLowerBound)
        //    Prune( JPolValPool, max(lowerBound) )
        if(are_LBs && poolOfNextPolicies->Size() > 0)
        {
            PartialPolicyPoolItemInterface* bestRanked_ppi = poolOfNextPolicies->
                GetBestRanked();
            poolOfNextPolicies->PopBestRanked();
            double bestNextVal = bestRanked_ppi->GetValue();
            if(bestNextVal > maxLB) //new best lowerbound (and policy) found
            {
                maxLB = bestNextVal;
                *bestJPol = *(bestRanked_ppi->GetJPol());
                if(_m_verboseness >= 2) {
                    cout << "new bestJPol (and max. lowerbound) found!" << endl;
                    cout << "Its value v="
                         << bestNextVal <<" - "
                         << bestRanked_ppi->GetJPol()->SoftPrintBrief() << endl;
                }
                if(_m_verboseness >= 3) 
                    cout << "new bestJPol->SoftPrint():"<<bestJPol->SoftPrint();

                //if we maintain the internal timings...
                if(_m_intermediateResultFile != 0)
                {
                    tms ts_cur;
                    clock_t tck_cur;
                    tck_cur = times(&ts_cur);
                    clock_t diff = tck_cur - tck_start;
                    its << diff << "\t" <<  maxLB << endl;
                }
                // prune JPolValPool
                pp_p->Prune(maxLB - _m_slack );
            }
            delete bestRanked_ppi;

        }
        SelectPoliciesToProcessFurther(poolOfNextPolicies, are_LBs, maxLB - _m_slack);
        pp_p->Union(poolOfNextPolicies);

        delete poolOfNextPolicies;

        if( _m_maxJPolPoolSize < pp_p->Size())
            _m_maxJPolPoolSize = pp_p->Size();
        
        StopTimer("GMAA::Plan::iteration");
        if(_m_verboseness >= 2) { 
            cout << "\nGMAA::Plan::iteration ending, best policy found so far:";
            cout << endl << bestJPol->SoftPrintBrief()  <<endl;
            if(_m_verboseness >= 3)  
                cout << endl << bestJPol->SoftPrint()  <<endl;
        }

    } 
    while(! pp_p->Empty() ); //<- end do...while
    //we don't want to do any conversions here... takes (sometimes too much)
    //time...
    _m_foundPolicy=bestJPol;  //->ToJointPolicyPureVector());


    _m_expectedRewardFoundPolicy=maxLB;
    if(_m_verboseness >= 1) {
        cout << "\nGMAA::Plan ending, best policy found: ";
        cout << bestJPol->SoftPrintBrief() << " = " <<endl;
        if(_m_verboseness >= 3) 
            cout << _m_foundPolicy->SoftPrint() << endl;
        cout << endl;
#if 0
    JointPolicyPureVector* jppv = _m_foundPolicy->ToJointPolicyPureVector();
    jppv->Print();
#endif                 
    }
    if(_m_verboseness >= 2)    
        cout << "\n\n ";
    if(_m_verboseness >= 0)    
        cout << "GMAA::Plan GMAA ENDED"<<endl;
    if(_m_verboseness >= 2)    
        cout << "\n\n ";

    delete pp_p;
    
    StopTimer("GMAA::Plan");
}