//Extends a previous policy jpolPrevTs (for ts-1) with the behavior specified
//by the policy of the BayesianGame for time step ts (jpolBG).
//
//  jpolPrevTs  - the partial joint policy to extend; must be indexed by
//                observation histories (PolicyGlobals::OHIST_INDEX).
//  jpolBG      - the Bayesian-game policy for stage ts; must be indexed by
//                types (PolicyGlobals::TYPE_INDEX).
//  nrOHts      - per agent: the number of types of that agent that are copied.
//  firstOHtsI  - per agent: the offset added to a type index to obtain the
//                observation history index used in the returned policy.
//
//Returns a newly allocated policy (a copy of jpolPrevTs with its depth 
//increased by one); the caller owns it and is responsible for deleting it.
//Throws E when a policy has the wrong index domain or when nrOHts / 
//firstOHtsI contain fewer entries than there are agents.
PartialJointPolicyDiscretePure* 
BayesianGameForDecPOMDPStage::ConstructExtendedPolicy(
        PartialJointPolicyDiscretePure& jpolPrevTs, 
        JointPolicyDiscretePure& jpolBG, 
        vector<size_t>& nrOHts, 
        vector<Index>& firstOHtsI
        )
{
    //check policy types
    if(jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::OHIST_INDEX)
        throw E("BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::OHIST_INDEX ");
    if(jpolBG.GetIndexDomainCategory() != PolicyGlobals::TYPE_INDEX)
        throw E("BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- jpolBG.GetIndexDomainCategory() != PolicyGlobals::TYPE_INDEX ");

    //both vectors are indexed by agent below; validate them before anything
    //is allocated, so that nothing leaks when we throw.
    const size_t nrAgents = GetNrAgents();
    if(nrOHts.size() < nrAgents || firstOHtsI.size() < nrAgents)
        throw E("BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- nrOHts or firstOHtsI has fewer entries than there are agents");

    //construct a policy for the DecPOMDP: 
    //a copy of jpolPrevTs, extended to this time step (ts) by jpolBG
    PartialJointPolicyDiscretePure* jpolTs = new 
        PartialJointPolicyPureVector(jpolPrevTs);
    jpolTs->SetDepth( jpolTs->GetDepth()+1 );
    for(Index agentI=0; agentI < nrAgents; agentI++)
    {
        for(Index type = 0; type < nrOHts[agentI]; type++)
        {
            //type 'type' of the BG corresponds to observation history
            //index (type + offset) in the extended policy
            Index ohI = type + firstOHtsI[agentI];
            jpolTs->SetAction(agentI, ohI, 
                    jpolBG.GetActionIndex(agentI, type) );
        }         
    }
    return(jpolTs);
}
//Fills fsaoh for the partial joint policy pJPol: fsaoh is (re)initialized
//from the factored initial state distribution, sanity-checked, and then
//updated with pJPol once per unit of the policy's depth.
//A policy of depth 0 leaves fsaoh at the initial state distribution.
void PlanningUnitFactoredDecPOMDPDiscrete::
ComputeFSAOHDist(
        FactoredStateAOHDistribution* fsaoh, 
        const PartialJointPolicyDiscretePure& pJPol) const
{
    const Index nrUpdates = pJPol.GetDepth();

    fsaoh->InitializeFromISD( GetFDPOMDPD()->GetFactoredISD() );
    fsaoh->SanityCheck();

    //depth 1 specifies the action for t=0 -> compute the FSAOH for t=1, so
    //exactly 'depth' updates are performed (none when the depth is 0).
    for(Index remaining = nrUpdates; remaining > 0; --remaining)
        fsaoh->Update(pJPol);
}
/* this is the high-level pseudo-code for what happens:
    start with a horizon 0 joint policy - i.e. specifying 0 actions
    JPolValPool.push( <jpol,val=0.0> )    
    do
        ppi = <pol,val> = JPolValPool.GetNext()

        //poolOfNextPolicies     = {<pol,vals>} 
        //isLowerBound  = bool   - whether the vals are lower bounds to the 
        //                optimal value (i.e. value for the optimal policy)
        <poolOfNextPolicies, isLowerBound> = ConstructAndValuateNextPolicies(ppi)
        if(isLowerBound)
            Prune( JPolValPool, max(lowerBound) )

        poolOfNextPolicies = SelectPoliciesToProcessFurther(poolOfNextPolicies);
        JPolValPool.insert(poolOfNextPolicies)
      
    while !empty JPolValPool
*/
void GeneralizedMAAStarPlanner::Plan()    
{
    if( _m_foundPolicy != 0)
        delete _m_foundPolicy;

    StartTimer("GMAA::Plan");

    //stuff for timing (if used)
    tms ts_start;   //the time struct
    clock_t tck_start;  //ticks
    tck_start = times(&ts_start);
    //the intermediate timing stream
    ofstream & its = *_m_intermediateResultFile;

    //this counter maintains the maximum policy pool size.
    _m_maxJPolPoolSize = 0;
    double maxLB = -DBL_MAX;
    PartialJointPolicyDiscretePure * bestJPol = NewJPol();
    bestJPol->SetPastReward(-DBL_MAX);
#if DEBUG_GMAA_POLS    
    cout << "GMAA initialized with empty policy:"<<endl;
    bestJPol->Print();
#endif                 
    PartialPolicyPoolInterface * pp_p = NewPP(); 
    pp_p->Init( GetThisFromMostDerivedPU() ); //initialize with empty joint policy
    do
    {
        StartTimer("GMAA::Plan::iteration");
        if(_m_saveIntermediateTiming)
            SaveTimers(_m_intermediateTimingFilename);

        if(_m_verboseness >= 2) {
            cout << "\n---------------------------------------------------\n";
            cout << "-->>Start of new GMAA iteration, polpool size="<<
                pp_p->Size()<<"<<--"<<endl;
        }

        PartialPolicyPoolItemInterface* ppi = pp_p->Select();
        PartialJointPolicyDiscretePure * jpol_sel =  ppi->GetJPol();
        double v_sel = ppi->GetValue();
        size_t depth_sel = jpol_sel->GetDepth();
        if(_m_verboseness >= 3) {
            cout << "Select returned the following policy to expand:\n";
            ppi->GetJPol()->Print();
            cout << "of depth="<< depth_sel << " and heur. val="<<v_sel<<endl;
        }
        
        if( (v_sel + _m_slack) < maxLB) //the highest upperbound < the best lower
        {
            //TODO: 
            //  1)if JPolValPool is no priority queue, this should be changed.
            //  2)this is only necessary, because PRUNE (see todo below) is nyi
            if(_m_verboseness >= 0)
                cout<<"highest upper < best found lower bound, stopping\n";
            break;
        }

        //poolOfNextPolicies     = {<pol,vals>} 
        //isLowerBound  = bool   - whether the vals are lower bounds to the 
        //                optimal value (i.e. value for the optimal policy)
        //<poolOfNextPolicies,isLowerBound>=ConstructAndValuateNextPolicies(ppi)

        PartialPolicyPoolInterface * poolOfNextPolicies = NewPP();
        bool are_LBs = ConstructAndValuateNextPolicies(ppi, poolOfNextPolicies);

        //Clean up ppi
        if(pp_p->Size() > 0) //should always be true
        {
            pp_p->Pop();
            delete ppi;
        }
        else //should not happen
            throw E("GeneralizedMAAStarPlanner.cpp:policy pool empty? - should not happen?");

#if DEBUG_GMAA4        
        if(DEBUG_GMAA4){
            cout << "\n>>>The next policies found, poolOfNextPolicies:"<<endl;
            PartialPolicyPoolInterface* pp_copy = NewPP();
            *pp_copy = *poolOfNextPolicies;
            while(! pp_copy->Empty())
            {
                PartialPolicyPoolItemInterface* it = pp_copy->Select();
                it->Print();
                cout << endl;
                pp_copy->Pop();
            }
        }
#endif        

        //if(isLowerBound)
        //    Prune( JPolValPool, max(lowerBound) )
        if(are_LBs && poolOfNextPolicies->Size() > 0)
        {
            PartialPolicyPoolItemInterface* bestRanked_ppi = poolOfNextPolicies->
                GetBestRanked();
            poolOfNextPolicies->PopBestRanked();
            double bestNextVal = bestRanked_ppi->GetValue();
            if(bestNextVal > maxLB) //new best lowerbound (and policy) found
            {
                maxLB = bestNextVal;
                *bestJPol = *(bestRanked_ppi->GetJPol());
                if(_m_verboseness >= 2) {
                    cout << "new bestJPol (and max. lowerbound) found!" << endl;
                    cout << "Its value v="
                         << bestNextVal <<" - "
                         << bestRanked_ppi->GetJPol()->SoftPrintBrief() << endl;
                }
                if(_m_verboseness >= 3) 
                    cout << "new bestJPol->SoftPrint():"<<bestJPol->SoftPrint();

                //if we maintain the internal timings...
                if(_m_intermediateResultFile != 0)
                {
                    tms ts_cur;
                    clock_t tck_cur;
                    tck_cur = times(&ts_cur);
                    clock_t diff = tck_cur - tck_start;
                    its << diff << "\t" <<  maxLB << endl;
                }
                // prune JPolValPool
                pp_p->Prune(maxLB - _m_slack );
            }
            delete bestRanked_ppi;

        }
        SelectPoliciesToProcessFurther(poolOfNextPolicies, are_LBs, maxLB - _m_slack);
        pp_p->Union(poolOfNextPolicies);

        delete poolOfNextPolicies;

        if( _m_maxJPolPoolSize < pp_p->Size())
            _m_maxJPolPoolSize = pp_p->Size();
        
        StopTimer("GMAA::Plan::iteration");
        if(_m_verboseness >= 2) { 
            cout << "\nGMAA::Plan::iteration ending, best policy found so far:";
            cout << endl << bestJPol->SoftPrintBrief()  <<endl;
            if(_m_verboseness >= 3)  
                cout << endl << bestJPol->SoftPrint()  <<endl;
        }

    } 
    while(! pp_p->Empty() ); //<- end do...while
    //we don't want to do any conversions here... takes (sometimes too much)
    //time...
    _m_foundPolicy=bestJPol;  //->ToJointPolicyPureVector());


    _m_expectedRewardFoundPolicy=maxLB;
    if(_m_verboseness >= 1) {
        cout << "\nGMAA::Plan ending, best policy found: ";
        cout << bestJPol->SoftPrintBrief() << " = " <<endl;
        if(_m_verboseness >= 3) 
            cout << _m_foundPolicy->SoftPrint() << endl;
        cout << endl;
#if 0
    JointPolicyPureVector* jppv = _m_foundPolicy->ToJointPolicyPureVector();
    jppv->Print();
#endif                 
    }
    if(_m_verboseness >= 2)    
        cout << "\n\n ";
    if(_m_verboseness >= 0)    
        cout << "GMAA::Plan GMAA ENDED"<<endl;
    if(_m_verboseness >= 2)    
        cout << "\n\n ";

    delete pp_p;
    
    StopTimer("GMAA::Plan");
}