Example #1
void AgentBG::ResetEpisode()
{
    _m_t=0; // reset the time-step counter

    // reset the previous joint belief to the initial state distribution
    JointBeliefInterface* jbi = GetPU()->GetNewJointBeliefFromISD();
    _m_prevJB.Set(jbi->Get());
    delete jbi;

    // clear the stored action and observation index histories
    _m_oIs.clear();
    _m_prevJoIs.clear();
    _m_prevJaIs.clear();
    _m_aIs.clear();
}
double BayesianGameForDecPOMDPStage::
ComputeImmediateReward(Index jtI, Index jaI) const
{
    // expected immediate reward of joint action jaI under the joint belief
    // associated with joint type jtI: R(jtI,jaI) = sum_s b_jtI(s) R(s,jaI)
    JointBeliefInterface* jb = _m_JBs.at(jtI);
    BeliefIteratorGeneric bit = jb->GetIterator();
    double r = 0.0;
    do{
        Index s = bit.GetStateIndex();
        double p = bit.GetProbability();
        r += p * _m_pu->GetReward(s, jaI);
    }while (bit.Next() );
    return r;
}
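The iterator loop above just takes the belief-weighted expectation of the immediate reward. A minimal equivalent sketch written as an explicit sum over states, assuming the belief exposes Get(sI) and the planning unit exposes GetNrStates() (both are used further below in this file):

    double r = 0.0;
    for(Index sI = 0; sI < _m_pu->GetNrStates(); sI++)
        r += jb->Get(sI) * _m_pu->GetReward(sI, jaI);
    return r;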
Example #3
AlphaVector::BGPolicyIndex
AgentBG::GetMaximizingBGIndex(const JointBeliefInterface &jb) const
{
    double v=-DBL_MAX,q;
    AlphaVector::BGPolicyIndex bI,betaMaxI=INT_MAX;

    // find the joint action with the highest stationary QBG value for jb,
    // remembering the BG policy index bI associated with that action
    for(Index a=0;a!=GetPU()->GetNrJointActions();++a)
    {
        q=_m_QBGstationary->GetQ(jb,a,bI);

        if(q>v)
        {
            v=q;
            betaMaxI=bI;
        }
    }

#if DEBUG_AgentBG
    cout << "GetMaximizingBGIndex " << GetIndex() << ": betaMaxI " << betaMaxI 
         << " " << jb.SoftPrint() << endl;
#endif
    return(betaMaxI);
}
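A minimal usage sketch for the method above; the belief construction mirrors ResetEpisode in Example #1, but the surrounding context and variable names are assumptions for illustration:

    JointBeliefInterface* jb = GetPU()->GetNewJointBeliefFromISD();
    AlphaVector::BGPolicyIndex betaI = GetMaximizingBGIndex(*jb);
    // betaI now identifies the BG policy attaining max_a Q(jb,a)
    delete jb;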
/**Calculates the jaohI corresponding to jaI_arr and joI_arr and also
 * returns P(jaohI) and the expected reward obtained over the previous
 * time steps GIVEN this joint action-observation history.
 *
 * \todo TODO add following to .h
 *
 * basically this function is a form of
 *      PlanningUnitMADPDiscrete::GetJAOHProbs(Recursively)
 * that also computes the reward.
 *
 * */
void BayesianGameForDecPOMDPStage::ProbRewardForjoahI(
        //input args
        Index ts, Index jtI, Index* jaI_arr,Index* joI_arr, 
        //output args
        Index& jaohI, double& PjaohI, double& ExpR_0_prevTS_thisJAOH )
{
    //first we get the initial jaoh
    JointActionObservationHistoryTree * jaoht = 
        _m_pu->GetJointActionObservationHistoryTree(0);

    double CPjaohI = 1.0; 
    PjaohI = CPjaohI; // == 1.0

    // get the initial belief
    JointBeliefInterface* jb = _m_pu->GetNewJointBeliefFromISD();

    Index tI = 0;
    while(tI < ts)
    {
        //calculate the R for tI
        double ExpR_0_prevTS_thisJAOH_thisT = 0.0;
#if USE_BeliefIteratorGeneric
        BeliefIteratorGeneric it=jb->GetIterator();
        do
        {
            double R_si_ja = _m_pu->GetReward(it.GetStateIndex(), jaI_arr[tI]); 
            if(DEBUG_BG4DECPOMDP4) 
                cout << "R(s="<<it.GetStateIndex()<<",ja="<<jaI_arr[tI]<<")="<< R_si_ja << "\n";
            ExpR_0_prevTS_thisJAOH_thisT += it.GetProbability() * R_si_ja;
        } while(it.Next());
#else
        for(Index sI=0; sI < _m_pu->GetNrStates(); sI++)
        {
            double R_si_ja = _m_pu->GetReward(sI, jaI_arr[tI]); 
            if(DEBUG_BG4DECPOMDP4) 
                cout << "R(s="<<sI<<",ja="<<jaI_arr[tI]<<")="<< R_si_ja << "\n";
            ExpR_0_prevTS_thisJAOH_thisT += jb->Get(sI) * R_si_ja;
        }
#endif
        ExpR_0_prevTS_thisJAOH += ExpR_0_prevTS_thisJAOH_thisT;
        if(DEBUG_BG4DECPOMDP4)
        {
            cout << "calculating expected reward R(oaHist,a) for tI="<<tI
                <<"oaHist:"; jaoht->GetJointActionObservationHistory()->Print();
            cout << endl; cout << "R(b,a) (exp reward for jtI="  << jtI << 
                ", tI="<<tI<<") is "<< ExpR_0_prevTS_thisJAOH_thisT <<endl;

        }
        // advance to the next time step: follow the successor in the JAOH
        // tree, and update the joint belief while tracking P(jaohI)
        jaoht = jaoht->GetSuccessor( jaI_arr[tI], joI_arr[tI] );
        jaohI = jaoht->GetIndex();

        CPjaohI = jb->Update( *_m_pu->GetReferred(), jaI_arr[tI], joI_arr[tI] );
        PjaohI = PjaohI * CPjaohI;
        tI++;
    }
    delete jb; //free the belief allocated with 'new'
    if(DEBUG_BG4DECPOMDP4)
    {
        cout << "expected previous reward (up to ts-1) for (jtI="  << jtI << 
            ") ";
        jaoht->GetJointActionObservationHistory()->Print();
        cout << " is "<< ExpR_0_prevTS_thisJAOH <<endl << endl;
    }
}
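A minimal calling sketch for ProbRewardForjoahI; only its signature is taken from the code above, the surrounding variables and the way the history arrays are filled are illustrative assumptions:

    // joint action / joint observation indices for time steps 0..ts-1
    std::vector<Index> jaIs(ts), joIs(ts);
    // ... fill jaIs and joIs from the joint type's history ...

    Index jaohI = 0;
    double PjaohI = 0.0;
    double ExpR_prev = 0.0; // the function accumulates into this, so start at 0
    ProbRewardForjoahI(ts, jtI, &jaIs[0], &joIs[0], jaohI, PjaohI, ExpR_prev);
    // PjaohI now holds P(jaohI); ExpR_prev the expected reward up to ts-1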
/// Exports the model in Tony Cassandra's POMDP file format.
void AlphaVectorPlanning::ExportPOMDPFile(const string & filename,
                                          const PlanningUnitDecPOMDPDiscrete 
                                          *pu)
{
    int nrA=pu->GetNrJointActions(),
        nrO=pu->GetNrJointObservations(),
        nrS=pu->GetNrStates();
    ofstream fp(filename.c_str());
    if(!fp)
    {
        cerr << "AlphaVectorPlanning::ExportPOMDPFile: failed to open file "
             << filename << endl;
        return;
    }

    fp << "discount: " << pu->GetDiscount() << endl;
    switch(pu->GetReferred()->GetRewardType())
    {
    case REWARD:
        fp << "values: reward" << endl;
        break;
    case COST:
        fp << "values: cost" << endl;
        break;
    }

    fp << "states:";
    for(int s=0;s<nrS;s++)
        fp << " "  << pu->GetReferred()->GetState(s)->SoftPrintBrief();
    fp << endl;

    fp << "actions:";
    for(int a=0;a<nrA;a++)
        fp << " "  << pu->GetReferred()->GetJointAction(a)->SoftPrintBrief();
    fp << endl;

    fp << "observations:";
    for(int o=0;o<nrO;o++)
        fp << " "  << pu->GetReferred()->GetJointObservation(o)->
            SoftPrintBrief();
    fp << endl;


    JointBeliefInterface*  isd = pu->GetNewJointBeliefFromISD();
    fp << "start: ";
    for(int s0=0;s0<nrS;s0++)
    {
        double bs = isd->Get(s0);
        fp <<  bs << " ";
    }
    fp << endl;

    delete isd;

    double p;
    for(int a=0;a<nrA;a++)
        for(int s0=0;s0<nrS;s0++)
            for(int s1=0;s1<nrS;s1++)
            {
                p=pu->GetTransitionProbability(s0,a,s1);
                if(p!=0)
                    fp << "T: " << a << " : " << s0 << " : " << s1 << " " 
                       << p << endl;
            }

    for(int a=0;a<nrA;a++)
        for(int o=0;o<nrO;o++)
            for(int s1=0;s1<nrS;s1++)
            {
                p=pu->GetObservationProbability(a,s1,o);
                if(p!=0)
                    fp << "O: " << a << " : " << s1 << " : " << o << " " 
                       << p << endl;
            }

    for(int a=0;a<nrA;a++)
        for(int s0=0;s0<nrS;s0++)
        {
            p=pu->GetReward(s0,a);
            if(p!=0)
                fp << "R: " << a << " : " << s0 << " : * : * "
                   << p << endl;
        }

}
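For reference, a file written by the function above follows Cassandra's POMDP format; an illustrative excerpt (state, action, and observation names and the probabilities are made up, only the line layout follows the code):

    discount: 0.95
    values: reward
    states: s0 s1
    actions: a0 a1
    observations: o0 o1
    start: 0.5 0.5
    T: 0 : 0 : 1 1
    O: 0 : 1 : 0 1
    R: 0 : 0 : * : * -1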