double BayesianGameForDecPOMDPStage::
ComputeImmediateReward(Index jtI, Index jaI) const
{
    //Index jaohI = _m_jaohReps.at(jtI);
    //the expected immediate reward of joint action jaI under the joint
    //belief associated with joint type jtI: r = sum_s b(s) R(s,jaI)
    JointBeliefInterface* jb = _m_JBs.at(jtI);
    BeliefIteratorGeneric bit = jb->GetIterator();
    double r = 0.0;
    do{
        Index s = bit.GetStateIndex();
        double p = bit.GetProbability();
        r += p * _m_pu->GetReward(s, jaI);
    }while( bit.Next() );
    return r;
}
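
/* A minimal illustration (not part of the class): the do/while above is just
 * the belief-weighted sum r = sum_s b(s) R(s,jaI). With plain vectors
 * (hypothetical names: belief holds b(s), rewardRow holds R(s,jaI) for the
 * fixed joint action jaI) the same computation reads:
 *
 *   double r = 0.0;
 *   for(size_t s = 0; s < belief.size(); s++)
 *       r += belief[s] * rewardRow[s];
 */
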
/**Calculates the jaohI corresponding to jaI_arr and joI_arr and also
 * returns P(jaohI) and the expected reward obtained at the previous
 * time steps GIVEN this joint action-observation history.
 *
 * \todo TODO add following to .h
 *
 * Basically this function is a form of
 *      PlanningUnitMADPDiscrete::GetJAOHProbs(Recursively)
 * that also computes the reward.
 *
 * */
void BayesianGameForDecPOMDPStage::ProbRewardForjoahI(
        //input args
        Index ts, Index jtI, Index* jaI_arr,Index* joI_arr, 
        //output args
        Index& jaohI, double& PjaohI, double& ExpR_0_prevTS_thisJAOH )
{
    //first we get the initial jaoh
    JointActionObservationHistoryTree * jaoht = 
        _m_pu->GetJointActionObservationHistoryTree(0);

    double CPjaohI = 1.0; 
    PjaohI = CPjaohI; // == 1.0

    //get the initial belief
    JointBeliefInterface* jb = _m_pu->GetNewJointBeliefFromISD();

    Index tI = 0;
    while(tI < ts)
    {
        //calculate the expected immediate reward for time step tI
        double ExpR_0_prevTS_thisJAOH_thisT = 0.0;
#if USE_BeliefIteratorGeneric
        BeliefIteratorGeneric it=jb->GetIterator();
        do
        {
            double R_si_ja = _m_pu->GetReward(it.GetStateIndex(), jaI_arr[tI]); 
            if(DEBUG_BG4DECPOMDP4) 
                cout << "R(s="<<it.GetStateIndex()<<",ja="<<jaI_arr[tI]<<")="<< R_si_ja << "\n";
            ExpR_0_prevTS_thisJAOH_thisT += it.GetProbability() * R_si_ja;
        } while(it.Next());
#else
        for(Index sI=0; sI < _m_pu->GetNrStates(); sI++)
        {
            double R_si_ja = _m_pu->GetReward(sI, jaI_arr[tI]); 
            if(DEBUG_BG4DECPOMDP4) 
                cout << "R(s="<<sI<<",ja="<<jaI_arr[tI]<<")="<< R_si_ja << "\n";
            ExpR_0_prevTS_thisJAOH_thisT += jb->Get(sI) * R_si_ja;
        }
#endif
        ExpR_0_prevTS_thisJAOH += ExpR_0_prevTS_thisJAOH_thisT;
        if(DEBUG_BG4DECPOMDP4)
        {
            cout << "calculating expected reward R(oaHist,a) for tI=" << tI
                << " oaHist:";
            jaoht->GetJointActionObservationHistory()->Print();
            cout << endl;
            cout << "R(b,a) (exp reward for jtI=" << jtI
                << ", tI=" << tI << ") is " << ExpR_0_prevTS_thisJAOH_thisT << endl;
        }
        //advance to the successor joint action-observation history...
        jaoht = jaoht->GetSuccessor( jaI_arr[tI], joI_arr[tI] );
        jaohI = jaoht->GetIndex();

        //...and update the belief; Update returns the probability of the new
        //joint observation given the belief and joint action, which is
        //accumulated into P(jaohI)
        CPjaohI = jb->Update( *_m_pu->GetReferred(), jaI_arr[tI], joI_arr[tI] );
        PjaohI = PjaohI * CPjaohI;
        tI++;
    }
    delete jb; //free the belief allocated with 'new'
    if(DEBUG_BG4DECPOMDP4)
    {
        cout << "expected previous reward (up to ts-1) for (jtI="  << jtI << 
            ") ";
        jaoht->GetJointActionObservationHistory()->Print();
        cout << " is "<< ExpR_0_prevTS_thisJAOH <<endl << endl;
    }
}
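
/* Usage sketch (hypothetical caller, not part of this file): code that builds
 * the Bayesian game for stage ts would typically call this once per joint
 * type, e.g.
 *
 *   Index jaohI;
 *   double PjaohI = 0.0;
 *   double ExpR_prev = 0.0; //must start at 0: the function only adds to it
 *   bg->ProbRewardForjoahI(ts, jtI, jaI_arr, joI_arr,
 *                          jaohI, PjaohI, ExpR_prev);
 *
 * where jtI, jaI_arr and joI_arr identify the joint type and the joint
 * actions/observations taken at stages 0..ts-1. Afterwards PjaohI is the
 * probability of the resulting joint action-observation history and
 * ExpR_prev the expected reward already accrued along it.
 */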