// Flattens a set of Q-functions into one value function: every vector of
// every element of Q is appended to the result, in iteration order.
ValueFunctionPOMDPDiscrete
AlphaVectorPlanning::QFunctionsToValueFunction(const QFunctionsDiscrete &Q)
{
    ValueFunctionPOMDPDiscrete flattened;

    QFDcit qIt=Q.begin();
    while(qIt!=Q.end())
    {
        VFPDcit vecIt=qIt->begin();
        while(vecIt!=qIt->end())
        {
            flattened.push_back(*vecIt);
            ++vecIt;
        }
        ++qIt;
    }

    return flattened;
}
// Example #2
// 0
double BeliefValue::GetValue(const BeliefInterface &Belief,
                             const QFunctionsDiscrete &Q)
{
    double x,maxVal=-DBL_MAX;

    for(QFDcit i=Q.begin();i!=Q.end();++i)
    {
        x=GetValue(Belief,*i);
        if(x>maxVal)
            maxVal=x;
    }

    return(maxVal);    
}
// Example #3
// 0
// For each belief in Beliefs, returns the largest value over all elements
// of Q. Each element of Q is evaluated on the whole belief set through the
// per-element GetValues overload. Entries stay at -DBL_MAX when Q is empty.
vector<double> BeliefValue::GetValues(const BeliefSet &Beliefs,
                                      const QFunctionsDiscrete &Q)
{
    vector<double> best(Beliefs.size(),-DBL_MAX);

    for(QFDcit qIt=Q.begin();qIt!=Q.end();++qIt)
    {
        const vector<double> current=GetValues(Beliefs,*qIt);

        // element-wise maximum of the running best and the current values
        for(unsigned int b=0;b!=current.size();++b)
        {
            if(current[b]>best[b])
            {
                best[b]=current[b];
            }
        }
    }

    return best;
}
// Example #4
// 0
void QAlphaVector::Initialize()
{
    size_t nrS=GetPU()->GetNrStates();
    size_t nrA=GetPU()->GetNrJointActions();
    size_t h=GetPU()->GetHorizon();

#if 0 // this was code to convert alpha vectors coming from an infinite-horizon method

    ValueFunctionPOMDPDiscrete V=AlphaVectorPlanning::ImportValueFunction(_m_filename);
    cout << SoftPrint(V) << endl;
    QFunctionsDiscrete Q = AlphaVectorPlanning::ValueFunctionToQ(V,nrA,nrS);

    _m_Q.resize(h);
    for(Index t=0;t!=h;++t)
    {
        _m_Q.at(t).resize(nrA);
        for(Index a=0;a!=nrA;++a)
            for(Index i=0;i!=Q.at(a).size();++i)
            {
                AlphaVector alpha(nrS);
                alpha.SetAction(a);
                for(Index s=0;s!=nrS;++s)
                {
                    double value=Q.at(a).at(i).GetValue(s+(nrS*t));
                    alpha.SetValue(value,s);
                }
                _m_Q.at(t).at(a).push_back(alpha);
                cout << t << " " << a << " " << alpha.SoftPrint() << endl;
            }
    }

#else

    for(Index t=0;t!=h;++t)
    {
        stringstream ss;
        ss << _m_filename << "_t" << t;
        ValueFunctionPOMDPDiscrete V=AlphaVectorPlanning::ImportValueFunction(ss.str());
        _m_Q.push_back(AlphaVectorPlanning::ValueFunctionToQ(V,nrA,nrS));
    }

#endif
}
// Splits the value function V into one value function per action: entry a
// of the result holds, in their original order, the vectors of V whose
// GetAction() equals a, for a in [0,nrA).
//
// An action with no vector in V gets a single placeholder vector of size
// nrS filled with -DBL_MAX and tagged with that action, so every entry of
// the result is non-empty.
QFunctionsDiscrete
AlphaVectorPlanning::ValueFunctionToQ(const ValueFunctionPOMDPDiscrete &V,
                                      size_t nrA, size_t nrS)
{
    QFunctionsDiscrete perAction;

    for(Index action=0;action!=nrA;++action)
    {
        // collect the vectors of V that belong to this action
        ValueFunctionPOMDPDiscrete vectorsOfAction;
        for(Index v=0;v!=V.size();++v)
        {
            if(V[v].GetAction()==action)
                vectorsOfAction.push_back(V[v]);
        }

        if(vectorsOfAction.size()!=0)
        {
#if DEBUG_AlphaVectorPlanning_ValueFunctionToQ
            cout << "AlphaVectorPlanning::GetQFunctionsFromV: action " << action 
                 << " has " << vectorsOfAction.size() 
                 << " vector(s) " << endl;
#endif
        }
        else
        {
            // if the action has no vector, it's dominated everywhere, so
            // must never be chosen
            AlphaVector dominatedVector(nrS,-DBL_MAX);
            dominatedVector.SetAction(action);
            vectorsOfAction.push_back(dominatedVector);
#if DEBUG_AlphaVectorPlanning_ValueFunctionToQ
            cout << "AlphaVectorPlanning::GetQFunctionsFromV: action " << action 
                 << " is dominated" << endl;
#endif
        }

        perAction.push_back(vectorsOfAction);
    }

    return perAction;
}
// Example #6
// 0
// Performs a backup for every belief in S and every joint action, and
// returns the resulting vectors grouped per action.
//
// For belief k and action a, the vector produced by BeliefBackup is added
// to entry a of the result unless VectorIsInValueFunction reports it is
// already there. The result has Q.size() entries and is indexed by joint
// action. In verbose mode, the value at belief k of the vectors just
// computed for k is printed.
QFunctionsDiscrete
PerseusBGPlanner::BackupStageAll(const BeliefSet &S,
                                 const QFunctionsDiscrete &Q) const
{
    vector<double> oldValues=BeliefValue::GetValues(S,Q);
    vector<double> newValues;

    int nrBeliefs=oldValues.size();
    int nrStates=GetPU()->GetNrStates();
    QFunctionsDiscrete backedUpQ(Q.size());
    AlphaVector backedUp(nrStates);

    ValueFunctionPOMDPDiscrete V=QFunctionsToValueFunction(Q);
    GaoVectorSet Gao=BackupStageLeadIn(V);

    // the vectors computed for the belief currently being processed
    ValueFunctionPOMDPDiscrete vectorsForBelief;

    for(int k=0;k<nrBeliefs;++k)
    {
        vectorsForBelief.clear();

        for(unsigned int a=0;a!=GetPU()->GetNrJointActions();++a)
        {
            // backup the belief
            backedUp=BeliefBackup(*S[k],a,Gao,V,_m_backupType);

            // store the vector for this action unless it is already present
            const bool alreadyPresent=
                VectorIsInValueFunction(backedUp,backedUpQ[a]);
            if(!alreadyPresent)
            {
                backedUpQ[a].push_back(backedUp);
            }
            vectorsForBelief.push_back(backedUp);
        }

        if(!GetVerbose())
            continue;

        newValues=BeliefValue::GetValues(S,vectorsForBelief);
        cout << "Added vectors for " << k << " (V " << newValues[k] 
             << ")" << endl;
    }

    BackupStageLeadOut(Gao);

    return backedUpQ;
}
// Example #7
// 0
// Builds the vector set for joint action a by cross-summing the
// per-observation sets *G[a][0], *G[a][1], ..., *G[a][nrO-1], and appends
// every resulting vector to Q[a] as an AlphaVector tagged with action a.
//
// G           : per-action, per-observation vector sets; G[a][o] is
//               dereferenced, so its entries are pointer-like.
// Q           : output; only Q[a] is modified.
// a           : joint action whose vectors are computed.
// doIncPrune  : when true, Prune() is applied after every cross-sum.
//               When false, no intermediate pruning is done and, after
//               the vectors have been appended, Q[a] is pruned and the
//               function then throws unconditionally (see below).
// maxNrAlphas : forwarded to CheckMaxNrVectors(); its exact semantics are
//               defined there.
//
// Throws E("MonahanPOMDPPlanner fix error") whenever doIncPrune is false.
// NOTE(review): CheckMaxNrVectors() presumably throws when the limit is
// exceeded -- confirm against its definition.
void MonahanPOMDPPlanner::MonahanCrossSum(const GaoVectorSet &G,
        QFunctionsDiscrete &Q,
        Index a,
        bool doIncPrune,
        size_t maxNrAlphas) const
{
    Index nrS=GetPU()->GetNrStates(),
          nrO=GetPU()->GetNrJointObservations();
    // scratch vector, reused for every row appended to Q[a] below
    AlphaVector alpha(nrS);

#if DEBUG_AlphaVectorPlanning_CrossSum
    cout << "AlphaVectorPlanning::MonahanCrossSum for action " << a << endl;
#endif

    // initialize with the number of vectors already computed for other time steps
    size_t nrVectorsComputed=GetNrVectors();

    // Do the cross-sums, creates G_a of (3.25)
    // NOTE(review): G[a][0] is read unconditionally and the loop starts at
    // o=1 with a != test, so this assumes nrO >= 1 -- confirm.
    VectorSet Ga=*G[a][0];
    for(Index o=1; o!=nrO; o++)
    {
        if(doIncPrune)
        {
            // incremental pruning: prune after each individual cross-sum
            VectorSet Ga2=CrossSum(Ga,*G[a][o]);
#if 0
            // disabled variant that passed the vector limit to Prune()
            VectorSet Ga1=Prune(Ga2,maxNrAlphas);
            Ga=Ga1;
#else
            Ga=Prune(Ga2);
#endif
        }
        else
        {
            // plain cross-sum, no intermediate pruning
            VectorSet Ga1=CrossSum(Ga,*G[a][o]);
            Ga=Ga1;
        }
#if DEBUG_AlphaVectorPlanning_CrossSum
        if(maxNrAlphas)
            cout << "AlphaVectorPlanning::MonahanCrossSum nrAlphas " << Ga.size1()
                 << " (max " << maxNrAlphas << ")" << endl;
#endif

        // check the running total: vectors from earlier plus the rows
        // currently held in Ga
        CheckMaxNrVectors(maxNrAlphas,nrVectorsComputed+Ga.size1());
    }

    // Add the resulting vectors to V (HV_n of (3.25))
    // Each row k of Ga is copied element by element into alpha, which is
    // then appended to Q[a].
    for(Index k=0; k!=Ga.size1(); ++k)
    {
        alpha.SetAction(a);
        for(Index s=0; s!=nrS; s++)
            alpha.SetValue(Ga(k,s),s);
        Q[a].push_back(alpha);
        nrVectorsComputed++;
    }

    if(!doIncPrune)
    {
        Q[a]=Prune(Q[a]);
        // NOTE(review): nrVectorsComputed was already incremented once per
        // vector appended above, so adding Q[a].size() here may count the
        // vectors of this action twice -- confirm the intended total.
        CheckMaxNrVectors(maxNrAlphas,nrVectorsComputed+Q[a].size());
        // THIS IS WRONG
        // The unconditional throw below makes the non-incremental-pruning
        // path always fail; everything after it in this block is
        // unreachable.
        throw(E("MonahanPOMDPPlanner fix error"));
        // NOTE(review): Q is indexed by action (Q[a]), so Q.size() is not
        // the number of vectors in Q[a]; presumably this is what the
        // "THIS IS WRONG" marker refers to -- confirm before re-enabling.
        if(maxNrAlphas && Q.size()>maxNrAlphas)
        {
            stringstream ss;
            ss << "AlphaVectorPlanning::MonahanCrossSum() too many alpha vectors "
               << Q.size() << ">" << maxNrAlphas;
            throw(E(ss.str()));
        }
    }
}
// Example #8
// 0
// Sampling-based backup stage: repeats rounds of backups until every
// belief in S has a value under the newly computed vectors that is at
// least its value under Q.
//
// In each round, for every joint action a, a belief index is drawn from
// the not-yet-improved beliefs by SampleNotImprovedBeliefIndex and that
// belief is backed up for a. If the backed-up vector gives the sampled
// belief a lower value than Q did, the maximizing vector of Q[a] for that
// belief is used instead. The chosen vector is added to entry a of the
// result (unless already present) and to the round's vector set. After
// the round, every pending belief whose value under the round's vectors
// is >= its old value is marked as improved.
//
// Returns the new vectors grouped per joint action (Q.size() entries).
QFunctionsDiscrete
PerseusBGPlanner::BackupStageSamplingAlt(const BeliefSet &S,
                                         const QFunctionsDiscrete &Q) const
{
    vector<double> oldValues=BeliefValue::GetValues(S,Q);
    vector<double> roundValues;

    int nrBeliefs=oldValues.size();
    int nrPending=nrBeliefs;
    int nrStates=GetPU()->GetNrStates();
    // index of the most recently sampled belief
    int sampled=-1;
    vector<bool> pending(nrBeliefs,true);
    QFunctionsDiscrete backedUpQ(Q.size());
    AlphaVector chosen(nrStates);

    ValueFunctionPOMDPDiscrete V=QFunctionsToValueFunction(Q);
    GaoVectorSet Gao=BackupStageLeadIn(V);

    // the vectors selected during the current round, one per action
    ValueFunctionPOMDPDiscrete roundVectors;

    while(nrPending!=0)
    {
        roundVectors.clear();

        for(unsigned int a=0;a!=GetPU()->GetNrJointActions();++a)
        {
            // sample a belief index from the number of not improved beliefs
            sampled=SampleNotImprovedBeliefIndex(pending,nrPending);
            // backup the belief
            chosen=BeliefBackup(*S[sampled],a,Gao,V,_m_backupType);

            // value the backed-up vector assigns to the sampled belief
            double backedUpValue=S[sampled]->InnerProduct(chosen.GetValues());

            if(backedUpValue<oldValues[sampled])
            {
                // no improvement: fall back to the old vector for this action
#if DEBUG_PerseusBGPlanner
                cout << "Getting n-1 vector for action " << a << ", belief "
                     << sampled << " (" << backedUpValue << " < " << oldValues[sampled] << ")" << endl;
#endif
                chosen=BeliefValue::GetMaximizingVector(S,sampled,Q[a]);
            }
            else if(GetVerbose())
            {
                cout << "Added vector for action " << a << ", belief " 
                     << sampled << " (Q " << backedUpValue << " >= " << oldValues[sampled] << ")"
                     << endl;
            }

            // store the vector for this action unless it is already present
            const bool alreadyPresent=
                VectorIsInValueFunction(chosen,backedUpQ[a]);
            if(!alreadyPresent)
            {
                backedUpQ[a].push_back(chosen);
            }
            roundVectors.push_back(chosen);
        }

        // update which beliefs have been improved
        roundValues=BeliefValue::GetValues(S,roundVectors);
        int nrNewlyImproved=0;
        for(int b=0;b!=nrBeliefs;++b)
        {
            if(!pending[b])
                continue;
            if(roundValues[b]>=oldValues[b])
            {
                pending[b]=false;
                --nrPending;
                ++nrNewlyImproved;
            }
        }

        if(GetVerbose())
        {
            cout << "Added vectors for " << sampled << " (V " << roundValues[sampled] 
                 << " improved " << nrNewlyImproved << ")" << endl;
        }
    }

    BackupStageLeadOut(Gao);

    return backedUpQ;
}