Example #1
0
/**
 * Execute a single RTDP trial starting from the MDP's initial state.
 *
 * The trial repeatedly performs a Bellman update on the current state, follows
 * the resulting greedy action to a randomly sampled successor, and stops as soon
 * as a goal or dead end is reached or mdp->horizon steps have been taken.
 * rtdp->currentHorizon is reset at the start and counts the steps taken.
 *
 * @param mdp   The MDP; s0, horizon, n, ns, m, S, T, and R are read.
 * @param rtdp  The solver state; V, pi, and currentHorizon are written.
 * @return NOVA_SUCCESS, unconditionally.
 */
int ssp_rtdp_update_cpu(const MDP *mdp, SSPRTDPCPU *rtdp)
{
    unsigned int state = mdp->s0;
    bool reachedGoal = ssp_rtdp_is_goal_cpu(mdp, rtdp, state);
    bool reachedDeadEnd = ssp_rtdp_is_dead_end_cpu(mdp, rtdp, state);

    for (rtdp->currentHorizon = 0;
            !(reachedGoal || reachedDeadEnd) && rtdp->currentHorizon < mdp->horizon;
            rtdp->currentHorizon++) {
        // Back up the value of the current state; this also records its greedy action in pi.
        ssp_rtdp_bellman_update_state_cpu(mdp->n, mdp->ns, mdp->m,
                                            mdp->S, mdp->T, mdp->R,
                                            state, rtdp->V, rtdp->pi);

        unsigned int greedyAction = rtdp->pi[state];

        // Sample where the greedy action leads.
        // NOTE(review): 'successor' is passed by name, so the helper presumably takes it
        // by reference and overwrites it -- confirm against its declaration.
        unsigned int successor = 0;
        ssp_rtdp_random_successor_cpu(mdp, rtdp, state, greedyAction, successor);

        // Move on, then re-evaluate the stopping conditions for the new state.
        state = successor;
        reachedGoal = ssp_rtdp_is_goal_cpu(mdp, rtdp, state);
        reachedDeadEnd = ssp_rtdp_is_dead_end_cpu(mdp, rtdp, state);
    }

    // Pin the value of the terminal state. The dead-end assignment deliberately comes
    // second: a state flagged as a goal can still be a dead end in a badly built MDP,
    // and in that case FLT_MAX overrides the zero.
    if (reachedGoal) {
        rtdp->V[state] = 0.0f;
    }
    if (reachedDeadEnd) {
        rtdp->V[state] = FLT_MAX;
    }

    // The state the trial ended on is marked as expanded no matter why it ended.
    ssp_rtdp_mark_expanded_cpu(mdp, rtdp, state);

    return NOVA_SUCCESS;
}
Example #2
0
/**
 * Execute a single RTDP trial starting from mdp->s0.
 *
 * Each step performs a Bellman update on the current state, follows the greedy
 * action stored in mdp->pi to a randomly sampled successor, records the successor
 * in mdp->expanded the first time it is seen, and stops once the successor is a
 * goal or an explicit dead end. The trial also stops if the current state-action
 * pair lists no successor at all.
 *
 * @param mdp  The MDP and solver state. V, pi, expanded, ne, and currentHorizon
 *             are modified; currentHorizon is incremented once per call.
 * @return NOVA_SUCCESS, unconditionally.
 */
int ssp_rtdp_update_cpu(MDP *mdp)
{
    unsigned int s = mdp->s0;
    bool isGoal = false;
    bool isDeadEnd = false;

    while (!isGoal && !isDeadEnd) {
        // Take a greedy action and update the value of this state.
        ssp_rtdp_bellman_update_state_cpu(mdp->n, mdp->ns, mdp->m,
                                            mdp->S, mdp->T, mdp->R,
                                            s, mdp->V, mdp->pi);

        // This is the greedy action.
        unsigned int a = mdp->pi[s];

        // Randomly explore the state space using the action: walk the successor
        // list of (s, a), accumulating probability mass until it covers the target.
        float target = (float)rand() / (float)RAND_MAX;
        float current = 0.0f;
        unsigned int sp = 0;

        // An explicit flag is used instead of a sentinel value in sp, since any
        // sentinel (such as mdp->m) may coincide with a legitimate state index.
        bool foundSuccessor = false;

        for (unsigned int i = 0; i < mdp->ns; i++) {
            // A negative entry terminates the successor list.
            int spTmp = mdp->S[s * mdp->m * mdp->ns + a * mdp->ns + i];
            if (spTmp < 0) {
                break;
            }

            // Fall back to the first valid successor so that a valid transition
            // arises even if the accumulated mass never reaches the target
            // (e.g., due to floating point round-off).
            if (!foundSuccessor) {
                sp = spTmp;
                foundSuccessor = true;
            }

            current += mdp->T[s * mdp->m * mdp->ns + a * mdp->ns + i];

            if (current >= target) {
                sp = spTmp;
                break;
            }
        }

        // With no successor there is nowhere to transition to. End the trial here
        // rather than indexing the arrays below with an invalid state.
        if (!foundSuccessor) {
            break;
        }

        s = sp;

        // Add s to the set of expanded states, and set the action to
        // a valid one. This assignment ensures that goal states will
        // have a valid action, with their default values being invalid.
        if (mdp->pi[s] == mdp->m) {
            mdp->pi[s] = 0;

            mdp->expanded[mdp->ne] = s;
            mdp->ne++;
        }

        // Check if s is a goal.
        for (unsigned int i = 0; i < mdp->ng; i++) {
            if (s == mdp->goals[i]) {
                mdp->V[s] = 0.0f;
                isGoal = true;
                break;
            }
        }

        // ***Special Modification***
        // If this is an explicit dead end, meaning that every action is a
        // probability-one self-loop with non-zero cost, then we terminate but
        // assign the value of V to be maximal possible to within machine
        // precision. The successor must be s itself; otherwise any state with
        // only deterministic, positive-cost actions would be misclassified.
        // Note that this check runs after the goal check, so a state that is
        // both a goal and a dead end ends up with FLT_MAX.
        isDeadEnd = true;
        for (unsigned int ap = 0; ap < mdp->m; ap++) {
            int first = mdp->S[s * mdp->m * mdp->ns + ap * mdp->ns + 0];
            bool isSelfLoop = (first >= 0 && (unsigned int)first == s &&
                               mdp->T[s * mdp->m * mdp->ns + ap * mdp->ns + 0] == 1.0f);

            if (!(isSelfLoop && mdp->R[s * mdp->m + ap] > 0.0f)) {
                isDeadEnd = false;
                break;
            }
        }

        if (isDeadEnd) {
            mdp->V[s] = FLT_MAX;
        }
    }

    mdp->currentHorizon++;

    return NOVA_SUCCESS;
}