예제 #1
0
  // Folds one observed transition into the stored estimate for the
  // (state, action) pair that produced it.
  //
  // performed: the state the step started from and the action taken there.
  //            The start state is expected to be known already; this is only
  //            checked with assert.
  // outcome:   the state that was reached and the reward received. A result
  //            state that has not been seen yet is registered through
  //            handleNewState() before it is used.
  //
  // The stored value is moved toward
  //   reward + futureDiscount * maxQ(result state)
  // by the fraction learnRate of the gap between that target and the current
  // value.
  void actionOutcome(pair<State*, Action*> performed, pair<State*, double> outcome) {
    State *fromState = performed.first;
    State *toState = outcome.first;

    // Precondition: callers must only report steps that start in a known state.
    assert(haveSeenState(fromState));

    const bool toStateKnown = haveSeenState(toState);
    if (!toStateKnown) {
      handleNewState(toState);
    }

    // Looked up only after the result state has been registered, as before.
    ActionValue *entry = findActionValue(fromState, performed.second);
    assert(entry != nullptr);

    // maxQ presumably yields the best value currently stored for toState;
    // its definition is not visible here.
    const auto bestNext = maxQ(toState);
    const double target = outcome.second + futureDiscount * bestNext;
    const double gap = target - entry->value;
    entry->value += learnRate * gap;
  }
예제 #2
0
  // Records the result of one step and nudges the stored value of the
  // (state, action) pair that was taken.
  //
  // performed: the state the step started from and the action taken there.
  // outcome:   the state that was reached and the reward received.
  //
  // Either state is registered through handleNewState() if it has not been
  // seen before, start state first. The stored value then moves toward
  //   reward + futureDiscount * maxQ(result state)
  // by the fraction learnRate of the remaining difference.
  void ActionOutcome(pair<State *, Action *> performed, pair<State *, double> outcome) {
    State *origin = performed.first;
    State *landing = outcome.first;

    // Make sure both endpoints of the transition are known before any lookup.
    if (!haveSeenState(origin))
      handleNewState(origin);
    if (!haveSeenState(landing))
      handleNewState(landing);

    ActionValue *slot = findActionValue(origin, performed.second);
    assert(slot != nullptr);

    // maxQ presumably yields the best value currently stored for the reached
    // state; its definition is not visible here.
    const auto lookahead = maxQ(landing);
    const double target = outcome.second + futureDiscount * lookahead;

    auto &stored = slot->value;
    stored += learnRate * (target - stored);
  }