void actionOutcome(pair<State*, Action*> performed, pair<State*, double> outcome) { State *startState = performed.first; Action *actionPerformed = performed.second; State *resultState = outcome.first; double reward = outcome.second; assert(haveSeenState(startState)); if (!haveSeenState(resultState)) { handleNewState(resultState); } ActionValue *curValue = findActionValue(startState, actionPerformed); assert(curValue != nullptr); double newQ = reward + futureDiscount * maxQ(resultState); curValue->value += learnRate * (newQ - curValue->value); }
void ActionOutcome(pair<State *, Action *> performed, pair<State *, double> outcome) { State *startState = performed.first; Action *actionPerformed = performed.second; State *resultState = outcome.first; double reward = outcome.second; if (!haveSeenState(startState)) { handleNewState(startState); } if (!haveSeenState(resultState)) { handleNewState(resultState); } ActionValue *curValue = findActionValue(startState, actionPerformed); assert(curValue != nullptr); double newQ = reward + futureDiscount * maxQ(resultState); // cout << "r: " << reward << " fd: " << futureDiscount << " newQ: " << newQ << endl; curValue->value += learnRate * (newQ - curValue->value); // cout << "v : " << curValue->value << endl; // getchar(); }