/**
 * Combines the states of all state modifiers returned by getStateModifiers()
 * into a single feature state.
 *
 * Every slot of featState starts at discrete index 0 and continuous factor 1.0.
 * Each modifier then adds its own discrete index, scaled by a running stride
 * (the product of getDiscreteStateSize() of the modifiers handled so far).
 * Modifiers whose state is of type FEATURESTATE additionally multiply their
 * continuous factor into the slot. The result is passed to normalizeFeatures().
 *
 * @param stateCol  collection that is asked first for an already computed
 *                  state of each modifier; if it has none, the modifier
 *                  computes it into the matching buffer from `states`
 * @param featState output state, overwritten by this call
 */
void CFeatureOperatorAnd::getModifiedState(CStateCollection *stateCol, CState *featState)
{
    const unsigned int totalFeatures = getNumDiscreteStates();

    // Neutral start values: index 0 for the sum, factor 1.0 for the product.
    for (unsigned int slot = 0; slot < totalFeatures; ++slot)
    {
        featState->setDiscreteState(slot, 0);
        featState->setContinuousState(slot, 1.0);
    }

    int indexStride = 1;
    // How many consecutive output slots share the same feature of the current
    // modifier; shrinks with every modifier processed.
    int repetitions = totalFeatures;

    std::list<CStateModifier *>::iterator modIt = getStateModifiers()->begin();
    std::list<CState *>::iterator bufferIt = states->begin();

    while (modIt != getStateModifiers()->end())
    {
        CStateModifier *modifier = *modIt;
        repetitions /= modifier->getNumDiscreteStates();

        // Prefer the state stored in the collection; otherwise compute it
        // into this operator's own buffer state.
        CState *source;
        if (stateCol->isMember(modifier))
        {
            source = stateCol->getState(modifier);
        }
        else
        {
            source = *bufferIt;
            modifier->getModifiedState(stateCol, source);
        }

        const bool isFeatureState = source->getStateProperties()->isType(FEATURESTATE);

        for (unsigned int slot = 0; slot < totalFeatures; ++slot)
        {
            if (isFeatureState)
            {
                // Select which of the source's active features feeds this slot.
                unsigned int sourceSlot = (slot / repetitions) % source->getNumDiscreteStates();

                featState->setDiscreteState(slot, featState->getDiscreteState(slot) + indexStride * source->getDiscreteState(sourceSlot));
                featState->setContinuousState(slot, featState->getContinuousState(slot) * source->getContinuousState(sourceSlot));
            }
            else
            {
                // Non-feature state: only its first discrete value contributes,
                // the continuous factor is left untouched.
                featState->setDiscreteState(slot, featState->getDiscreteState(slot) + indexStride * source->getDiscreteState(0));
            }
        }

        indexStride = indexStride * modifier->getDiscreteStateSize();

        ++modIt;
        ++bufferIt;
    }

    normalizeFeatures(featState);
}
void CQAverageTDErrorLearner::receiveError(double error, CStateCollection *state, CAction *action, CActionData *) { CState *featureState = state->getState(averageErrorFunction->getFeatureCalculator()); for (unsigned int i = 0; i < featureState->getNumDiscreteStates(); i++) { int index = featureState->getDiscreteState(i); double featureFac = featureState->getContinuousState(i); double featureVal = averageErrorFunction->getValue(index, action); featureVal = featureVal * (updateRate + (1 - featureFac) * (1 - updateRate)) + error * (1 - updateRate) * featureFac; averageErrorFunction->setValue(index, action, featureVal); } }
/**
 * Concatenates the states of all state modifiers returned by
 * getStateModifiers() into one feature state.
 *
 * The active features of each modifier are appended to `state` one after the
 * other. Their discrete indices are shifted by the summed
 * getDiscreteStateSize() of the preceding modifiers, and their continuous
 * factors are scaled by the modifier's entry in featureFactors. A state of
 * type DISCRETESTATE (but not FEATURESTATE) contributes a single feature whose
 * factor is that entry itself; any other state contributes nothing.
 * The result is passed to normalizeFeatures().
 *
 * @param stateCol  collection that is asked first for an already computed
 *                  state of each modifier; if it has none, the modifier
 *                  computes it into the matching buffer from `states`
 * @param state     output state, written from slot 0 upwards
 */
void CFeatureOperatorOr::getModifiedState(CStateCollection *stateCol, CState *state)
{
    assert(bInit);

    int outSlot = 0;      // next slot of `state` to be written
    int indexOffset = 0;  // shift applied to the current modifier's indices

    std::list<CStateModifier *>::iterator modIt = getStateModifiers()->begin();
    std::list<CState *>::iterator bufferIt = states->begin();

    while (modIt != getStateModifiers()->end())
    {
        CStateModifier *modifier = *modIt;

        // Prefer the state stored in the collection; otherwise compute it
        // into this operator's own buffer state.
        CState *source;
        if (stateCol->isMember(modifier))
        {
            source = stateCol->getState(modifier);
        }
        else
        {
            source = *bufferIt;
            modifier->getModifiedState(stateCol, source);
        }

        double factor = (*this->featureFactors)[modifier];

        if (source->getStateProperties()->isType(FEATURESTATE))
        {
            for (unsigned int srcSlot = 0; srcSlot < source->getNumDiscreteStates(); ++srcSlot)
            {
                state->setDiscreteState(outSlot, source->getDiscreteState(srcSlot) + indexOffset);
                state->setContinuousState(outSlot, source->getContinuousState(srcSlot) * factor);
                ++outSlot;
            }
        }
        else if (source->getStateProperties()->isType(DISCRETESTATE))
        {
            state->setDiscreteState(outSlot, source->getDiscreteState(0) + indexOffset);
            state->setContinuousState(outSlot, factor);
            ++outSlot;
        }

        indexOffset += modifier->getDiscreteStateSize();

        ++modIt;
        ++bufferIt;
    }

    normalizeFeatures(state);
}
void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState) { CFeatureMap *featMap; CState *oldS = oldState->getState(properties); CState *newS = newState->getState(properties); double oldreward = 0.0; double visits = 0.0; int actionIndex = getActions()->getIndex(action); int type = oldS->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE); switch (type) { case FEATURESTATE: { for (unsigned int oldIndex = 0; oldIndex < oldS->getNumDiscreteStates(); oldIndex++) { int oldFeature = oldS->getDiscreteState(oldIndex); featMap = rewardTable->get(actionIndex, oldFeature); for (unsigned int newIndex = 0; newIndex < newS->getNumDiscreteStates(); newIndex++) { int newFeature = newS->getDiscreteState(newIndex); oldreward = featMap->getValue(newFeature); (*featMap)[newFeature] = oldreward + reward * newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex); if (!bExternVisitSparse) { visits = visitTable->get(actionIndex, oldFeature)->getValue(newFeature); (*visitTable->get(actionIndex, oldFeature))[newFeature] = visits + newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);; } } } break; } case DISCRETESTATE: default: { featMap = rewardTable->get(actionIndex, oldS->getDiscreteStateNumber()); oldreward = featMap->getValue(newS->getDiscreteStateNumber()); int feata = oldS->getDiscreteStateNumber(); int featb = newS->getDiscreteStateNumber(); (*featMap)[featb] = oldreward + reward; if (!bExternVisitSparse) { visits = visitTable->get(actionIndex, feata)->getValue(featb); (*visitTable->get(actionIndex, feata))[featb] = visits + 1.0; } break; } } }