/**
 * Builds the combined feature state of all registered state modifiers.
 *
 * Every output slot starts with discrete value 0 and continuous value 1.0.
 * For each modifier, its state is taken from the collection when the
 * collection already holds it; otherwise it is computed into the matching
 * buffer from `states`. The modifier's discrete values are added into the
 * output scaled by a running offset, and (for feature states) its continuous
 * values are multiplied into the output. The result is passed to
 * normalizeFeatures().
 *
 * @param stateCol  collection used to look up or compute the modifier states
 * @param featState output state that receives the combined features
 */
void CFeatureOperatorAnd::getModifiedState(CStateCollection *stateCol, CState *featState)
{
    const unsigned int totalFeatures = getNumDiscreteStates();

    // Neutral start values: 0 for the summed indices, 1.0 for the multiplied factors.
    for (unsigned int slot = 0; slot < totalFeatures; ++slot)
    {
        featState->setDiscreteState(slot, 0);
        featState->setContinuousState(slot, 1.0);
    }

    int indexStride = 1;               // multiplier applied to the current modifier's discrete values
    int repetitions = totalFeatures;   // divided by each modifier's discrete-state count in turn

    std::list<CStateModifier *>::iterator modifierIt = getStateModifiers()->begin();
    std::list<CState *>::iterator bufferIt = states->begin();

    while (modifierIt != getStateModifiers()->end())
    {
        CStateModifier *modifier = *modifierIt;

        repetitions /= modifier->getNumDiscreteStates();

        // Prefer the state stored in the collection; otherwise compute it into our own buffer.
        CState *source = NULL;
        if (stateCol->isMember(modifier))
        {
            source = stateCol->getState(modifier);
        }
        else
        {
            source = *bufferIt;
            modifier->getModifiedState(stateCol, source);
        }

        if (source->getStateProperties()->isType(FEATURESTATE))
        {
            for (unsigned int slot = 0; slot < totalFeatures; ++slot)
            {
                // Select which of the modifier's features contributes to this output slot.
                unsigned int sourceSlot = (slot / repetitions) % source->getNumDiscreteStates();

                featState->setDiscreteState(slot,
                    featState->getDiscreteState(slot) + indexStride * source->getDiscreteState(sourceSlot));
                featState->setContinuousState(slot,
                    featState->getContinuousState(slot) * source->getContinuousState(sourceSlot));
            }
        }
        else
        {
            // Non-feature state: only its first discrete value contributes, and no factor is applied.
            for (unsigned int slot = 0; slot < totalFeatures; ++slot)
            {
                featState->setDiscreteState(slot,
                    featState->getDiscreteState(slot) + indexStride * source->getDiscreteState(0));
            }
        }

        indexStride = indexStride * modifier->getDiscreteStateSize();

        ++modifierIt;
        ++bufferIt;
    }

    normalizeFeatures(featState);
}
/**
 * Concatenates the features of all registered state modifiers into `state`.
 *
 * For each modifier, its state is taken from the collection when the
 * collection already holds it; otherwise it is computed into the matching
 * buffer from `states`. Feature states contribute all of their
 * (index, factor) pairs; discrete states contribute a single pair. Indices
 * are shifted by the summed discrete state sizes of the preceding modifiers,
 * and factors are scaled by the modifier's entry in `featureFactors`. The
 * result is passed to normalizeFeatures().
 *
 * @param stateCol collection used to look up or compute the modifier states
 * @param state    output state that receives the concatenated features
 */
void CFeatureOperatorOr::getModifiedState(CStateCollection *stateCol, CState *state)
{
    assert(bInit);

    int writePos = 0;     // next output slot to fill
    int indexOffset = 0;  // shift applied to the current modifier's feature indices

    std::list<CStateModifier *>::iterator modifierIt = getStateModifiers()->begin();
    std::list<CState *>::iterator bufferIt = states->begin();

    while (modifierIt != getStateModifiers()->end())
    {
        CStateModifier *modifier = *modifierIt;

        // Prefer the state stored in the collection; otherwise compute it into our own buffer.
        CState *source;
        if (stateCol->isMember(modifier))
        {
            source = stateCol->getState(modifier);
        }
        else
        {
            source = *bufferIt;
            modifier->getModifiedState(stateCol, source);
        }

        double modifierFactor = (*this->featureFactors)[modifier];

        if (source->getStateProperties()->isType(FEATURESTATE))
        {
            for (unsigned int k = 0; k < source->getNumDiscreteStates(); ++k)
            {
                state->setDiscreteState(writePos, source->getDiscreteState(k) + indexOffset);
                state->setContinuousState(writePos, source->getContinuousState(k) * modifierFactor);
                ++writePos;
            }
        }
        else if (source->getStateProperties()->isType(DISCRETESTATE))
        {
            // A discrete state yields exactly one feature whose factor is the modifier factor.
            state->setDiscreteState(writePos, source->getDiscreteState(0) + indexOffset);
            state->setContinuousState(writePos, modifierFactor);
            ++writePos;
        }

        indexOffset += modifier->getDiscreteStateSize();

        ++modifierIt;
        ++bufferIt;
    }

    normalizeFeatures(state);
}
void CEpisodeMatlabOutput::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState) { CActionData *actionData = action->getActionData(); fprintf(stream, "%d %d ", nEpisodes, nSteps); CState *state = oldState->getState(properties); unsigned int i = 0; for (i = 0; i < properties->getNumContinuousStates(); i++) { fprintf(stream, "%lf ", state->getContinuousState(i)); } for (i = 0; i < properties->getNumDiscreteStates(); i++) { fprintf(stream, "%d ", (int)state->getDiscreteState(i)); } state = nextState->getState(properties); for (i = 0; i < properties->getNumContinuousStates(); i++) { fprintf(stream, "%lf ", state->getContinuousState(i)); } for (i = 0; i < properties->getNumDiscreteStates(); i++) { fprintf(stream, "%d ", (int)state->getDiscreteState(i)); } fprintf(stream,"%d ", actions->getIndex(action)); if (actionData != NULL) { CContinuousActionData *contData = dynamic_cast<CContinuousActionData *>(actionData); for (int j = 0; j < contData->nrows(); j++) { fprintf(stream, "%lf ", contData->element(j)); } } fprintf(stream, "%lf ", reward); fprintf(stream, "\n"); nSteps++; fflush(stream); }
/**
 * Copies a selection of state variables from the original state.
 *
 * The source state is looked up in the collection via `originalState`.
 * Continuous variable k of the output is taken from source position
 * contStatesInd[k]; discrete variable k from source position discStatesInd[k].
 *
 * @param originalStateCol collection holding the source state
 * @param modifiedState    output state that receives the selected variables
 */
void CStateVariablesChooser::getModifiedState(CStateCollection *originalStateCol, CState *modifiedState)
{
    CState *source = originalStateCol->getState(originalState);

    unsigned int k = 0;
    while (k < getNumContinuousStates())
    {
        modifiedState->setContinuousState(k, source->getContinuousState(contStatesInd[k]));
        ++k;
    }

    k = 0;
    while (k < getNumDiscreteStates())
    {
        modifiedState->setDiscreteState(k, source->getDiscreteState(discStatesInd[k]));
        ++k;
    }
}
/**
 * Accumulates the reward and the visit weight for the newly reached state.
 *
 * The state is obtained from the collection via `discretizer`. For a feature
 * state, every feature adds reward * factor to `rewards` and factor to
 * `visits` at its feature index. For a discrete state, the full reward and a
 * visit weight of 1.0 are added at the first discrete value. Any other state
 * type leaves both tables untouched.
 *
 * @param reward      reward received for the step
 * @param newStateCol state collection after the step
 */
void CFeatureStateRewardModel::nextStep(CStateCollection *, CAction *, double reward, CStateCollection *newStateCol)
{
    CState *reached = newStateCol->getState(discretizer);

    if (reached->getStateProperties()->isType(FEATURESTATE))
    {
        // NOTE(review): the bound is the continuous-state count although discrete
        // values are indexed too; presumably both counts match for feature states - confirm.
        for (unsigned int k = 0; k < reached->getNumContinuousStates(); ++k)
        {
            rewards[reached->getDiscreteState(k)] += reward * reached->getContinuousState(k);
            visits[reached->getDiscreteState(k)] += reached->getContinuousState(k);
        }
        return;
    }

    if (reached->getStateProperties()->isType(DISCRETESTATE))
    {
        rewards[reached->getDiscreteState(0)] += reward;
        visits[reached->getDiscreteState(0)] += 1.0;
    }
}
void CQAverageTDErrorLearner::receiveError(double error, CStateCollection *state, CAction *action, CActionData *) { CState *featureState = state->getState(averageErrorFunction->getFeatureCalculator()); for (unsigned int i = 0; i < featureState->getNumDiscreteStates(); i++) { int index = featureState->getDiscreteState(i); double featureFac = featureState->getContinuousState(i); double featureVal = averageErrorFunction->getValue(index, action); featureVal = featureVal * (updateRate + (1 - featureFac) * (1 - updateRate)) + error * (1 - updateRate) * featureFac; averageErrorFunction->setValue(index, action, featureVal); } }
/**
 * Appends a new trace entry built from the given state, action and factor.
 *
 * Pushes `factor` onto `_eTraces`, an MDDAGState built from the collection's
 * default state onto `_eTraceStates`, and `action` onto `_actions`, in that
 * order. The `data` argument is not used.
 *
 * @param state  state collection whose default state is recorded
 * @param action action recorded with the trace
 * @param factor trace value recorded for the entry
 * @param data   unused
 */
void HashTableETraces::addETrace(CStateCollection *state, CAction *action, double factor, CActionData *data)
{
    CState *current = state->getState();

    // This value was only consumed by a debug trace that is currently disabled.
    // The read is kept so that whatever checks the accessor performs still run.
    int firstDiscreteValue = current->getDiscreteState(0);
    static_cast<void>(firstDiscreteValue);

    _eTraces.push_back(factor);
    _eTraceStates.push_back(MDDAGState(current));
    _actions.push_back(action);
}
/**
 * Expands a sparse feature state into continuous state variables.
 *
 * The output is reset first. The feature state is taken from the collection
 * when the collection already holds it for `featureStateCalc`; otherwise it
 * is computed into the member buffer `featureState`. Each active feature then
 * writes its factor into the continuous variable selected by its feature index.
 *
 * @param stateCol collection used to look up or compute the feature state
 * @param state    output state; reset and then filled with the feature factors
 */
void CFeatureStateNNInput::getModifiedState(CStateCollection *stateCol, CState *state)
{
    state->resetState();

    CState *features;
    if (!stateCol->isMember(featureStateCalc))
    {
        // Not available in the collection: compute into our own buffer.
        featureStateCalc->getModifiedState(stateCol, featureState);
        features = featureState;
    }
    else
    {
        features = stateCol->getState(featureStateCalc);
    }

    unsigned int k = 0;
    while (k < featureStateCalc->getNumActiveFeatures())
    {
        state->setContinuousState(features->getDiscreteState(k), features->getContinuousState(k));
        ++k;
    }
}
/**
 * Writes the state reached by the step as one line of text to `stream`.
 *
 * All continuous values are printed first, then all discrete values (as
 * integers), each followed by a space. The line is terminated with a newline
 * and the stream is flushed.
 *
 * @param nextState state collection after the step; the state is selected via `properties`
 */
void CStateOutput::nextStep(CStateCollection *, CAction *, CStateCollection *nextState)
{
    CState *reached = nextState->getState(properties);

    unsigned int k = 0;
    while (k < properties->getNumContinuousStates())
    {
        fprintf(stream, "%lf ", reached->getContinuousState(k));
        ++k;
    }

    k = 0;
    while (k < properties->getNumDiscreteStates())
    {
        fprintf(stream, "%d ", (int)reached->getDiscreteState(k));
        ++k;
    }

    fprintf(stream, "\n");
    fflush(stream);
}
void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState) { CFeatureMap *featMap; CState *oldS = oldState->getState(properties); CState *newS = newState->getState(properties); double oldreward = 0.0; double visits = 0.0; int actionIndex = getActions()->getIndex(action); int type = oldS->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE); switch (type) { case FEATURESTATE: { for (unsigned int oldIndex = 0; oldIndex < oldS->getNumDiscreteStates(); oldIndex++) { int oldFeature = oldS->getDiscreteState(oldIndex); featMap = rewardTable->get(actionIndex, oldFeature); for (unsigned int newIndex = 0; newIndex < newS->getNumDiscreteStates(); newIndex++) { int newFeature = newS->getDiscreteState(newIndex); oldreward = featMap->getValue(newFeature); (*featMap)[newFeature] = oldreward + reward * newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex); if (!bExternVisitSparse) { visits = visitTable->get(actionIndex, oldFeature)->getValue(newFeature); (*visitTable->get(actionIndex, oldFeature))[newFeature] = visits + newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);; } } } break; } case DISCRETESTATE: default: { featMap = rewardTable->get(actionIndex, oldS->getDiscreteStateNumber()); oldreward = featMap->getValue(newS->getDiscreteStateNumber()); int feata = oldS->getDiscreteStateNumber(); int featb = newS->getDiscreteStateNumber(); (*featMap)[featb] = oldreward + reward; if (!bExternVisitSparse) { visits = visitTable->get(actionIndex, feata)->getValue(featb); (*visitTable->get(actionIndex, feata))[featb] = visits + 1.0; } break; } } }