/**
 * Numerically estimates how the policy's continuous action changes with each
 * continuous input state, using central differences.
 *
 * For every continuous dimension `dim` the state held in stateBuffer is moved
 * up and down by a step of (max - min) * "NumericInputDerivationStepSize",
 * the policy is evaluated at both points, and
 * targetVector->element(out, dim) receives
 * (action_plus[out] - action_minus[out]) / (2 * step).
 *
 * @param inputStateCol  state collection supplying the point of evaluation
 * @param targetVector   matrix receiving the derivative (outputs x inputs)
 */
void CCAGradientPolicyNumericInputDerivationCalculator::getInputDerivation(CStateCollection *inputStateCol, Matrix *targetVector)
{
    CStateProperties *modelState = policy->getStateProperties();

    // The buffer's model state is overwritten with the input state and then
    // perturbed in place.
    CState *inputState = stateBuffer->getState(modelState);
    inputState->setState(inputStateCol->getState(modelState));

    double stepSize = getParameter("NumericInputDerivationStepSize");

    DebugPrint('p', "Calculating Numeric Policy Input Derivation\n");

    for (unsigned int dim = 0; dim < modelState->getNumContinuousStates(); dim++)
    {
        // Step is relative to the value range of this dimension.
        double delta = (modelState->getMaxValue(dim) - modelState->getMinValue(dim)) * stepSize;

        // Evaluate the policy at x + delta.
        double moved = inputState->getContinuousState(dim) + delta;
        inputState->setContinuousState(dim, moved);
        stateBuffer->newModelState();
        policy->getNextContinuousAction(stateBuffer, contDataPlus);

        if (DebugIsEnabled('p'))
        {
            DebugPrint('p', "State : ");
            inputState->saveASCII(DebugGetFileHandle('p'));
            DebugPrint('p', "Action : ");
            contDataPlus->saveASCII(DebugGetFileHandle('p'));
        }

        // Evaluate the policy at x - delta (two steps down from x + delta).
        moved = inputState->getContinuousState(dim) - 2 * delta;
        inputState->setContinuousState(dim, moved);
        stateBuffer->newModelState();
        policy->getNextContinuousAction(stateBuffer, contDataMinus);

        if (DebugIsEnabled('p'))
        {
            DebugPrint('p', "State : ");
            inputState->saveASCII(DebugGetFileHandle('p'));
            DebugPrint('p', "Action : ");
            contDataMinus->saveASCII(DebugGetFileHandle('p'));
        }

        // Step back up so the next dimension starts from the original point.
        moved = inputState->getContinuousState(dim) + delta;
        inputState->setContinuousState(dim, moved);

        // Central difference for every output component.
        for (int out = 0; out < policy->getNumOutputs(); out++)
        {
            targetVector->element(out, dim) =
                (contDataPlus->getActionValue(out) - contDataMinus->getActionValue(out)) / (2 * delta);
        }
    }
}
void CEpisodeMatlabOutput::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *nextState) { CActionData *actionData = action->getActionData(); fprintf(stream, "%d %d ", nEpisodes, nSteps); CState *state = oldState->getState(properties); unsigned int i = 0; for (i = 0; i < properties->getNumContinuousStates(); i++) { fprintf(stream, "%lf ", state->getContinuousState(i)); } for (i = 0; i < properties->getNumDiscreteStates(); i++) { fprintf(stream, "%d ", (int)state->getDiscreteState(i)); } state = nextState->getState(properties); for (i = 0; i < properties->getNumContinuousStates(); i++) { fprintf(stream, "%lf ", state->getContinuousState(i)); } for (i = 0; i < properties->getNumDiscreteStates(); i++) { fprintf(stream, "%d ", (int)state->getDiscreteState(i)); } fprintf(stream,"%d ", actions->getIndex(action)); if (actionData != NULL) { CContinuousActionData *contData = dynamic_cast<CContinuousActionData *>(actionData); for (int j = 0; j < contData->nrows(); j++) { fprintf(stream, "%lf ", contData->element(j)); } } fprintf(stream, "%lf ", reward); fprintf(stream, "\n"); nSteps++; fflush(stream); }
/**
 * Builds the modified state by picking selected variables out of the
 * original state.
 *
 * Continuous variable `k` of the result is continuous variable
 * contStatesInd[k] of the original state; discrete variable `k` is discrete
 * variable discStatesInd[k] of the original state.
 *
 * @param originalStateCol  collection that contains the original state
 * @param modifiedState     state receiving the selected variables
 */
void CStateVariablesChooser::getModifiedState(CStateCollection *originalStateCol, CState *modifiedState)
{
    CState *source = originalStateCol->getState(originalState);

    unsigned int target = 0;

    // Selected continuous variables.
    while (target < getNumContinuousStates())
    {
        modifiedState->setContinuousState(target, source->getContinuousState(contStatesInd[target]));
        target++;
    }

    // Selected discrete variables.
    target = 0;
    while (target < getNumDiscreteStates())
    {
        modifiedState->setDiscreteState(target, source->getDiscreteState(discStatesInd[target]));
        target++;
    }
}
/**
 * Accumulates the received reward and the visit count for the state reached
 * by the step.
 *
 * For a feature state every entry adds `reward * factor` to rewards[feature]
 * and `factor` to visits[feature], where `feature` is the entry's discrete
 * value and `factor` its continuous value. For a plain discrete state the
 * reward and a visit of 1.0 are added at the index given by discrete state 0.
 * States of any other type are ignored.
 *
 * @param reward       reward received for the step
 * @param newStateCol  state collection after the step
 */
void CFeatureStateRewardModel::nextStep(CStateCollection *, CAction *, double reward, CStateCollection *newStateCol)
{
    CState *reached = newStateCol->getState(discretizer);

    if (reached->getStateProperties()->isType(FEATURESTATE))
    {
        for (unsigned int entry = 0; entry < reached->getNumContinuousStates(); entry++)
        {
            rewards[reached->getDiscreteState(entry)] += reward * reached->getContinuousState(entry);
            visits[reached->getDiscreteState(entry)] += reached->getContinuousState(entry);
        }
        return;
    }

    if (reached->getStateProperties()->isType(DISCRETESTATE))
    {
        rewards[reached->getDiscreteState(0)] += reward;
        visits[reached->getDiscreteState(0)] += 1.0;
    }
}
/**
 * Combines the states of all registered state modifiers into one feature
 * state.
 *
 * Every entry of featState starts with index 0 and factor 1.0. Each modifier
 * then adds `featureOffset * subIndex` to the entry's index, where
 * featureOffset is the product of the discrete state sizes of the modifiers
 * processed so far. For feature-state modifiers the entry's factor is also
 * multiplied by the sub-feature's factor. The result is passed to
 * normalizeFeatures().
 *
 * A modifier's state is taken from stateCol when it is a member there;
 * otherwise it is computed into the matching buffer from `states`.
 *
 * @param stateCol   state collection the modifiers operate on
 * @param featState  feature state receiving the combined features
 */
void CFeatureOperatorAnd::getModifiedState(CStateCollection *stateCol, CState *featState)
{
    // Neutral start values: index 0 for the sum, factor 1.0 for the product.
    for (unsigned int slot = 0; slot < getNumDiscreteStates(); slot++)
    {
        featState->setDiscreteState(slot, 0);
        featState->setContinuousState(slot, 1.0);
    }

    int featureOffset = 1;
    int repetitions = getNumDiscreteStates();

    std::list<CStateModifier *>::iterator modifierIt = getStateModifiers()->begin();
    std::list<CState *>::iterator bufferIt = states->begin();

    while (modifierIt != getStateModifiers()->end())
    {
        // Number of consecutive output slots that share one sub-feature of
        // this modifier.
        repetitions /= (*modifierIt)->getNumDiscreteStates();

        CState *subState = NULL;
        if (stateCol->isMember(*modifierIt))
        {
            subState = stateCol->getState(*modifierIt);
        }
        else
        {
            subState = *bufferIt;
            (*modifierIt)->getModifiedState(stateCol, subState);
        }

        if (subState->getStateProperties()->isType(FEATURESTATE))
        {
            for (unsigned int slot = 0; slot < getNumDiscreteStates(); slot++)
            {
                unsigned int subFeature = (slot / repetitions) % subState->getNumDiscreteStates();

                featState->setDiscreteState(slot,
                    featState->getDiscreteState(slot) + featureOffset * subState->getDiscreteState(subFeature));
                featState->setContinuousState(slot,
                    featState->getContinuousState(slot) * subState->getContinuousState(subFeature));
            }
        }
        else
        {
            // Non-feature state: only its first discrete value contributes,
            // and the factors stay untouched.
            for (unsigned int slot = 0; slot < getNumDiscreteStates(); slot++)
            {
                featState->setDiscreteState(slot,
                    featState->getDiscreteState(slot) + featureOffset * subState->getDiscreteState(0));
            }
        }

        featureOffset = featureOffset * (*modifierIt)->getDiscreteStateSize();

        modifierIt++;
        bufferIt++;
    }

    normalizeFeatures(featState);
}
void CQAverageTDErrorLearner::receiveError(double error, CStateCollection *state, CAction *action, CActionData *) { CState *featureState = state->getState(averageErrorFunction->getFeatureCalculator()); for (unsigned int i = 0; i < featureState->getNumDiscreteStates(); i++) { int index = featureState->getDiscreteState(i); double featureFac = featureState->getContinuousState(i); double featureVal = averageErrorFunction->getValue(index, action); featureVal = featureVal * (updateRate + (1 - featureFac) * (1 - updateRate)) + error * (1 - updateRate) * featureFac; averageErrorFunction->setValue(index, action, featureVal); } }
/**
 * Computes the gradient of one policy output dimension for the given state.
 *
 * The continuous model state is copied into an input vector, and the
 * gradient function is called with an output-error vector that is zero
 * everywhere except for a 1.0 at `outputDimension`.
 *
 * @param inputState        state collection providing the model state
 * @param outputDimension   index of the output whose gradient is requested
 * @param gradientFeatures  feature list receiving the gradient
 */
void CContinuousActionPolicyFromGradientFunction::getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures)
{
    // Unit vector selecting the requested output.
    *outputError = 0;
    outputError->element(outputDimension) = 1.0;

    ColumnVector input(getNumInputs());
    CState *state = inputState->getState(modelState);

    int dim = 0;
    while (dim < getNumInputs())
    {
        input.element(dim) = state->getContinuousState(dim);
        dim++;
    }

    gradientFunction->getGradient(&input, outputError, gradientFeatures);
}
/**
 * Concatenates the features of all registered state modifiers into `state`.
 *
 * The features of each modifier are appended one after the other. Feature
 * indices are shifted by the summed discrete state sizes of the modifiers
 * processed before it, and factors are scaled by the modifier's entry in
 * featureFactors. A plain discrete state contributes a single feature whose
 * factor is the modifier's feature factor. States of other types contribute
 * nothing. The result is passed to normalizeFeatures().
 *
 * A modifier's state is taken from stateCol when it is a member there;
 * otherwise it is computed into the matching buffer from `states`.
 *
 * @param stateCol  state collection the modifiers operate on
 * @param state     feature state receiving the concatenated features
 */
void CFeatureOperatorOr::getModifiedState(CStateCollection *stateCol, CState *state)
{
    assert(bInit);

    std::list<CStateModifier *>::iterator modifierIt = getStateModifiers()->begin();
    std::list<CState *>::iterator bufferIt = states->begin();

    int writePos = 0;     // next free entry in `state`
    int indexOffset = 0;  // shift applied to the current modifier's indices

    while (modifierIt != getStateModifiers()->end())
    {
        CState *subState;
        if (stateCol->isMember(*modifierIt))
        {
            subState = stateCol->getState(*modifierIt);
        }
        else
        {
            subState = *bufferIt;
            (*modifierIt)->getModifiedState(stateCol, subState);
        }

        double factor = (*this->featureFactors)[*modifierIt];

        if (subState->getStateProperties()->isType(FEATURESTATE))
        {
            for (unsigned int sub = 0; sub < subState->getNumDiscreteStates(); sub++)
            {
                state->setDiscreteState(writePos, subState->getDiscreteState(sub) + indexOffset);
                state->setContinuousState(writePos, subState->getContinuousState(sub) * factor);
                writePos++;
            }
        }
        else if (subState->getStateProperties()->isType(DISCRETESTATE))
        {
            state->setDiscreteState(writePos, subState->getDiscreteState(0) + indexOffset);
            state->setContinuousState(writePos, factor);
            writePos++;
        }

        indexOffset += (*modifierIt)->getDiscreteStateSize();

        modifierIt++;
        bufferIt++;
    }

    normalizeFeatures(state);
}
/**
 * Evaluates the policy as a plain vector function: `input` is interpreted as
 * the continuous model state and `output` receives the continuous action the
 * policy selects for it.
 *
 * A temporary state and a temporary action-data object are created for the
 * evaluation and released before returning.
 *
 * @param input   continuous state values (getNumInputs() elements are read)
 * @param output  receives the action values (getNumOutputs() elements are written)
 */
void CContinuousActionGradientPolicy::getFunctionValuePre(ColumnVector *input, ColumnVector *output)
{
    CState *state = new CState(modelState);
    CContinuousActionData *data = dynamic_cast<CContinuousActionData *>(contAction->getNewActionData());

    // The policy's action is expected to carry continuous action data.
    assert(data != NULL);

    for (int i = 0; i < getNumInputs(); i++)
    {
        state->setContinuousState(i, input->element(i));
    }

    getNextContinuousAction(state, data);

    // Fix: the result is the computed action. The previous code copied the
    // input state back into `output`, discarding the policy's answer.
    for (int i = 0; i < getNumOutputs(); i++)
    {
        output->element(i) = data->element(i);
    }

    delete state;
    delete data;
}
/**
 * Expands a feature state into a continuous state: after resetting `state`,
 * the continuous entry at each active feature's index is set to that
 * feature's factor.
 *
 * The feature state is taken from stateCol when the feature calculator is a
 * member there; otherwise it is computed into the internal featureState
 * buffer.
 *
 * @param stateCol  state collection to read the feature state from
 * @param state     state receiving the expanded features
 */
void CFeatureStateNNInput::getModifiedState(CStateCollection *stateCol, CState *state)
{
    state->resetState();

    CState *features;
    if (!stateCol->isMember(featureStateCalc))
    {
        featureStateCalc->getModifiedState(stateCol, featureState);
        features = featureState;
    }
    else
    {
        features = stateCol->getState(featureStateCalc);
    }

    unsigned int active = 0;
    while (active < featureStateCalc->getNumActiveFeatures())
    {
        state->setContinuousState(features->getDiscreteState(active), features->getContinuousState(active));
        active++;
    }
}
/**
 * Writes the state reached by the step as one text line to `stream`:
 * first all continuous values, then all discrete values, each followed by a
 * blank, and finally a newline. The stream is flushed afterwards.
 *
 * Only the state after the step is written; the old-state and action
 * parameters are unused.
 *
 * @param nextState  state collection after the step
 */
void CStateOutput::nextStep(CStateCollection *, CAction *, CStateCollection *nextState)
{
    CState *state = nextState->getState(properties);

    for (unsigned int c = 0; c < properties->getNumContinuousStates(); c++)
    {
        fprintf(stream, "%lf ", state->getContinuousState(c));
    }

    for (unsigned int d = 0; d < properties->getNumDiscreteStates(); d++)
    {
        fprintf(stream, "%d ", (int) state->getDiscreteState(d));
    }

    fprintf(stream, "\n");
    fflush(stream);
}
void CNeuralNetworkStateModifier::getModifiedState(CStateCollection *originalStateCol, CState *modifiedState) { int contStateIndex = 0; // set Discrete States CState *state = originalStateCol->getState(originalState); // for (unsigned int i = 0; i < originalState->getNumDiscreteStates(); i++) // { // modifiedState->setDiscreteState(i, state->getDiscreteState(i)); // } *buffVector = 0; if (dimensions == NULL) { for (unsigned int i = 0; i < originalState->getNumContinuousStates(); i++) { buffVector->element(i) = state->getContinuousState(i); } if (normValues) { preprocessInput(buffVector, buffVector); } for (unsigned int i = 0; i < originalState->getNumContinuousStates(); i++) { double stateVal = buffVector->element(i); if (originalState->getPeriodicity(i)) { modifiedState->setContinuousState(contStateIndex ++, sin(stateVal * M_PI)); modifiedState->setContinuousState(contStateIndex ++, cos(stateVal * M_PI)); } else { modifiedState->setContinuousState(contStateIndex ++, stateVal); } } } else { for (unsigned int i = 0; i < numDim; i++) { buffVector->element(i) = state->getContinuousState(dimensions[i]); } if (normValues) { preprocessInput(buffVector, buffVector); } for (unsigned int i = 0; i <numDim; i++) { double stateVal = buffVector->element(i); if (originalState->getPeriodicity(dimensions[i])) { modifiedState->setContinuousState(contStateIndex, sin(stateVal * M_PI)); contStateIndex ++; modifiedState->setContinuousState(contStateIndex, cos(stateVal * M_PI)); contStateIndex ++; } else { modifiedState->setContinuousState(contStateIndex, stateVal); contStateIndex ++; } } /*for (unsigned int i = 0; i <numDim; i++) { double width = originalState->getMaxValue(dimensions[i]) - originalState->getMinValue(dimensions[i]); double stateVal = 0.0; if (normValues == true) { stateVal = (state->getContinuousState(dimensions[i]) - originalState->getMinValue(dimensions[i])) / width * 2 - 1.0; } else { stateVal = state->getContinuousState(dimensions[i]); } if 
(originalState->getPeriodicity(dimensions[i])) { modifiedState->setContinuousState(contStateIndex ++, sin(stateVal * M_PI)); modifiedState->setContinuousState(contStateIndex ++, cos(stateVal * M_PI)); } else { modifiedState->setContinuousState(contStateIndex ++, stateVal); } }*/ } }
void CSingleStateFeatureCalculator::getModifiedState(CStateCollection *stateCol, CState *featState) { CState *state = stateCol->getState(originalState); CStateProperties *properties = state->getStateProperties(); double contState = state->getContinuousState(dimension); double width = properties->getMaxValue(dimension) - properties->getMinValue(dimension); if (contState < partitions[0] && properties->getPeriodicity(dimension)) { contState += width; } unsigned int activeFeature = 0; unsigned int featureNum = 0, realfeatureNum = 0; int featureIndex = 0; double part = partitions[activeFeature]; while (activeFeature < numFeatures && part < contState) { activeFeature++; if (activeFeature < numFeatures) { part = partitions[activeFeature]; } if (part < partitions[0]) { assert(properties->getPeriodicity(dimension)); part += width; } } if (activeFeature == numFeatures && !properties->getPeriodicity(dimension)) { featureNum ++; } DebugPrint('l', "Single State Features: ["); for (; realfeatureNum < this->numActiveFeatures; realfeatureNum++, featureNum ++) { if (featureNum % 2 == 0) { featureIndex = activeFeature + featureNum / 2; } else { featureIndex = activeFeature - (featureNum / 2 + 1); } if (state->getStateProperties()->getPeriodicity(dimension)) { featureIndex = featureIndex % numFeatures; } if (featureIndex >= 0 && featureIndex < (signed int) numFeatures) { featState->setDiscreteState(realfeatureNum, featureIndex); double stateDiff = state->getSingleStateDifference(dimension, partitions[featureIndex]); double diffNextPart = 1.0; if (!state->getStateProperties()->getPeriodicity(dimension)) { if (featureIndex == 0 && stateDiff <= 0) { stateDiff = 0; } else { if (featureIndex == (signed int) (numFeatures - 1) && stateDiff > 0) { stateDiff = 0; } else { if (stateDiff <= 0) { diffNextPart = partitions[featureIndex] - partitions[featureIndex - 1]; } else { diffNextPart = partitions[featureIndex + 1] - partitions[featureIndex]; } } } } else { if (stateDiff <= 0) { diffNextPart 
= partitions[featureIndex] - partitions[(numPartitions + featureIndex - 1) % numPartitions]; } else { diffNextPart = partitions[(featureIndex + 1) % numPartitions] - partitions[featureIndex]; } if (diffNextPart < 0) { diffNextPart += width; } if (diffNextPart > 0) { diffNextPart -= width; } } featState->setContinuousState(realfeatureNum, getFeatureFactor(featureIndex, stateDiff, diffNextPart)); DebugPrint('l', "%f %f, ", partitions[featureIndex], featState->getContinuousState(realfeatureNum)); } else { featState->setContinuousState(realfeatureNum, 0.0); featState->setDiscreteState(realfeatureNum, 0); } } this->normalizeFeatures(featState); DebugPrint('l', "]\n"); }
void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState) { CFeatureMap *featMap; CState *oldS = oldState->getState(properties); CState *newS = newState->getState(properties); double oldreward = 0.0; double visits = 0.0; int actionIndex = getActions()->getIndex(action); int type = oldS->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE); switch (type) { case FEATURESTATE: { for (unsigned int oldIndex = 0; oldIndex < oldS->getNumDiscreteStates(); oldIndex++) { int oldFeature = oldS->getDiscreteState(oldIndex); featMap = rewardTable->get(actionIndex, oldFeature); for (unsigned int newIndex = 0; newIndex < newS->getNumDiscreteStates(); newIndex++) { int newFeature = newS->getDiscreteState(newIndex); oldreward = featMap->getValue(newFeature); (*featMap)[newFeature] = oldreward + reward * newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex); if (!bExternVisitSparse) { visits = visitTable->get(actionIndex, oldFeature)->getValue(newFeature); (*visitTable->get(actionIndex, oldFeature))[newFeature] = visits + newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);; } } } break; } case DISCRETESTATE: default: { featMap = rewardTable->get(actionIndex, oldS->getDiscreteStateNumber()); oldreward = featMap->getValue(newS->getDiscreteStateNumber()); int feata = oldS->getDiscreteStateNumber(); int featb = newS->getDiscreteStateNumber(); (*featMap)[featb] = oldreward + reward; if (!bExternVisitSparse) { visits = visitTable->get(actionIndex, feata)->getValue(featb); (*visitTable->get(actionIndex, feata))[featb] = visits + 1.0; } break; } } }