/**
 * Numerically estimates the derivative of the policy's continuous action
 * with respect to every continuous input state, using central differences.
 *
 * For each continuous state dimension `col` the policy is evaluated at
 * x + h and x - h, where h is the "NumericInputDerivationStepSize" parameter
 * scaled by that dimension's value range (max - min). The result is
 *   targetVector(row, col) = (aPlus[row] - aMinus[row]) / (2 * h).
 *
 * @param inputStateCol state collection providing the point of evaluation
 * @param targetVector  output matrix; element (row, col) is written for every
 *                      row < policy->getNumOutputs() and every continuous
 *                      state index col
 */
void CCAGradientPolicyNumericInputDerivationCalculator::getInputDerivation(CStateCollection *inputStateCol, Matrix *targetVector)
{
    CStateProperties *modelState = policy->getStateProperties();

    // Work on the buffer's own copy of the model state so the caller's
    // state collection is not modified by the perturbations below.
    CState *inputState = stateBuffer->getState(modelState);
    inputState->setState(inputStateCol->getState(modelState));

    const double stepSize = getParameter("NumericInputDerivationStepSize");

    DebugPrint('p', "Calculating Numeric Policy Input Derivation\n");

    for (unsigned int col = 0; col < modelState->getNumContinuousStates(); col++)
    {
        // NOTE(review): a dimension with max == min yields a zero step and
        // therefore a division by zero below - confirm this cannot happen.
        const double stepSize_i = (modelState->getMaxValue(col) - modelState->getMinValue(col)) * stepSize;

        // Remember the unperturbed value. Both evaluation points and the final
        // restore are derived from it, so neither rounding error nor any
        // adjustment made by setContinuousState can accumulate across columns.
        const double baseValue = inputState->getContinuousState(col);

        // Evaluate the policy at x + h.
        inputState->setContinuousState(col, baseValue + stepSize_i);
        // presumably signals the buffer that the model state changed - TODO confirm
        stateBuffer->newModelState();
        policy->getNextContinuousAction(stateBuffer, contDataPlus);

        if (DebugIsEnabled('p'))
        {
            DebugPrint('p', "State : ");
            inputState->saveASCII(DebugGetFileHandle('p'));
            DebugPrint('p', "Action : ");
            contDataPlus->saveASCII(DebugGetFileHandle('p'));
        }

        // Evaluate the policy at x - h.
        inputState->setContinuousState(col, baseValue - stepSize_i);
        stateBuffer->newModelState();
        policy->getNextContinuousAction(stateBuffer, contDataMinus);

        if (DebugIsEnabled('p'))
        {
            DebugPrint('p', "State : ");
            inputState->saveASCII(DebugGetFileHandle('p'));
            DebugPrint('p', "Action : ");
            contDataMinus->saveASCII(DebugGetFileHandle('p'));
        }

        // Restore the exact original coordinate before the next dimension.
        inputState->setContinuousState(col, baseValue);

        for (int row = 0; row < policy->getNumOutputs(); row++)
        {
            targetVector->element(row, col) = (contDataPlus->getActionValue(row) - contDataMinus->getActionValue(row)) / (2 * stepSize_i);
        }
    }
}
/**
 * Builds a state from the raw input vector and accumulates the policy's
 * per-output gradients into gradientFeatures.
 *
 * For every output dimension i, getGradient() is called for the state and the
 * resulting feature list is passed to gradientFeatures->add() together with
 * outputErrors->element(i).
 *
 * @param input            input vector; its first getNumInputs() elements are
 *                         copied into the continuous state
 * @param outputErrors     one value per output dimension, forwarded as the
 *                         second argument of gradientFeatures->add()
 * @param gradientFeatures feature list receiving the accumulated result
 */
void CContinuousActionGradientPolicy::getGradientPre(ColumnVector *input, ColumnVector *outputErrors, CFeatureList *gradientFeatures)
{
    // Automatic storage instead of new/delete: both scratch objects are
    // released on every exit path, including when a callee throws.
    // Declared in this order so destruction (reverse order) matches the
    // original "delete featureList; delete state;" sequence.
    CState state(modelState);
    CFeatureList featureList;

    for (int i = 0; i < getNumInputs(); i++)
    {
        state.setContinuousState(i, input->element(i));
    }

    for (int i = 0; i < getNumOutputs(); i++)
    {
        // NOTE(review): featureList is reused across outputs without being
        // cleared here - presumably getGradient() resets it; confirm.
        getGradient(&state, i, &featureList);
        gradientFeatures->add(&featureList, outputErrors->element(i));
    }
}
/**
 * Evaluates the policy for a raw input vector and writes the resulting
 * continuous action values to output.
 *
 * @param input  input vector; its first getNumInputs() elements are copied
 *               into the continuous state the policy is evaluated on
 * @param output receives getNumOutputs() action values
 */
void CContinuousActionGradientPolicy::getFunctionValuePre(ColumnVector *input, ColumnVector *output)
{
    // Scratch state with automatic storage, released on every exit path.
    CState state(modelState);

    // NOTE(review): the dynamic_cast result is used unchecked, as in the
    // original - confirm getNewActionData() always yields CContinuousActionData.
    CContinuousActionData *data = dynamic_cast<CContinuousActionData *>(contAction->getNewActionData());

    for (int i = 0; i < getNumInputs(); i++)
    {
        state.setContinuousState(i, input->element(i));
    }

    getNextContinuousAction(&state, data);

    for (int i = 0; i < getNumOutputs(); i++)
    {
        // Fix: report the computed action, not the input state. Previously the
        // state values were copied back, so the action in `data` was never used.
        output->element(i) = data->getActionValue(i);
    }

    delete data;
}