// Estimates the derivative of the policy's continuous action with respect to
// every continuous input state variable using central differences.
//
// For each continuous dimension the state is moved to x + h and x - h, with
// h = (max - min) * "NumericInputDerivationStepSize", the policy is evaluated
// at both points and
//   targetVector(out, dim) = (actionPlus[out] - actionMinus[out]) / (2 * h)
// is stored. The perturbed dimension is stepped back by h before the next
// dimension is processed.
//
// inputStateCol : state collection supplying the point of evaluation
// targetVector  : receives one entry per (policy output, continuous state) pair
void CCAGradientPolicyNumericInputDerivationCalculator::getInputDerivation(CStateCollection *inputStateCol, Matrix *targetVector)
{
	CStateProperties *modelState = policy->getStateProperties();

	// Copy the evaluation point into the state held by stateBuffer; all
	// perturbations below are applied to that state object.
	CState *inputState = stateBuffer->getState(modelState);
	inputState->setState(inputStateCol->getState(modelState));

	double stepSize = getParameter("NumericInputDerivationStepSize");

	DebugPrint('p', "Calculating Numeric Policy Input Derivation\n");

	for (unsigned int dim = 0; dim < modelState->getNumContinuousStates(); ++dim)
	{
		// The step is a fraction of the value range of this dimension.
		const double h = (modelState->getMaxValue(dim) - modelState->getMinValue(dim)) * stepSize;

		// Forward point: current value + h.
		const double forwardValue = inputState->getContinuousState(dim) + h;
		inputState->setContinuousState(dim, forwardValue);
		stateBuffer->newModelState();
		policy->getNextContinuousAction(stateBuffer, contDataPlus);

		if (DebugIsEnabled('p'))
		{
			DebugPrint('p', "State : ");
			inputState->saveASCII(DebugGetFileHandle('p'));

			DebugPrint('p', "Action : ");
			contDataPlus->saveASCII(DebugGetFileHandle('p'));
		}

		// Backward point: step back by 2h from the forward point.
		const double backwardValue = inputState->getContinuousState(dim) - 2 * h;
		inputState->setContinuousState(dim, backwardValue);
		stateBuffer->newModelState();
		policy->getNextContinuousAction(stateBuffer, contDataMinus);

		if (DebugIsEnabled('p'))
		{
			DebugPrint('p', "State : ");
			inputState->saveASCII(DebugGetFileHandle('p'));

			DebugPrint('p', "Action : ");
			contDataMinus->saveASCII(DebugGetFileHandle('p'));
		}

		// Step forward by h again so the next dimension starts from the centre.
		const double restoredValue = inputState->getContinuousState(dim) + h;
		inputState->setContinuousState(dim, restoredValue);

		for (int out = 0; out < policy->getNumOutputs(); ++out)
		{
			const double actionDelta = contDataPlus->getActionValue(out) - contDataMinus->getActionValue(out);
			targetVector->element(out, dim) = actionDelta / (2 * h);
		}
	}
}
Esempio n. 2
0
// Appends one transition to `stream` as a single space-separated line:
//
//   <episode> <step> <old state: continuous..., discrete...>
//   <next state: continuous..., discrete...> <action index>
//   [<continuous action values...>] <reward>
//
// and then increments the step counter and flushes the stream.
//
// oldState  : state collection before the action was executed
// action    : executed action; its index within `actions` is written
// reward    : reward obtained for the transition
// nextState : state collection after the action was executed
void CEpisodeMatlabOutput::nextStep(CStateCollection *oldState, CAction *action, double reward,  CStateCollection *nextState)
{
	CActionData *actionData = action->getActionData();

	fprintf(stream, "%d %d ", nEpisodes, nSteps);
	CState *state = oldState->getState(properties);
	unsigned int i = 0;
	for (i = 0; i < properties->getNumContinuousStates(); i++)
	{
		fprintf(stream, "%lf ", state->getContinuousState(i));
	}
	for (i = 0; i < properties->getNumDiscreteStates(); i++)
	{
		fprintf(stream, "%d ", (int)state->getDiscreteState(i));
	}
	
	state = nextState->getState(properties);
	for (i = 0; i < properties->getNumContinuousStates(); i++)
	{
		fprintf(stream, "%lf ", state->getContinuousState(i));
	}
	for (i = 0; i < properties->getNumDiscreteStates(); i++)
	{
		fprintf(stream, "%d ", (int)state->getDiscreteState(i));
	}
	
	fprintf(stream,"%d ", actions->getIndex(action));
	if (actionData != NULL)
	{
		// dynamic_cast yields NULL when the action data is not continuous
		// action data; in that case there are no values to write, so skip the
		// loop instead of dereferencing a null pointer.
		CContinuousActionData *contData = dynamic_cast<CContinuousActionData *>(actionData);
		if (contData != NULL)
		{
			for (int j = 0; j < contData->nrows(); j++)
			{
				fprintf(stream, "%lf ", contData->element(j));
			}
		}
	}
	
	fprintf(stream, "%lf ", reward);
		
	fprintf(stream, "\n");
	
	nSteps++;
	
	// Flush after every step so the output on disk stays current.
	fflush(stream);
}
Esempio n. 3
0
// Builds the modified state by copying a selection of variables from the
// original state: continuous slot k takes the original continuous variable
// contStatesInd[k], discrete slot k takes the original discrete variable
// discStatesInd[k].
//
// originalStateCol : collection from which the original state is fetched
// modifiedState    : state object that receives the selected variables
void CStateVariablesChooser::getModifiedState(CStateCollection *originalStateCol, CState *modifiedState)
{
	CState *source = originalStateCol->getState(originalState);

	const unsigned int numContinuous = getNumContinuousStates();
	for (unsigned int slot = 0; slot < numContinuous; ++slot)
	{
		const double value = source->getContinuousState(contStatesInd[slot]);
		modifiedState->setContinuousState(slot, value);
	}

	const unsigned int numDiscrete = getNumDiscreteStates();
	for (unsigned int slot = 0; slot < numDiscrete; ++slot)
	{
		modifiedState->setDiscreteState(slot, source->getDiscreteState(discStatesInd[slot]));
	}
}
Esempio n. 4
0
// Accumulates the received reward and the visit weight for the state reached
// by the transition.
//
// Feature states: every (feature index, activation) pair of the new state adds
// reward * activation to rewards[index] and activation to visits[index].
// Discrete states: the single discrete state number gets the full reward and
// a visit weight of 1.0. Any other state type is ignored.
//
// reward      : reward obtained for the transition
// newStateCol : collection from which the discretized new state is fetched
void CFeatureStateRewardModel::nextStep(CStateCollection *, CAction *, double reward, CStateCollection *newStateCol)
{
	CState *newState = newStateCol->getState(discretizer);
	CStateProperties *stateProps = newState->getStateProperties();

	if (stateProps->isType(FEATURESTATE))
	{
		const unsigned int numEntries = newState->getNumContinuousStates();
		for (unsigned int k = 0; k < numEntries; ++k)
		{
			const int feature = newState->getDiscreteState(k);
			const double activation = newState->getContinuousState(k);

			rewards[feature] += reward * activation;
			visits[feature] += activation;
		}
		return;
	}

	if (stateProps->isType(DISCRETESTATE))
	{
		const int stateNumber = newState->getDiscreteState(0);

		rewards[stateNumber] += reward;
		visits[stateNumber] += 1.0;
	}
}
// Combines the states of all registered state modifiers into one feature
// state in which each combined feature is a tuple of one feature per modifier.
//
// Each output slot starts at index 0 with factor 1.0. For every modifier the
// slot's index is increased by featureOffset * (that modifier's feature index)
// and, for feature states, the slot's factor is multiplied by that feature's
// factor. featureOffset grows by the modifier's discrete state size after each
// modifier. The result is passed through normalizeFeatures().
//
// stateCol  : collection that may already contain the modifiers' states
// featState : receives the combined feature indices and factors
void CFeatureOperatorAnd::getModifiedState(CStateCollection *stateCol, CState *featState)
{
	// Neutral start values: index 0 for the sum, factor 1.0 for the product.
	for (unsigned int slot = 0; slot < getNumDiscreteStates(); ++slot)
	{
		featState->setDiscreteState(slot, 0);
		featState->setContinuousState(slot, 1.0);
	}

	int featureOffset = 1;
	int repetitions = getNumDiscreteStates();

	std::list<CStateModifier *>::iterator modIt = getStateModifiers()->begin();
	std::list<CState *>::iterator bufferIt = states->begin();

	while (modIt != getStateModifiers()->end())
	{
		CStateModifier *modifier = *modIt;

		// Number of consecutive output slots that share the same feature of
		// this modifier.
		repetitions /= modifier->getNumDiscreteStates();

		// Use the state from the collection when it is a member; otherwise
		// calculate it into this operator's own buffer state.
		CState *stateBuf = NULL;
		if (stateCol->isMember(modifier))
		{
			stateBuf = stateCol->getState(modifier);
		}
		else
		{
			stateBuf = *bufferIt;
			modifier->getModifiedState(stateCol, stateBuf);
		}

		if (stateBuf->getStateProperties()->isType(FEATURESTATE))
		{
			for (unsigned int slot = 0; slot < getNumDiscreteStates(); ++slot)
			{
				unsigned int sourceFeature = (slot / repetitions) % stateBuf->getNumDiscreteStates();

				featState->setDiscreteState(slot, featState->getDiscreteState(slot) + featureOffset * stateBuf->getDiscreteState(sourceFeature));
				featState->setContinuousState(slot, featState->getContinuousState(slot) * stateBuf->getContinuousState(sourceFeature));
			}
		}
		else
		{
			// Non-feature states contribute only their first discrete state
			// and leave the factors untouched.
			for (unsigned int slot = 0; slot < getNumDiscreteStates(); ++slot)
			{
				featState->setDiscreteState(slot, featState->getDiscreteState(slot) + featureOffset * stateBuf->getDiscreteState(0));
			}
		}

		featureOffset = featureOffset * modifier->getDiscreteStateSize();

		++modIt;
		++bufferIt;
	}

	normalizeFeatures(featState);
}
Esempio n. 6
0
void CQAverageTDErrorLearner::receiveError(double error, CStateCollection *state, CAction *action, CActionData *)
{
	CState *featureState = state->getState(averageErrorFunction->getFeatureCalculator());
	
	for (unsigned int i = 0; i < featureState->getNumDiscreteStates(); i++)
	{
		int index = featureState->getDiscreteState(i);
		double featureFac = featureState->getContinuousState(i);
		double featureVal = averageErrorFunction->getValue(index, action);
				
		featureVal = featureVal * (updateRate  + (1 - featureFac) * (1 - updateRate)) + error * (1 - updateRate) * featureFac;
		averageErrorFunction->setValue(index, action, featureVal);
	}
}
// Computes the gradient of one output dimension of the policy by delegating to
// the wrapped gradient function.
//
// The continuous state variables are copied into an input vector, and
// outputError is set to zero except for a 1.0 at `outputDimension`, so the
// gradient function is asked for the gradient of exactly that output.
//
// inputState       : collection from which the model state is fetched
// outputDimension  : index of the output whose gradient is requested
// gradientFeatures : receives the gradient from gradientFunction
void CContinuousActionPolicyFromGradientFunction::getGradient(CStateCollection *inputState, int outputDimension, CFeatureList *gradientFeatures)
{
	// Gather the continuous state variables as the function input.
	CState *state = inputState->getState(modelState);
	ColumnVector input(getNumInputs());
	for (int dim = 0; dim < getNumInputs(); ++dim)
	{
		input.element(dim) = state->getContinuousState(dim);
	}

	// Select the requested output dimension.
	*outputError = 0;
	outputError->element(outputDimension) = 1.0;

	gradientFunction->getGradient(&input, outputError, gradientFeatures);
}
// Concatenates the features of all registered state modifiers into one
// feature state.
//
// The features of each modifier are appended to `state` in order. Their
// indices are shifted by the summed discrete state sizes of the preceding
// modifiers, and their factors are scaled by the modifier's entry in
// featureFactors. A discrete (non-feature) state contributes one feature whose
// factor is the modifier's featureFactors entry. The result is passed through
// normalizeFeatures().
//
// stateCol : collection that may already contain the modifiers' states
// state    : receives the concatenated feature indices and factors
void CFeatureOperatorOr::getModifiedState(CStateCollection *stateCol, CState *state)
{
	assert(bInit);

	int writePos = 0;     // next free slot in `state`
	int indexOffset = 0;  // shift applied to the current modifier's indices

	std::list<CStateModifier *>::iterator modIt = getStateModifiers()->begin();
	std::list<CState *>::iterator bufferIt = states->begin();

	while (modIt != getStateModifiers()->end())
	{
		CStateModifier *modifier = *modIt;

		// Use the state from the collection when it is a member; otherwise
		// calculate it into this operator's own buffer state.
		CState *stateBuf;
		if (stateCol->isMember(modifier))
		{
			stateBuf = stateCol->getState(modifier);
		}
		else
		{
			stateBuf = *bufferIt;
			modifier->getModifiedState(stateCol, stateBuf);
		}

		double featureStateFactor = (*this->featureFactors)[modifier];

		if (stateBuf->getStateProperties()->isType(FEATURESTATE))
		{
			for (unsigned int k = 0; k < stateBuf->getNumDiscreteStates(); ++k)
			{
				state->setDiscreteState(writePos, stateBuf->getDiscreteState(k) + indexOffset);
				state->setContinuousState(writePos, stateBuf->getContinuousState(k) * featureStateFactor);
				++writePos;
			}
		}
		else if (stateBuf->getStateProperties()->isType(DISCRETESTATE))
		{
			state->setDiscreteState(writePos, stateBuf->getDiscreteState(0) + indexOffset);
			state->setContinuousState(writePos, featureStateFactor);
			++writePos;
		}

		indexOffset += modifier->getDiscreteStateSize();

		++modIt;
		++bufferIt;
	}

	normalizeFeatures(state);
}
// Evaluates the policy as a plain vector function: `input` is interpreted as
// the continuous state variables of the model state, and `output` receives the
// continuous action values the policy selects for that state.
//
// input  : vector with getNumInputs() entries (continuous state values)
// output : vector with getNumOutputs() entries, filled with the action values
void CContinuousActionGradientPolicy::getFunctionValuePre(ColumnVector *input, ColumnVector *output)
{
	// Temporary state and action data used only for this evaluation; both are
	// released at the end of the function.
	CState *state = new CState(modelState);
	CContinuousActionData *data = dynamic_cast<CContinuousActionData *>(contAction->getNewActionData());
	
	for (int i = 0; i < getNumInputs(); i ++)
	{	
		state->setContinuousState(i, input->element(i));
	}
	
	getNextContinuousAction(state, data);

	// The function value is the action computed by the policy. The previous
	// code copied state->getContinuousState(i) here, which merely echoed the
	// input back and discarded the result of getNextContinuousAction().
	for (int i = 0; i < getNumOutputs(); i ++)
	{	
		output->element(i) = data->element(i);
	}

	delete state;
	delete data;
}
// Expands a feature state into a continuous input vector: `state` is reset and
// then, for every active feature, the continuous variable at the feature's
// index is set to the feature's factor.
//
// stateCol : collection that may already contain the feature state
// state    : receives the expanded representation
void CFeatureStateNNInput::getModifiedState(CStateCollection *stateCol, CState *state)
{
	state->resetState();

	// Take the feature state from the collection when it is a member;
	// otherwise calculate it into the local buffer state.
	CState *activeFeatures = NULL;
	if (!stateCol->isMember(featureStateCalc))
	{
		featureStateCalc->getModifiedState(stateCol, featureState);
		activeFeatures = featureState;
	}
	else
	{
		activeFeatures = stateCol->getState(featureStateCalc);
	}

	for (unsigned int k = 0; k < featureStateCalc->getNumActiveFeatures(); ++k)
	{
		const int inputIndex = activeFeatures->getDiscreteState(k);
		const double activation = activeFeatures->getContinuousState(k);

		state->setContinuousState(inputIndex, activation);
	}
}
Esempio n. 11
0
// Writes the state reached by the transition to `stream` as one line: all
// continuous state variables followed by all discrete state variables, each
// separated by a space. The stream is flushed afterwards.
//
// nextState : collection from which the state to be written is fetched
void CStateOutput::nextStep(CStateCollection *, CAction *, CStateCollection *nextState)
{
	// Disabled: writing of the very first (old) state.
//	if (isFirst)
//	{
//		oldState->getState(properties)->saveASCII(stream);		
//		isFirst = false;
//		printf("\n");
//	}
	CState *state = nextState->getState(properties);

	const unsigned int numContinuous = properties->getNumContinuousStates();
	for (unsigned int c = 0; c < numContinuous; ++c)
	{
		fprintf(stream, "%lf ", state->getContinuousState(c));
	}

	const unsigned int numDiscrete = properties->getNumDiscreteStates();
	for (unsigned int d = 0; d < numDiscrete; ++d)
	{
		fprintf(stream, "%d ", (int)state->getDiscreteState(d));
	}

	fprintf(stream, "\n");
	fflush(stream);
}
Esempio n. 12
0
// Converts the continuous variables of the original state into the modified
// state.
//
// The selected variables (all of them when `dimensions` is NULL, otherwise the
// numDim variables listed in `dimensions`) are copied into buffVector and, if
// normValues is set, run through preprocessInput(). Each value v is then
// written to the modified state: a periodic variable produces the two entries
// sin(v * M_PI) and cos(v * M_PI), any other variable produces v itself.
//
// originalStateCol : collection from which the original state is fetched
// modifiedState    : receives the converted continuous variables
void CNeuralNetworkStateModifier::getModifiedState(CStateCollection *originalStateCol, CState *modifiedState)
{
	CState *state = originalStateCol->getState(originalState);

	// Disabled: copying of the discrete states.
//	for (unsigned int i = 0; i < originalState->getNumDiscreteStates(); i++)
//	{
//		modifiedState->setDiscreteState(i, state->getDiscreteState(i));
//	}

	const bool useAllDimensions = (dimensions == NULL);
	const unsigned int numSelected = useAllDimensions ? originalState->getNumContinuousStates() : numDim;

	// Step 1: gather the selected variables.
	*buffVector = 0;
	for (unsigned int k = 0; k < numSelected; ++k)
	{
		const unsigned int sourceDim = useAllDimensions ? k : dimensions[k];
		buffVector->element(k) = state->getContinuousState(sourceDim);
	}

	// Step 2: optional preprocessing, done in place.
	if (normValues)
	{
		preprocessInput(buffVector, buffVector);
	}

	// Step 3: emit the values; periodic variables take two output slots.
	int contStateIndex = 0;
	for (unsigned int k = 0; k < numSelected; ++k)
	{
		const unsigned int sourceDim = useAllDimensions ? k : dimensions[k];
		const double stateVal = buffVector->element(k);

		if (originalState->getPeriodicity(sourceDim))
		{
			modifiedState->setContinuousState(contStateIndex, sin(stateVal * M_PI));
			++contStateIndex;
			modifiedState->setContinuousState(contStateIndex, cos(stateVal * M_PI));
			++contStateIndex;
		}
		else
		{
			modifiedState->setContinuousState(contStateIndex, stateVal);
			++contStateIndex;
		}
	}

	// Disabled: earlier variant for the `dimensions` case that normalized the
	// values inline instead of calling preprocessInput().
	/*for (unsigned int i = 0; i <numDim; i++)
	{
		double width = originalState->getMaxValue(dimensions[i]) - originalState->getMinValue(dimensions[i]);
		double stateVal = 0.0;
		if (normValues == true)
		{
			stateVal = (state->getContinuousState(dimensions[i]) - originalState->getMinValue(dimensions[i])) / width * 2 - 1.0;
		}
		else
		{
			stateVal = state->getContinuousState(dimensions[i]);
		}
		
		if (originalState->getPeriodicity(dimensions[i]))
		{
			modifiedState->setContinuousState(contStateIndex ++, sin(stateVal * M_PI));
			modifiedState->setContinuousState(contStateIndex ++, cos(stateVal * M_PI));
		}
		else
		{
			modifiedState->setContinuousState(contStateIndex ++, stateVal);
		}
	}*/
}
// Calculates the active features of a single continuous state dimension.
//
// The partition list is searched for the first partition that is not smaller
// than the state value (for periodic dimensions, values and partitions below
// partitions[0] are shifted up by the width of the dimension). Starting at
// that partition, numActiveFeatures slots are filled by alternately stepping
// to the right and to the left. Every slot whose feature index is valid gets
// the index as its discrete state and getFeatureFactor() as its continuous
// state; slots with an invalid index are set to index 0 with factor 0.0.
// Finally the features are passed through normalizeFeatures().
//
// stateCol  : collection from which the original state is fetched
// featState : receives the feature indices and factors
void CSingleStateFeatureCalculator::getModifiedState(CStateCollection *stateCol, CState *featState)
{
	CState *state = stateCol->getState(originalState);
	CStateProperties *properties = state->getStateProperties();
	double contState = state->getContinuousState(dimension);
	double width = properties->getMaxValue(dimension) - properties->getMinValue(dimension);


	if (contState < partitions[0] && properties->getPeriodicity(dimension))
	{
		contState += width;
	}

	unsigned int activeFeature = 0;
	unsigned int featureNum = 0, realfeatureNum = 0;

	int featureIndex = 0;

	double part = partitions[activeFeature];

	// Advance to the first partition that is not below the state value.
	while (activeFeature < numFeatures && part < contState)
	{
		activeFeature++;

		if (activeFeature < numFeatures)
		{
			part = partitions[activeFeature];
		}

		if (part < partitions[0])
		{
			assert(properties->getPeriodicity(dimension));
			part += width;
		}
	}

	
	// Beyond the last partition of a non-periodic dimension: skip the first
	// (right-hand) candidate so the search starts with the left neighbour.
	if (activeFeature == numFeatures && !properties->getPeriodicity(dimension))
	{
		featureNum ++;
	}

	DebugPrint('l', "Single State Features: [");	
	for (; realfeatureNum < this->numActiveFeatures; realfeatureNum++, featureNum ++)
	{
		// Even steps go to the right of activeFeature, odd steps to the left.
		if (featureNum % 2 == 0)
		{
			featureIndex = activeFeature + featureNum / 2;
		}
		else
		{
			// Signed arithmetic: the result is negative when stepping left of
			// feature 0.
			featureIndex = static_cast<int>(activeFeature) - static_cast<int>(featureNum / 2 + 1);
		}

		if (state->getStateProperties()->getPeriodicity(dimension))
		{
			// Wrap into [0, numFeatures). The modulo must be computed in
			// signed arithmetic: with an unsigned divisor a negative index is
			// first converted to a huge unsigned value, which gives the wrong
			// remainder unless numFeatures is a power of two.
			const int featureCount = static_cast<int>(numFeatures);
			featureIndex %= featureCount;
			if (featureIndex < 0)
			{
				featureIndex += featureCount;
			}
		}
		
		if (featureIndex >= 0 && featureIndex < (signed int) numFeatures)
		{
			featState->setDiscreteState(realfeatureNum, featureIndex);
			
			double stateDiff = state->getSingleStateDifference(dimension, partitions[featureIndex]);
			
			double diffNextPart = 1.0;	
					
			if (!state->getStateProperties()->getPeriodicity(dimension))
			{
				// The outermost features have no neighbour on their outer
				// side; the difference is set to 0 there.
				if (featureIndex == 0 && stateDiff <= 0)
				{
					stateDiff = 0;
				}
				else
				{
					if (featureIndex == (signed int) (numFeatures - 1) && stateDiff > 0)
					{
						stateDiff = 0;
					}
					else
					{
						// Distance to the neighbouring partition on the side
						// of the state value.
						if (stateDiff <= 0)
						{
							diffNextPart = partitions[featureIndex] - partitions[featureIndex - 1];
						}
						else
						{
							diffNextPart = partitions[featureIndex + 1] - partitions[featureIndex];
						}
					}
				}

			}
			else
			{

				if (stateDiff <= 0)
				{
					diffNextPart = partitions[featureIndex] - partitions[(numPartitions + featureIndex - 1) % numPartitions];
				}
				else
				{
					diffNextPart = partitions[(featureIndex + 1) % numPartitions] - partitions[featureIndex];
				}				
				
				// NOTE(review): the two adjustments below are kept exactly as
				// they were. A negative value is raised by width and then, now
				// being positive, lowered by width again; a positive value is
				// lowered by width. Confirm against getFeatureFactor() whether
				// this is intended before changing it.
				if (diffNextPart < 0)
				{
					diffNextPart += width; 
				}
				if (diffNextPart > 0)
				{
					diffNextPart -= width;
				}
			}
			
			featState->setContinuousState(realfeatureNum, getFeatureFactor(featureIndex, stateDiff, diffNextPart));
			
			DebugPrint('l', "%f %f, ", partitions[featureIndex], featState->getContinuousState(realfeatureNum));
		}
		else
		{
			// No valid feature for this slot: mark it inactive.
			featState->setContinuousState(realfeatureNum, 0.0);
			featState->setDiscreteState(realfeatureNum, 0);
		}
	}
	this->normalizeFeatures(featState);
	DebugPrint('l', "]\n");	
}
Esempio n. 14
0
// Accumulates the received reward (and, unless bExternVisitSparse is set, the
// visit weight) for the transition (old state, action) -> new state.
//
// Feature states: for every pair of an old feature and a new feature the
// reward entry grows by reward * newFactor * oldFactor and the visit entry by
// newFactor * oldFactor. All other state types use the discrete state numbers
// of both states and add the full reward and a visit weight of 1.0.
//
// oldState : collection holding the state before the action
// action   : executed action; its index selects the table row
// reward   : reward obtained for the transition
// newState : collection holding the state after the action
void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState)
{
	CState *oldS = oldState->getState(properties);
	CState *newS = newState->getState(properties);

	int actionIndex = getActions()->getIndex(action);

	int type = oldS->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE);

	if (type == FEATURESTATE)
	{
		for (unsigned int oldIndex = 0; oldIndex < oldS->getNumDiscreteStates(); ++oldIndex)
		{
			int oldFeature = oldS->getDiscreteState(oldIndex);
			CFeatureMap *featMap = rewardTable->get(actionIndex, oldFeature);

			for (unsigned int newIndex = 0; newIndex < newS->getNumDiscreteStates(); ++newIndex)
			{
				int newFeature = newS->getDiscreteState(newIndex);

				double oldreward = featMap->getValue(newFeature);
				(*featMap)[newFeature] = oldreward + reward * newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);

				if (bExternVisitSparse)
				{
					// Visits are not tracked by this model in that case.
					continue;
				}

				double visits = visitTable->get(actionIndex, oldFeature)->getValue(newFeature);
				(*visitTable->get(actionIndex, oldFeature))[newFeature] = visits + newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);
			}
		}
		return;
	}

	// DISCRETESTATE and every other type: one entry addressed by the discrete
	// state numbers of the old and the new state.
	int feata = oldS->getDiscreteStateNumber();
	int featb = newS->getDiscreteStateNumber();

	CFeatureMap *featMap = rewardTable->get(actionIndex, feata);

	double oldreward = featMap->getValue(featb);
	(*featMap)[featb] = oldreward + reward;

	if (!bExternVisitSparse)
	{
		double visits = visitTable->get(actionIndex, feata)->getValue(featb);
		(*visitTable->get(actionIndex, feata))[featb] = visits + 1.0;
	}
}