Пример #1
0
void CFeatureOperatorAnd::getModifiedState(CStateCollection *stateCol, CState *featState)
{
	int featureOffset = 1;

	std::list<CStateModifier *>::iterator it = getStateModifiers()->begin();
	std::list<CState *>::iterator stateIt = states->begin();


	CState *stateBuf;

	for (unsigned int i = 0; i < getNumDiscreteStates();i ++)
	{
		featState->setDiscreteState(i, 0);
		featState->setContinuousState(i, 1.0);
	}

	int repetitions = getNumDiscreteStates();
	for (int j = 0; it != getStateModifiers()->end(); it ++, stateIt ++, j ++)
	{
		repetitions /= (*it)->getNumDiscreteStates();
		stateBuf = NULL;
		if (stateCol->isMember(*it))
		{
			stateBuf = stateCol->getState(*it);
		}
		else
		{
			stateBuf = *stateIt;
			(*it)->getModifiedState(stateCol, stateBuf);
		}
		
		if (stateBuf->getStateProperties()->isType(FEATURESTATE))
		{
			for (unsigned int i = 0; i < getNumDiscreteStates(); i++)
			{
				unsigned int singleStateFeatureNum = (i / repetitions) % stateBuf->getNumDiscreteStates();
				featState->setDiscreteState(i, featState->getDiscreteState(i) + featureOffset * stateBuf->getDiscreteState(singleStateFeatureNum));
				featState->setContinuousState(i, featState->getContinuousState(i) * stateBuf->getContinuousState(singleStateFeatureNum));
			}
		}
		else
		{
			for (unsigned int i = 0; i < getNumDiscreteStates(); i++)
			{
				featState->setDiscreteState(i, featState->getDiscreteState(i) + featureOffset * stateBuf->getDiscreteState(0));				
			}
		}

		featureOffset = featureOffset * (*it)->getDiscreteStateSize();
	}
	normalizeFeatures(featState);
}
Пример #2
0
void CQAverageTDErrorLearner::receiveError(double error, CStateCollection *state, CAction *action, CActionData *)
{
	CState *featureState = state->getState(averageErrorFunction->getFeatureCalculator());
	
	for (unsigned int i = 0; i < featureState->getNumDiscreteStates(); i++)
	{
		int index = featureState->getDiscreteState(i);
		double featureFac = featureState->getContinuousState(i);
		double featureVal = averageErrorFunction->getValue(index, action);
				
		featureVal = featureVal * (updateRate  + (1 - featureFac) * (1 - updateRate)) + error * (1 - updateRate) * featureFac;
		averageErrorFunction->setValue(index, action, featureVal);
	}
}
Пример #3
0
void CFeatureOperatorOr::getModifiedState(CStateCollection *stateCol, CState *state)
{
	assert(bInit);

	std::list<CStateModifier *>::iterator it = getStateModifiers()->begin();
	std::list<CState *>::iterator stateIt = states->begin();

	CState *stateBuf;

	int i = 0;
	int numFeatures = 0;

	for (; it != getStateModifiers()->end(); it++, stateIt++)
	{
		if (stateCol->isMember(*it))
		{
			stateBuf = stateCol->getState(*it);
		}
		else
		{
			stateBuf = *stateIt;
			(*it)->getModifiedState(stateCol, stateBuf);
		}
		double featureStateFactor = (*this->featureFactors)[*it];
		if (stateBuf->getStateProperties()->isType(FEATURESTATE))
		{
			for (unsigned int j = 0; j < stateBuf->getNumDiscreteStates(); j++)
			{
				state->setDiscreteState(i, stateBuf->getDiscreteState(j) + numFeatures);
				state->setContinuousState(i, stateBuf->getContinuousState(j) * featureStateFactor);
				i ++;
			}
		}
		else
		{
			if (stateBuf->getStateProperties()->isType(DISCRETESTATE))
			{
				state->setDiscreteState(i, stateBuf->getDiscreteState(0) + numFeatures);
				state->setContinuousState(i, featureStateFactor);
				i ++;
			}
		}

		numFeatures += (*it)->getDiscreteStateSize();
	}
	normalizeFeatures(state);
}
Пример #4
0
void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, double reward, CStateCollection *newState)
{
	CFeatureMap *featMap;

	CState *oldS = oldState->getState(properties);
	CState *newS = newState->getState(properties);

	double oldreward = 0.0;
	double visits = 0.0;

	int actionIndex = getActions()->getIndex(action);
	
	int type = oldS->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE);
	switch (type)
	{
		case FEATURESTATE:
		{
			for (unsigned int oldIndex = 0; oldIndex < oldS->getNumDiscreteStates(); oldIndex++)
			{
				int oldFeature = oldS->getDiscreteState(oldIndex);
				featMap = rewardTable->get(actionIndex, oldFeature);

				for (unsigned int newIndex = 0; newIndex < newS->getNumDiscreteStates(); newIndex++)
				{
					int newFeature = newS->getDiscreteState(newIndex);

					oldreward = featMap->getValue(newFeature);
				

					(*featMap)[newFeature] = oldreward + reward * newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);
					
					if (!bExternVisitSparse)
					{
						visits = visitTable->get(actionIndex, oldFeature)->getValue(newFeature);
					
						(*visitTable->get(actionIndex, oldFeature))[newFeature] = visits + newS->getContinuousState(newIndex) * oldS->getContinuousState(oldIndex);;
					}
				}
			}
			break;
		}
		case DISCRETESTATE:
		default: 
		{
			featMap = rewardTable->get(actionIndex, oldS->getDiscreteStateNumber());

			oldreward = featMap->getValue(newS->getDiscreteStateNumber());
		
			int feata = oldS->getDiscreteStateNumber();
			int featb = newS->getDiscreteStateNumber();

			(*featMap)[featb] = oldreward + reward;
			
			if (!bExternVisitSparse)
			{
				visits = visitTable->get(actionIndex, feata)->getValue(featb);
			
				(*visitTable->get(actionIndex, feata))[featb] = visits + 1.0;
			}
			break;
		}
	}
}