Example #1
// virtual
void GKNN::enableIncrementalLearning(sp_relation& pRelation, int labelDims, double* pMins, double* pRanges)
{
	clear();
	m_pRelation = pRelation;
	m_labelDims = labelDims;
	m_featureDims = pRelation->size() - m_labelDims;
	m_pInstances = new GData(m_pRelation);

	// Make a feature-only relation (drop the label columns) and use it
	// to initialize the distance metric
	GMixedRelation* pMixedRel = new GMixedRelation();
	pMixedRel->addAttrs(pRelation.get(), 0, pRelation->size() - labelDims);
	sp_relation pRelInputs = pMixedRel;
	m_pDistanceMetric->init(pRelInputs);

	// Allocate some other buffers
	int maxOutputValueCount = 0;
	for(int n = 0; n < m_labelDims; n++)
		maxOutputValueCount = MAX(maxOutputValueCount, pRelation->valueCount(m_featureDims + n));
	m_pValueCounts = new double[maxOutputValueCount];

	// Scale factor optimization
	if(m_optimizeScaleFactors)
	{
		m_pCritic = new GKnnScaleFactorCritic(this, m_labelDims, m_featureDims);
		m_pScaleFactorOptimizer = new GMomentumGreedySearch(m_pCritic);
	}
	else
	{
		m_pCritic = NULL;
		m_pScaleFactorOptimizer = NULL;
	}
}
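The key move in Example #1 is slicing the label columns off an existing relation so that the distance metric sees only the features. A minimal sketch of that pattern, reusing only calls that appear above and assuming pRelation and labelDims as in the example (the three-argument addAttrs copies a range of attributes from the source relation, here attributes 0 through featureDims - 1):

GMixedRelation* pFeatureRel = new GMixedRelation();
pFeatureRel->addAttrs(pRelation.get(), 0, pRelation->size() - labelDims);
sp_relation spFeatures = pFeatureRel; // the smart pointer takes ownership
// spFeatures can now be handed to anything that should ignore the labels,
// e.g. m_pDistanceMetric->init(spFeatures) in the example above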
Example #2
GIncrementalLearnerQAgent::GIncrementalLearnerQAgent(sp_relation& pObsControlRelation, GIncrementalLearner* pQTable, int actionDims, double* pInitialState, GRand* pRand, GAgentActionIterator* pActionIterator, double softMaxThresh)
: GQLearner(pObsControlRelation, actionDims, pInitialState, pRand, pActionIterator)
{
	// Enable incremental learning
	m_pQTable = pQTable;
	GMixedRelation* pQRelation = new GMixedRelation();
	pQRelation->addAttrs(pObsControlRelation.get());
	pQRelation->addAttr(0); // the Q-value
	sp_relation pRelQtable;
	pRelQtable = pQRelation;
	pQTable->enableIncrementalLearning(pRelQtable, 1, NULL, NULL);

	// Init other stuff
	m_pBuf = new double[pQRelation->size()];
	m_softMaxThresh = softMaxThresh;
	m_pActionIterator = pActionIterator;
	pActionIterator->reset(pInitialState);
}
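The design choice worth noting here: the Q-table is just a generic incremental learner trained on the observation/control relation with one continuous column appended to hold the Q-value. In this API, addAttr takes a value count, so addAttr(0) declares a continuous attribute, while a nonzero count (as in Examples #4 and #5) declares a categorical attribute with that many discrete values. This is also why m_pBuf is allocated with pQRelation->size() doubles: one slot per attribute, including the appended Q-value column.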
Example #3
void transition(GArgReader& args)
{
	// Load the input data
	GMatrix* pActions = loadData(args.pop_string());
	Holder<GMatrix> hActions(pActions);
	GMatrix* pState = loadData(args.pop_string());
	Holder<GMatrix> hState(pState);
	if(pState->rows() != pActions->rows())
		ThrowError("Expected the same number of rows in both datasets");

	// Parse options
	bool delta = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-delta"))
			delta = true;
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Make the output data
	size_t actionDims = pActions->cols();
	size_t stateDims = pState->cols();
	GMixedRelation* pRelation = new GMixedRelation();
	sp_relation pRel = pRelation;
	pRelation->addAttrs(pActions->relation().get());
	pRelation->addAttrs(stateDims + stateDims, 0); // current state, then next state (all continuous)
	GMatrix* pTransition = new GMatrix(pRel);
	pTransition->newRows(pActions->rows() - 1);
	for(size_t i = 0; i < pActions->rows() - 1; i++)
	{
		double* pOut = pTransition->row(i);
		GVec::copy(pOut, pActions->row(i), actionDims);
		GVec::copy(pOut + actionDims, pState->row(i), stateDims);
		GVec::copy(pOut + actionDims + stateDims, pState->row(i + 1), stateDims);
		if(delta)
			GVec::subtract(pOut + actionDims + stateDims, pState->row(i), stateDims);
	}
	pTransition->print(cout);
}
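Example #3 turns two row-aligned datasets into supervised transition data: row i holds action_i, then state_i, then state_{i+1}, so a model can later be trained to map (action, state) to the next state. With -delta, GVec::subtract overwrites the last stateDims columns with state_{i+1} - state_i, which is often easier to learn than the absolute next state. A hypothetical invocation, assuming the surrounding tool dispatches to this function by name (the file names are made up):

transition actions.arff state.arff -delta

The resulting matrix is printed to stdout via pTransition->print(cout), so it can be redirected to a file.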
Example #4
	CarOnHillModel(GRand* prng, GImage* pImage, GWidgetTextLabel* pWins)
	{
		m_pWins = pWins;
		m_wins = 0;
		m_pImage = pImage;
		m_carPos = 0;
		m_velocity = 0;
		m_prng = prng;

		// Load the car image and add some border so we can rotate it
		GImage tmp;
		tmp.loadPng("minicar.png");
		m_pCar = new GImage();
		m_pCar->setSize(70, 60);
		GRect r(0, 0, 60, 36);
		m_pCar->blit(5, 5, &tmp, &r);
		m_pRotatedCar = new GImage();

		// Make the agent
		GMixedRelation* pRelAgent = new GMixedRelation();
		sp_relation relAgent;
		relAgent = pRelAgent;
		pRelAgent->addAttr(0); // position
		pRelAgent->addAttr(0); // velocity
		pRelAgent->addAttr(2); // action {forward, reverse}
		double initialState[2];
		initialState[0] = m_carPos;
		initialState[1] = m_velocity;
		double goalState[2];
		goalState[0] = 2;
		goalState[1] = 0;
		m_pActionIterator = new GDiscreteActionIterator(2);
		m_pAgents[0] = new CarQAgent(relAgent, initialState, m_prng, m_pActionIterator);
		((GQLearner*)m_pAgents[0])->setLearningRate(.9);
		((GQLearner*)m_pAgents[0])->setDiscountFactor(0.999);
	}
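One detail worth calling out: the discount factor of 0.999 is deliberately close to 1. In the car-on-the-hill task the car must first back away from the goal to build momentum, so the agent only succeeds if it values reward that arrives many steps in the future.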
Example #5
void DoTest(GRand* prng, double rcoeff, double alpha, double gamma, double reward, double penalty, double soft_max_thresh, bool warpRandom)
{
    printf("---------------------------------------\n");
    printf("rcoeff=%lg, alpha=%lg, gamma=%lg, reward=%lg, penalty=%lg, softMaxThresh=%lg, warpRandom=%s\n", rcoeff, alpha, gamma, reward, penalty, soft_max_thresh, warpRandom ? "true" : "false");
    printf("---------------------------------------\n");
    fflush(stdout);
    soft_max_thresh /= 100;
    double soberness = ((rcoeff * 4 / 100) - 1) / 3;
    sp_relation rel;
    GMixedRelation* pRel = new GMixedRelation();
    rel = pRel;
    pRel->addAttr(0); // x
    pRel->addAttr(0); // y
    pRel->addAttr(4); // {E,N,W,S}
    GDiscreteActionIterator it(4);
    double initialstate[2];
    initialstate[0] = 0;
    initialstate[1] = 0;
    TestQAgent agent(rel, initialstate, prng, &it, soft_max_thresh, soberness, reward, penalty, warpRandom);
    agent.setLearningRate(alpha);
    agent.setDiscountFactor(gamma);
    while(agent.GetJourneyCount() < 10000)
        agent.Iterate();
}
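A hypothetical driver for this test. It assumes GRand can be constructed from an integer seed (an assumption; only a GRand* appears above), and the parameter values are made up. Note that soft_max_thresh is passed as a percentage, since DoTest divides it by 100:

GRand prng(0);
DoTest(&prng, 100.0 /*rcoeff*/, 0.1 /*alpha*/, 0.97 /*gamma*/,
	1.0 /*reward*/, -1.0 /*penalty*/, 5.0 /*soft_max_thresh*/, false /*warpRandom*/);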