Esempio n. 1
0
void Exp3P::receiveReward( int armNum, double reward )
{
	_T[ armNum ]++;
	// calculate the feedback value

	incIter();
	 
	//update 
	double xHat = reward / _p[armNum]; 
	_w[armNum] +=  (_gamma / (3 * (double)_numOfArms )) * ( xHat + ( _eta / ( _p[armNum] * pow( (double)_numOfArms * _horizon,0.5 )) ) );

	/*
	double wsum = 0.0;
	for( int i=0; i<_numOfArms; i++ ) 
	{
		wsum += _w[i];
	}
	for( int i=0; i<_numOfArms; i++ ) 
	{
		_w[i] /= wsum;
	}
	*/

	updateithValue( armNum );		
}
Esempio n. 2
0
//----------------------------------------------------------------
//----------------------------------------------------------------
    void Exp3::receiveReward( int armNum, AlphaReal reward )
    {
        _T[ armNum ]++;
        AlphaReal xHat = reward / _pHat[armNum];
        _X[ armNum ] += ( ( _gamma  * xHat )/ _numOfArms );
        incIter();
        updateithValue( armNum );               
    }
Esempio n. 3
0
//----------------------------------------------------------------
//----------------------------------------------------------------
// Applies one reward per arm: after calling incIter(), adds
// _eta * reward[k] to _w[k] for every k in [0, _numOfArms), then calls
// updateithValue( 0 ).
//
// reward : per-arm reward values. It is indexed up to _numOfArms - 1
//          without a size check, so it must hold at least _numOfArms entries.
void Exp3G2::receiveReward( vector<AlphaReal> reward )
{
	incIter();

	// add the scaled reward of each arm to that arm's weight
	int arm = 0;
	while( arm < _numOfArms )
	{
		_w[arm] += _eta * reward[arm];
		++arm;
	}

	updateithValue( 0 );
}
Esempio n. 4
0
//----------------------------------------------------------------
//----------------------------------------------------------------
// Records the reward observed for one arm.
//
// In order: increments the counter _T[armNum], calls incIter(), adds
// _eta * reward to EVERY entry of _w (not only _w[armNum]), then calls
// updateithValue( armNum ).
//
// armNum : index of the arm the reward belongs to; used unchecked as an
//          index into _T.
// reward : reward value reported for that arm.
//
// NOTE(review): a variant that updated only _w[armNum] is disabled in the
// history of this function -- confirm that updating all arms is intended.
void Exp3G2::receiveReward( int armNum, AlphaReal reward )
{
	++_T[ armNum ];

	incIter();

	// the same scaled reward is added to the weight of each arm
	for( int arm = 0; arm < _numOfArms; ++arm )
	{
		_w[arm] += _eta * reward;
	}

	updateithValue( armNum );
}