Ejemplo n.º 1
0
/**
 * Feeds back the reward observed for arm `armNum` and raises that arm's weight.
 *
 * Steps, in order:
 *   1. increment the counter _T[armNum];
 *   2. advance the iteration via incIter();
 *   3. add to _w[armNum] the quantity
 *        (_gamma / (3 * K)) * ( reward / _p[armNum]
 *                               + _eta / (_p[armNum] * (K * _horizon)^0.5) )
 *      with K = _numOfArms;
 *   4. call updateithValue( armNum ).
 *
 * @param armNum index of the arm the reward belongs to
 * @param reward observed reward for that arm
 */
void Exp3P::receiveReward( int armNum, double reward )
{
	++_T[ armNum ];

	incIter();

	const double armCount = static_cast<double>( _numOfArms );

	// reward divided by _p[armNum] (presumably the arm's selection probability -- confirm)
	const double scaledReward = reward / _p[armNum];

	// additive term that depends on _eta, _p[armNum], the arm count and _horizon
	const double bonus = _eta / ( _p[armNum] * pow( armCount * _horizon, 0.5 ) );

	// common factor applied to the sum of both terms
	const double stepSize = _gamma / ( 3 * armCount );

	_w[armNum] += stepSize * ( scaledReward + bonus );

	// NOTE: a normalisation pass (dividing every _w[i] by the sum of all
	// weights) existed here only as commented-out code; it stays disabled.

	updateithValue( armNum );
}
Ejemplo n.º 2
0
//----------------------------------------------------------------
//----------------------------------------------------------------
    void Exp3::receiveReward( int armNum, AlphaReal reward )
    {
        _T[ armNum ]++;
        AlphaReal xHat = reward / _pHat[armNum];
        _X[ armNum ] += ( ( _gamma  * xHat )/ _numOfArms );
        incIter();
        updateithValue( armNum );               
    }
Ejemplo n.º 3
0
//----------------------------------------------------------------
//----------------------------------------------------------------
/**
 * Applies one reward per arm in a single call.
 *
 * Advances the iteration with incIter(), then adds _eta * reward[k] to
 * _w[k] for every k in [0, _numOfArms), and finally calls
 * updateithValue( 0 ).
 *
 * @param reward per-arm rewards; indexed from 0 up to _numOfArms - 1, so it
 *               must hold at least _numOfArms entries (not checked here)
 */
void Exp3G2::receiveReward( vector<AlphaReal> reward )
{
	incIter();

	// shift each arm's weight by its own scaled reward
	for( int arm = 0; arm < _numOfArms; ++arm )
	{
		_w[arm] += _eta * reward[arm];
	}

	updateithValue( 0 );
}
Ejemplo n.º 4
0
/**
 * Walks the iteration range (resetIter / isIterEnd / incIter) and remembers
 * the position whose calcWindow() result has the smallest absolute value.
 *
 * The best value is stored in minAbsValue and the matching curLeft in
 * minAbsLeft.
 *
 * @return minAbsLeft + dx / 2; if the range is empty this is computed from
 *         the initial sentinel INT_MAX / 2.
 */
int CalcAxis::calcAxis()
{
    // sentinels; presumably halved to leave headroom for the "+ dx / 2" below -- confirm
    minAbsLeft = INT_MAX / 2;
    minAbsValue = INT_MAX / 2;

    resetIter();
    while(!isIterEnd()){
        const int windowValue = calcWindow();

        // strictly smaller magnitude wins, so the first of equal candidates is kept
        if(abs(windowValue) < abs(minAbsValue)){
            minAbsValue = windowValue;
            minAbsLeft = curLeft;
        }

        incIter();
    }

    return minAbsLeft + dx / 2;
}
Ejemplo n.º 5
0
//----------------------------------------------------------------
//----------------------------------------------------------------
/**
 * Records the reward observed for arm `armNum`.
 *
 * Increments _T[armNum], advances the iteration with incIter(), adds
 * _eta * reward to the weight of EVERY arm (not only `armNum`), and then
 * calls updateithValue( armNum ).
 *
 * @param armNum index of the arm the reward belongs to
 * @param reward observed reward
 */
void Exp3G2::receiveReward( int armNum, AlphaReal reward )
{
	++_T[ armNum ];

	incIter();

	// the same increment is applied to all weights
	for( int arm = 0; arm < _numOfArms; ++arm )
	{
		_w[arm] += _eta * reward;
	}

	// disabled alternative that would touch only the given arm:
	//_w[armNum] += ( _eta * reward );

	updateithValue( armNum );
}
Ejemplo n.º 6
0
/**
 * Walks the iteration range (resetIter / isIterEnd / incIter) and remembers
 * the position whose calcWindow() result has the smallest absolute value.
 *
 * The best value is stored in minValue and the matching curLeft in minLeft.
 *
 * @return minLeft + dx / 2 on success. If minValue is still the FLT_MAX
 *         sentinel after the scan, minLeft is restored to the value it had
 *         on entry and INT_MAX is returned.
 */
int CalcAxis::calcAxis(){
    // kept so minLeft can be rolled back when the scan finds nothing
    const int savedLeft = minLeft;

    minLeft = INT_MAX;
    minValue = FLT_MAX;

    resetIter();
    while(!isIterEnd()){
        const float windowValue = calcWindow();

        // strictly smaller magnitude wins, so the first of equal candidates is kept
        if(std::abs(windowValue) < std::abs(minValue)){
            minValue = windowValue;
            minLeft = curLeft;
        }

        incIter();
    }

    if(minValue != FLT_MAX){
        return minLeft + dx / 2;
    }

    // sentinel untouched: no candidate replaced it
    minLeft = savedLeft;
    return INT_MAX;
}