void Exp3P::receiveReward( int armNum, double reward ) { _T[ armNum ]++; // calculate the feedback value incIter(); //update double xHat = reward / _p[armNum]; _w[armNum] += (_gamma / (3 * (double)_numOfArms )) * ( xHat + ( _eta / ( _p[armNum] * pow( (double)_numOfArms * _horizon,0.5 )) ) ); /* double wsum = 0.0; for( int i=0; i<_numOfArms; i++ ) { wsum += _w[i]; } for( int i=0; i<_numOfArms; i++ ) { _w[i] /= wsum; } */ updateithValue( armNum ); }
//---------------------------------------------------------------- //---------------------------------------------------------------- void Exp3::receiveReward( int armNum, AlphaReal reward ) { _T[ armNum ]++; AlphaReal xHat = reward / _pHat[armNum]; _X[ armNum ] += ( ( _gamma * xHat )/ _numOfArms ); incIter(); updateithValue( armNum ); }
//---------------------------------------------------------------- //---------------------------------------------------------------- void Exp3G2::receiveReward( vector<AlphaReal> reward ) { incIter(); //update for( int i=0; i < _numOfArms; i++ ) { _w[i] += ( _eta * reward[i] ); } //_w[armNum] += ( _eta * reward ); updateithValue( 0 ); }
//---------------------------------------------------------------- //---------------------------------------------------------------- void Exp3G2::receiveReward( int armNum, AlphaReal reward ) { _T[ armNum ]++; // calculate the feedback value incIter(); //update for( int i=0; i < _numOfArms; i++ ) { _w[i] += ( _eta * reward ); } //_w[armNum] += ( _eta * reward ); updateithValue( armNum ); }