void Exp3P::receiveReward( int armNum, double reward ) { _T[ armNum ]++; // calculate the feedback value incIter(); //update double xHat = reward / _p[armNum]; _w[armNum] += (_gamma / (3 * (double)_numOfArms )) * ( xHat + ( _eta / ( _p[armNum] * pow( (double)_numOfArms * _horizon,0.5 )) ) ); /* double wsum = 0.0; for( int i=0; i<_numOfArms; i++ ) { wsum += _w[i]; } for( int i=0; i<_numOfArms; i++ ) { _w[i] /= wsum; } */ updateithValue( armNum ); }
//---------------------------------------------------------------- //---------------------------------------------------------------- void Exp3::receiveReward( int armNum, AlphaReal reward ) { _T[ armNum ]++; AlphaReal xHat = reward / _pHat[armNum]; _X[ armNum ] += ( ( _gamma * xHat )/ _numOfArms ); incIter(); updateithValue( armNum ); }
//---------------------------------------------------------------- //---------------------------------------------------------------- void Exp3G2::receiveReward( vector<AlphaReal> reward ) { incIter(); //update for( int i=0; i < _numOfArms; i++ ) { _w[i] += ( _eta * reward[i] ); } //_w[armNum] += ( _eta * reward ); updateithValue( 0 ); }
/**
 * Scans all iterator positions and finds the one whose window value has the
 * smallest absolute value.
 *
 * minAbsValue and minAbsLeft are reset to INT_MAX / 2 and then overwritten
 * with the window value / curLeft of each strictly better position.
 *
 * @return minAbsLeft + dx / 2 for the best position found. If no position
 *         improves on the initial sentinel this is INT_MAX / 2 + dx / 2.
 */
int CalcAxis::calcAxis()
{
	// Sentinel start values; any window with a smaller |value| replaces them.
	minAbsLeft  = INT_MAX / 2;
	minAbsValue = INT_MAX / 2;

	resetIter();
	while( !isIterEnd() )
	{
		const int windowValue = calcWindow();
		if( abs( windowValue ) < abs( minAbsValue ) )
		{
			minAbsValue = windowValue;
			minAbsLeft  = curLeft;
		}
		incIter();
	}

	return minAbsLeft + dx / 2;
}
//---------------------------------------------------------------- //---------------------------------------------------------------- void Exp3G2::receiveReward( int armNum, AlphaReal reward ) { _T[ armNum ]++; // calculate the feedback value incIter(); //update for( int i=0; i < _numOfArms; i++ ) { _w[i] += ( _eta * reward ); } //_w[armNum] += ( _eta * reward ); updateithValue( armNum ); }
/**
 * Scans all iterator positions and finds the one whose window value has the
 * smallest absolute value.
 *
 * minValue / minLeft are reset to FLT_MAX / INT_MAX and overwritten with the
 * window value / curLeft of each strictly better position.
 *
 * @return minLeft + dx / 2 for the best position found. If minValue is still
 *         FLT_MAX after the scan (no position replaced the sentinel), the
 *         previous minLeft is restored and INT_MAX is returned instead.
 */
int CalcAxis::calcAxis()
{
	// Remember the old result so it can be restored if the scan finds nothing.
	const int savedLeft = minLeft;

	minValue = FLT_MAX;
	minLeft  = INT_MAX;

	resetIter();
	while( !isIterEnd() )
	{
		const float windowValue = calcWindow();
		if( std::abs( windowValue ) < std::abs( minValue ) )
		{
			minValue = windowValue;
			minLeft  = curLeft;
		}
		incIter();
	}

	if( minValue != FLT_MAX )
	{
		return minLeft + dx / 2;
	}

	// Sentinel untouched: keep the previous position and signal failure.
	minLeft = savedLeft;
	return INT_MAX;
}