Code example #1
void SparseMomentumParameterOptimizer::update(const VectorPtr vecs[],
                                              const ParameterConfig& paraConfig,
                                              size_t sparseId) const {
  if (sparseId != -1LU) {
    // Sparse path: update only the parameter row identified by sparseId.
    CHECK_LT(sparseId, t0Vec_.size());
    if (t0Vec_[sparseId] == 0) {
      // First update of this row: seed v_t with the current parameter value.
      vecs[PARAMETER_MOMENTUM_VT]->assign(*vecs[PARAMETER_VALUE]);
      t0Vec_[sparseId] = 1;
    }
    // Fold the gradient into the auxiliary buffers u_t and v_t, then
    // recombine them into the parameter value below.
    vecs[PARAMETER_MOMENTUM_UT]->add(*vecs[PARAMETER_GRADIENT],
                                     -alpha_ * gamma_ * learningRate_);
    vecs[PARAMETER_MOMENTUM_VT]->add(*vecs[PARAMETER_GRADIENT],
                                     tau_ * alpha_ * gamma_ * learningRate_);
    vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_MOMENTUM_UT],
                               tau_ / beta_ + 1.0 / alpha_,
                               *vecs[PARAMETER_MOMENTUM_VT],
                               1.0 / beta_);

  } else {
    // Dense path: plain SGD with momentum and optional weight decay.
    vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT],
                                     *vecs[PARAMETER_MOMENTUM],
                                     learningRate_ * paraConfig.learning_rate(),
                                     paraConfig.momentum(),
                                     applyDecay_ ? paraConfig.decay_rate() : 0);
  }
}
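Reading the coefficients directly off the code above, the sparse branch keeps two auxiliary buffers $u_t$ and $v_t$ and, for each touched row, applies (with $g$ the gradient and $\eta$ = learningRate_):

$$u_t \leftarrow u_t - \alpha\gamma\eta\,g, \qquad v_t \leftarrow v_t + \tau\alpha\gamma\eta\,g, \qquad \theta \leftarrow \theta + \Bigl(\tfrac{\tau}{\beta} + \tfrac{1}{\alpha}\Bigr)u_t + \tfrac{1}{\beta}\,v_t.$$

This appears to be a reformulation of momentum SGD in which only the rows that actually received gradients need to be touched; the scalars alpha_, beta_, gamma_ and tau_ are maintained elsewhere in the optimizer.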
Code example #2
void AdaDeltaParameterOptimizer::update(const VectorPtr vecs[],
                                        const ParameterConfig& config,
                                        size_t sparseId) const {
  CHECK(sparseId == -1LU) << "Sparse update is not supported";

  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& accum = *vecs[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& accum_update = *vecs[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE];

  real learningRate = learningRate_ * config.learning_rate();
  real momentum = config.momentum();
  real decayRate = applyDecay_ ? config.decay_rate() : 0;

  adadeltaApply(value,
                grad,
                mom,
                accum,
                accum_update,
                lr,
                rou_,
                epsilon_,
                learningRate,
                momentum,
                decayRate);
}
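For reference, adadeltaApply presumably implements the standard AdaDelta rule (Zeiler, 2012), with rou_ as $\rho$, epsilon_ as $\epsilon$, accum holding $E[g^2]$ and accum_update holding $E[\Delta\theta^2]$:

$$E[g^2]_t = \rho\,E[g^2]_{t-1} + (1-\rho)\,g_t^2, \qquad \Delta\theta_t = -\frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\,g_t, \qquad E[\Delta\theta^2]_t = \rho\,E[\Delta\theta^2]_{t-1} + (1-\rho)\,\Delta\theta_t^2.$$

The additional momentum, learningRate and decayRate arguments suggest the library folds a momentum term and weight decay on top of this basic rule.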
Code example #3
void AdagradParameterOptimizer::update(const VectorPtr vecs[],
                                       const ParameterConfig& config,
                                       size_t sparseId) const {
  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& accum_buffer = *vecs[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& accum = *vecs[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE];

  real epsilon = optConfig_.ada_epsilon();
  real learningRate = learningRate_ * config.learning_rate();
  real momentum = config.momentum();
  real decayRate = applyDecay_ ? config.decay_rate() : 0;

  adagradApply(value,
               grad,
               mom,
               accum_buffer,
               accum,
               lr,
               epsilon,
               learningRate,
               momentum,
               decayRate);
}
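For reference, adagradApply presumably implements a variant of the textbook Adagrad rule (up to the exact placement of $\epsilon$), with epsilon taken from optConfig_.ada_epsilon() and the running sum of squared gradients $G_t$ apparently split across the two accumulators accum_buffer and accum:

$$G_t = G_{t-1} + g_t^2, \qquad \theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{G_t} + \epsilon}\,g_t.$$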
Code example #4
void RMSPropParameterOptimizer::update(const VectorPtr vecs[],
                                       const ParameterConfig& config,
                                       size_t sparseId) const {
  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& sum = *vecs[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& sum1 = *vecs[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE];

  // For a sparse row, the decay factor must "catch up" for the steps in which
  // the row received no gradient: rou_ is raised to the number of steps since
  // the row was last touched (tracked in t0Vec_).
  real accumulatedRou = rou_;
  bool firstTime = timer_ == 0;
  if (sparseId != -1LU) {
    CHECK_LT(sparseId, t0Vec_.size());
    accumulatedRou = std::pow(rou_, timer_ + 1 - t0Vec_[sparseId]);
    firstTime = t0Vec_[sparseId] == 0;
    t0Vec_[sparseId] = timer_ + 1;
  }

  real epsilon = optConfig_.ada_epsilon();
  real learningRate = learningRate_ * config.learning_rate();
  real momentum = config.momentum();
  real decayRate = applyDecay_ ? config.decay_rate() : 0;

  rmspropApply(value,
               grad,
               mom,
               sum,
               sum1,
               lr,
               accumulatedRou,
               rou_,
               epsilon,
               learningRate,
               momentum,
               decayRate,
               firstTime);
}
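For reference, rmspropApply presumably implements RMSProp; the presence of both sum and sum1 suggests the centered variant, which also tracks a running mean of the gradient, and accumulatedRou replaces $\rho$ for sparse rows so the decay catches up over the skipped steps:

$$E[g^2]_t = \rho\,E[g^2]_{t-1} + (1-\rho)\,g_t^2, \qquad E[g]_t = \rho\,E[g]_{t-1} + (1-\rho)\,g_t, \qquad \theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{E[g^2]_t - E[g]_t^2 + \epsilon}}\,g_t.$$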