Code example #1
void AdaDeltaParameterOptimizer::update(const VectorPtr vecs[],
                                        const ParameterConfig& config,
                                        size_t sparseId) const {
  CHECK(sparseId == -1LU) << "Sparse update is not supported";

  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& accum = *vecs[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& accum_update = *vecs[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE];

  real learningRate = learningRate_ * config.learning_rate();
  real momentum = config.momentum();
  real decayRate = applyDecay_ ? config.decay_rate() : 0;

  adadeltaApply(value,
                grad,
                mom,
                accum,
                accum_update,
                lr,
                rou_,
                epsilon_,
                learningRate,
                momentum,
                decayRate);
}
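
For reference, a minimal self-contained sketch of the textbook AdaDelta rule that adadeltaApply is presumably evaluating over the buffers above (the function name, raw-array interface and plain-SGD form are assumptions here; Paddle's kernel additionally folds in momentum, the per-element learning rate and decayRate):

#include <cmath>
#include <cstddef>

// Textbook AdaDelta step over raw arrays (illustrative sketch only).
void adadeltaStepSketch(float* value, const float* grad,
                        float* accumGrad, float* accumUpdate, size_t n,
                        float rho, float epsilon, float learningRate) {
  for (size_t i = 0; i < n; ++i) {
    // Decaying average of squared gradients.
    accumGrad[i] = rho * accumGrad[i] + (1 - rho) * grad[i] * grad[i];
    // Update scaled by the ratio of the two running averages.
    float update = -std::sqrt((accumUpdate[i] + epsilon) /
                              (accumGrad[i] + epsilon)) * grad[i];
    // Decaying average of squared updates.
    accumUpdate[i] = rho * accumUpdate[i] + (1 - rho) * update * update;
    value[i] += learningRate * update;
  }
}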
Code example #2
void SparseMomentumParameterOptimizer::update(const VectorPtr vecs[],
                                              const ParameterConfig& paraConfig,
                                              size_t sparseId) const {
  if (sparseId != -1LU) {
    CHECK_LT(sparseId, t0Vec_.size());
    if (t0Vec_[sparseId] == 0) {
      // First update of this sparse row: initialize v_t from the current value.
      vecs[PARAMETER_MOMENTUM_VT]->assign(*vecs[PARAMETER_VALUE]);
      t0Vec_[sparseId] = 1;
    }
    vecs[PARAMETER_MOMENTUM_UT]->add(*vecs[PARAMETER_GRADIENT],
                                     -alpha_ * gamma_ * learningRate_);
    vecs[PARAMETER_MOMENTUM_VT]->add(*vecs[PARAMETER_GRADIENT],
                                     tau_ * alpha_ * gamma_ * learningRate_);
    vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_MOMENTUM_UT],
                               tau_ / beta_ + 1.0 / alpha_,
                               *vecs[PARAMETER_MOMENTUM_VT],
                               1.0 / beta_);

  } else {
    vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT],
                                     *vecs[PARAMETER_MOMENTUM],
                                     learningRate_ * paraConfig.learning_rate(),
                                     paraConfig.momentum(),
                                     applyDecay_ ? paraConfig.decay_rate() : 0);
  }
}
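
The dense branch above delegates to sgdUpdate. A conventional SGD-with-momentum step with weight decay looks like the sketch below (the exact ordering of operations inside Paddle's sgdUpdate is an assumption; the sketch only illustrates the kind of update performed):

#include <cstddef>

// Conventional momentum SGD with weight decay (illustrative sketch only).
void sgdMomentumSketch(float* value, const float* grad, float* mom, size_t n,
                       float learningRate, float momentum, float decayRate) {
  for (size_t i = 0; i < n; ++i) {
    // Decay the previous velocity and add the (weight-decayed) gradient step.
    mom[i] = momentum * mom[i] -
             learningRate * (grad[i] + decayRate * value[i]);
    value[i] += mom[i];
  }
}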
Code example #3
void AdagradParameterOptimizer::update(const VectorPtr vecs[],
                                       const ParameterConfig& config,
                                       size_t sparseId) const {
  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& accum_buffer = *vecs[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& accum = *vecs[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE];

  real epsilon = optConfig_.ada_epsilon();
  real learningRate = learningRate_ * config.learning_rate();
  real momentum = config.momentum();
  real decayRate = applyDecay_ ? config.decay_rate() : 0;

  adagradApply(value,
               grad,
               mom,
               accum_buffer,
               accum,
               lr,
               epsilon,
               learningRate,
               momentum,
               decayRate);
}
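
For reference, a minimal sketch of the textbook Adagrad rule that adagradApply is presumably built around (the raw-array interface is an assumption; Paddle's kernel also maintains the long-term accum_buffer and applies momentum, the per-element learning rate and decayRate):

#include <cmath>
#include <cstddef>

// Textbook Adagrad step over raw arrays (illustrative sketch only).
void adagradStepSketch(float* value, const float* grad, float* accum,
                       size_t n, float epsilon, float learningRate) {
  for (size_t i = 0; i < n; ++i) {
    accum[i] += grad[i] * grad[i];  // running sum of squared gradients
    value[i] -= learningRate * grad[i] / (std::sqrt(accum[i]) + epsilon);
  }
}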
Code example #4
void OptimizerWithRegularizerEveryNumBatches::doTraversal(
    const VectorPtr vecs[], const ParameterConfig& config) const {
  int32_t base =
      std::max(baseTimer_, (timer_ + 1 - config.num_batches_regularization()));
  regularizer_->update(
      vecs, config, optimizer_->getLearningRate(), base, timer_ + 1);
}
Code example #5
void AdamParameterOptimizer::update(const VectorPtr vecs[],
                                    const ParameterConfig& config,
                                    size_t sparseId) const {
  CHECK(sparseId == -1UL) << "Sparse update is not supported";

  // Powers of beta1/beta2 at the current step, used for bias correction.
  real beta1_power = std::pow(beta1_, step_);
  real beta2_power = std::pow(beta2_, step_);
  real learningRate = config.learning_rate() * learningRate_;

  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& v = *vecs[PARAMETER_SECOND_MOMENTUM];

  adamApply(value,
            grad,
            mom,
            v,
            beta1_,
            beta2_,
            beta1_power,
            beta2_power,
            epsilon_,
            learningRate);
}
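
For reference, a minimal sketch of the textbook Adam rule that adamApply is presumably evaluating, with beta1_power and beta2_power entering through the bias correction (the raw-array interface and the exact form of the correction are assumptions):

#include <cmath>
#include <cstddef>

// Textbook Adam step over raw arrays (illustrative sketch only).
void adamStepSketch(float* value, const float* grad, float* m, float* v,
                    size_t n, float beta1, float beta2,
                    float beta1Power, float beta2Power,
                    float epsilon, float learningRate) {
  for (size_t i = 0; i < n; ++i) {
    m[i] = beta1 * m[i] + (1 - beta1) * grad[i];            // 1st moment
    v[i] = beta2 * v[i] + (1 - beta2) * grad[i] * grad[i];  // 2nd moment
    // Bias-corrected step size.
    float alpha = learningRate * std::sqrt(1 - beta2Power) / (1 - beta1Power);
    value[i] -= alpha * m[i] / (std::sqrt(v[i]) + epsilon);
  }
}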
Code example #6
void OptimizerWithRegularizerEveryNumBatches::catchUpWith(
    const VectorPtr vecs[],
    const ParameterConfig& config,
    size_t sparseId) const {
  int32_t base = timer_ - timer_ % config.num_batches_regularization();
  regularizer_->update(vecs,
                       config,
                       optimizer_->getLearningRate(),
                       std::max(base, baseTimer_),
                       timer_);
}
Code example #7
void AdamaxParameterOptimizer::update(const VectorPtr vecs[],
                                      const ParameterConfig& config,
                                      size_t sparseId) const {
  CHECK(sparseId == -1UL) << "Sparse update is not supported";
  real learningRate = config.learning_rate() * learningRate_;

  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& u = *vecs[PARAMETER_WEIGHTED_INFINITY_NORM];

  adamaxApply(value, grad, mom, u, beta1_, beta2_, step_, learningRate);
}
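
For reference, a minimal sketch of the textbook Adamax rule that adamaxApply is presumably evaluating (the raw-array interface is an assumption; step enters through the bias-correction factor):

#include <algorithm>
#include <cmath>
#include <cstddef>

// Textbook Adamax step over raw arrays (illustrative sketch only).
void adamaxStepSketch(float* value, const float* grad, float* m, float* u,
                      size_t n, float beta1, float beta2, int step,
                      float learningRate) {
  for (size_t i = 0; i < n; ++i) {
    m[i] = beta1 * m[i] + (1 - beta1) * grad[i];        // 1st moment
    u[i] = std::max(beta2 * u[i], std::fabs(grad[i]));  // weighted inf-norm
    float alpha = learningRate / (1 - std::pow(beta1, step));
    value[i] -= alpha * m[i] / u[i];
  }
}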
Code example #8
void RMSPropParameterOptimizer::update(const VectorPtr vecs[],
                                       const ParameterConfig& config,
                                       size_t sparseId) const {
  BaseMatrix& value = *vecs[PARAMETER_VALUE];
  BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
  BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
  BaseMatrix& sum = *vecs[PARAMETER_GRADIENT_SQURESUM];
  BaseMatrix& sum1 = *vecs[PARAMETER_GRADIENT_SQURESUM1];
  BaseMatrix& lr = *vecs[PARAMETER_LEARNING_RATE];

  real accumulatedRou = rou_;
  bool firstTime = timer_ == 0;
  if (sparseId != -1LU) {
    CHECK_LT(sparseId, t0Vec_.size());
    // For a sparse row, fold in the decay for all steps since this row was
    // last touched (t0Vec_ records the step of its last update).
    accumulatedRou = std::pow(rou_, timer_ + 1 - t0Vec_[sparseId]);
    firstTime = t0Vec_[sparseId] == 0;
    t0Vec_[sparseId] = timer_ + 1;
  }

  real epsilon = optConfig_.ada_epsilon();
  real learningRate = learningRate_ * config.learning_rate();
  real momentum = config.momentum();
  real decayRate = applyDecay_ ? config.decay_rate() : 0;

  rmspropApply(value,
               grad,
               mom,
               sum,
               sum1,
               lr,
               accumulatedRou,
               rou_,
               epsilon,
               learningRate,
               momentum,
               decayRate,
               firstTime);
}
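
For reference, a minimal sketch of a centered RMSProp step using the two accumulators seen above, which is the kind of update rmspropApply presumably performs (the raw-array interface is an assumption; Paddle's kernel also handles accumulatedRou for sparse catch-up, momentum, the per-element learning rate and decayRate):

#include <cmath>
#include <cstddef>

// Centered RMSProp step over raw arrays (illustrative sketch only).
void rmspropStepSketch(float* value, const float* grad,
                       float* meanSquare, float* meanGrad, size_t n,
                       float rho, float epsilon, float learningRate) {
  for (size_t i = 0; i < n; ++i) {
    meanSquare[i] = rho * meanSquare[i] + (1 - rho) * grad[i] * grad[i];
    meanGrad[i] = rho * meanGrad[i] + (1 - rho) * grad[i];
    // Divide by the centered RMS of recent gradients.
    float denom =
        std::sqrt(meanSquare[i] - meanGrad[i] * meanGrad[i] + epsilon);
    value[i] -= learningRate * grad[i] / denom;
  }
}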
Code example #9
void OptimizerWithGradientClipping::update(const VectorPtr vecs[],
                                           const ParameterConfig& config,
                                           size_t sparseId) const {
  real globalThreshold = optConfig_.gradient_clipping_threshold();
  real localThreshold = config.gradient_clipping_threshold();

  // Use the local gradient clipping threshold if it is enabled,
  // otherwise use the global one.
  real threshold = localThreshold > 0.0f ? localThreshold : globalThreshold;
  std::string field = localThreshold > 0.0f ? "local" : "global";

  real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax();
  if (maxAbsGrad > threshold) {
    if (FLAGS_log_clipping) {
      real avgAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsSum() /
                        vecs[PARAMETER_GRADIENT]->getSize();
      LOG(INFO) << "parameter=" << config.name() << " need clipping by "
                << field << " threshold=" << threshold
                << ", max grad=" << maxAbsGrad << ", avg grad=" << avgAbsGrad;
    }
    vecs[PARAMETER_GRADIENT]->clip(-threshold, threshold);
  }
  optimizer_->update(vecs, config, sparseId);
}
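
The clip(-threshold, threshold) call above is an element-wise clamp of the gradient; a standalone equivalent looks like this (illustrative sketch only):

#include <algorithm>
#include <cstddef>

// Clamp every gradient element to [-threshold, threshold].
void clipSketch(float* grad, size_t n, float threshold) {
  for (size_t i = 0; i < n; ++i) {
    grad[i] = std::min(std::max(grad[i], -threshold), threshold);
  }
}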
Code example #10
std::shared_ptr<IParameterUpdaterHook> IParameterUpdaterHook::create(
    const ParameterConfig& paramConfig, int idx) {
  // Hooks are cached per (parameter name, hook index), so repeated requests
  // for the same parameter's hook return one shared instance.
  std::pair<std::string, int> key = {paramConfig.name(), idx};
  return g_hookCache_.get(
      key, [&] { return createImpl(paramConfig.update_hooks(idx)); });
}