void OptimizerWithGradientClipping::update(const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) const { real globalThreshold = optConfig_.gradient_clipping_threshold(); real localThreshold = config.gradient_clipping_threshold(); // Use local gradient clipping threshold if it's enabled, // otherwise using the global one. real threshold = localThreshold > 0.0f ? localThreshold : globalThreshold; std::string field = localThreshold > 0.0f ? "local" : "global"; real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax(); if (maxAbsGrad > threshold) { if (FLAGS_log_clipping) { real avgAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsSum() / vecs[PARAMETER_GRADIENT]->getSize(); LOG(INFO) << "parameter=" << config.name() << " need clipping by " << field << " threshold=" << threshold << ", max grad=" << maxAbsGrad << ", avg grad=" << avgAbsGrad; } vecs[PARAMETER_GRADIENT]->clip(-threshold, threshold); } optimizer_->update(vecs, config, sparseId); }
/**
 * Obtain the updater hook for the idx-th update hook of a parameter.
 *
 * The lookup goes through g_hookCache_, keyed by (parameter name, idx). The
 * factory passed to the cache builds the hook with createImpl from
 * paramConfig.update_hooks(idx).
 *
 * @param paramConfig  configuration of the parameter owning the hook.
 * @param idx          index into the parameter's update_hooks list.
 * @return the shared hook instance returned by the cache.
 */
std::shared_ptr<IParameterUpdaterHook> IParameterUpdaterHook::create(
    const ParameterConfig& paramConfig, int idx) {
  const std::pair<std::string, int> cacheKey(paramConfig.name(), idx);

  // NOTE(review): presumably the cache invokes this factory only on a miss
  // and synchronously within get(); the by-reference captures rely on that —
  // confirm against the cache implementation.
  auto buildHook = [&] { return createImpl(paramConfig.update_hooks(idx)); };

  return g_hookCache_.get(cacheKey, buildHook);
}