// Computes the momentum-SGD update value for every parameter blob of the net
// and leaves it in that blob's diff.
//
// For each parameter, with
//   local_rate  = rate         * net_params_lr[param_id]
//   local_decay = weight_decay * net_params_weight_decay[param_id]
// the history buffer is combined with the current diff through axpby, the
// weight-decay term (local_decay * local_rate, applied to the parameter data)
// is accumulated through axpy when local_decay is non-zero, and the history
// is finally copied into the parameter's diff. Assuming the usual BLAS
// meaning of axpby/axpy this amounts to
//   history = local_rate * diff + momentum * history
//             [+ local_decay * local_rate * data]
//   diff    = history
//
// The CPU and GPU branches issue the same sequence of operations through the
// host and device routines respectively. An unrecognised Caffe::mode() is
// reported with LOG(FATAL).
void SGDFeedbackSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // Get the learning rate.
  const Dtype rate = GetLearningRate();
  // Report the rate whenever display is enabled and the iteration is a
  // multiple of the display interval.
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  const Dtype momentum = this->param_.momentum();
  const Dtype weight_decay = this->param_.weight_decay();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // size_t index: param_id is only ever used to index vectors, so this
    // avoids comparing a signed int against vector::size().
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      // Compute the value to history ...
      caffe_cpu_axpby(param.count(), local_rate, param.cpu_diff(), momentum,
                      history_[param_id]->mutable_cpu_data());
      if (local_decay != Dtype(0)) {
        // ... add weight decay ...
        caffe_axpy(param.count(), local_decay * local_rate, param.cpu_data(),
                   history_[param_id]->mutable_cpu_data());
      }
      // ... and copy it to the blob's diff.
      caffe_copy(param.count(), history_[param_id]->cpu_data(),
                 param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      // Compute the value to history ...
      caffe_gpu_axpby(param.count(), local_rate, param.gpu_diff(), momentum,
                      history_[param_id]->mutable_gpu_data());
      if (local_decay != Dtype(0)) {
        // ... add weight decay ...
        caffe_gpu_axpy(param.count(), local_decay * local_rate,
                       param.gpu_data(),
                       history_[param_id]->mutable_gpu_data());
      }
      // ... and copy it to the blob's diff.
      caffe_gpu_copy(param.count(), history_[param_id]->gpu_data(),
                     param.mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Builds the SGD-with-momentum update for each parameter blob.
//
// One global learning rate, momentum and weight decay (no per-parameter
// multipliers) are used. For every parameter the history buffer is blended
// with the current diff via axpby, the weight-decay contribution
// (weight_decay * rate, applied to the parameter data) is accumulated via
// axpy when weight decay is non-zero, and the resulting history is copied
// into the parameter's diff. Host routines are used in CPU mode, device
// routines in GPU mode; any other mode is a fatal error.
void SGDSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  // Solver-wide hyper-parameters for this step.
  Dtype lr = GetLearningRate();
  Dtype mom = this->param_.momentum();
  Dtype decay = this->param_.weight_decay();
  // LOG(ERROR) << "rate:" << rate << " momentum:" << momentum
  //     << " weight_decay:" << weight_decay;
  // The decay test does not depend on the parameter, so evaluate it once.
  const bool apply_decay = (decay != Dtype(0));
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (size_t i = 0; i < net_params.size(); ++i) {
      Blob<Dtype>& blob = *net_params[i];
      // Fold the current gradient into the history buffer.
      caffe_axpby(blob.count(), lr, blob.cpu_diff(), mom,
                  history_[i]->mutable_cpu_data());
      if (apply_decay) {
        // Weight-decay contribution.
        caffe_axpy(blob.count(), decay * lr, blob.cpu_data(),
                   history_[i]->mutable_cpu_data());
      }
      // Publish the update through the blob's diff.
      caffe_copy(blob.count(), history_[i]->cpu_data(),
                 blob.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (size_t i = 0; i < net_params.size(); ++i) {
      Blob<Dtype>& blob = *net_params[i];
      // Fold the current gradient into the history buffer.
      caffe_gpu_axpby(blob.count(), lr, blob.gpu_diff(), mom,
                      history_[i]->mutable_gpu_data());
      if (apply_decay) {
        // Weight-decay contribution.
        caffe_gpu_axpy(blob.count(), decay * lr, blob.gpu_data(),
                       history_[i]->mutable_gpu_data());
      }
      // Publish the update through the blob's diff.
      caffe_gpu_copy(blob.count(), history_[i]->gpu_data(),
                     blob.mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Runs one solver update step.
//
// Fetches the current learning rate, optionally logs it, clips the
// gradients, then for each learnable parameter (in index order) calls
// Normalize, Regularize and ComputeUpdateValue before asking the net to
// apply the update.
void SGDSolver<Dtype>::ApplyUpdate() {
  Dtype rate = GetLearningRate();

  // Log only when display is enabled and this iteration falls on the display
  // interval; LOG_IF further restricts the message to the root solver.
  const bool on_display_iter =
      this->param_.display() && this->iter_ % this->param_.display() == 0;
  if (on_display_iter) {
    LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << this->iter_
        << ", lr = " << rate;
  }

  ClipGradients();

  // The parameter count is re-read on every pass, exactly as a for-loop
  // condition would.
  int param_id = 0;
  while (param_id < this->net_->learnable_params().size()) {
    Normalize(param_id);
    Regularize(param_id);
    ComputeUpdateValue(param_id, rate);
    ++param_id;
  }

  this->net_->Update();
}
// Runs one solver update step and advances the iteration counter.
//
// Fetches the current learning rate, optionally logs it, clips the
// gradients, then for each learnable parameter (in index order) calls
// Normalize, Regularize and ComputeUpdateValue. After the net has applied
// the update, iter_ is incremented.
void SGDSolver<Dtype>::ApplyUpdate() {
  Dtype rate = GetLearningRate();

  // Log only when display is enabled and this iteration falls on the display
  // interval; LOG_IF further restricts the message to the root solver.
  const bool on_display_iter =
      this->param_.display() && this->iter_ % this->param_.display() == 0;
  if (on_display_iter) {
    LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << this->iter_
        << ", lr = " << rate;
  }

  ClipGradients();

  // The parameter count is re-read on every pass, exactly as a for-loop
  // condition would.
  int param_id = 0;
  while (param_id < this->net_->learnable_params().size()) {
    Normalize(param_id);
    Regularize(param_id);
    ComputeUpdateValue(param_id, rate);
    ++param_id;
  }

  this->net_->Update();

  // Increment the internal iter_ counter -- its value should always indicate
  // the number of times the weights have been updated.
  ++this->iter_;
}
// Computes the momentum-SGD update value for every parameter blob, with
// momentum burn-in and L1/L2 regularization, and leaves it in the blob's
// diff.
//
// Momentum burn-in: while param_.momentum_burnin() is greater than iter_,
// the configured momentum is scaled by iter_ / momentum_burnin().
//
// For each parameter, with
//   local_rate  = rate         * net_params_lr[param_id]
//   local_decay = weight_decay * net_params_weight_decay[param_id]
// the steps are:
//   1. If local_decay is non-zero, the regularization term is accumulated
//      into the parameter's diff: local_decay * data for "L2", or
//      local_decay * sign(data) for "L1" (the sign is staged in
//      temp_[param_id]). Any other regularization type is a fatal error,
//      raised only when a parameter with non-zero local_decay is reached.
//   2. history is combined with the (regularized) diff through axpby using
//      local_rate and momentum.
//   3. history is copied into the parameter's diff.
//
// The GPU branch mirrors the CPU branch with device routines and is compiled
// out (replaced by NO_GPU) when CPU_ONLY is defined.
void SGDSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // Get the learning rate.
  const Dtype rate = GetLearningRate();
  Dtype momentum = this->param_.momentum();
  if (this->param_.momentum_burnin() > this->iter_) {
    // Still inside the burn-in window: scale the momentum down.
    momentum = momentum * this->iter_ / this->param_.momentum_burnin();
  }
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate
        << ", mom = " << momentum;
  }
  const Dtype weight_decay = this->param_.weight_decay();
  // Bound by const reference: the string is only read, so no copy is needed.
  const string& regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // size_t index: param_id is only ever used to index vectors, so this
    // avoids comparing a signed int against vector::size().
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      if (local_decay != Dtype(0)) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(param.count(), local_decay, param.cpu_data(),
                     param.mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(param.count(), param.cpu_data(),
                         temp_[param_id]->mutable_cpu_data());
          caffe_axpy(param.count(), local_decay,
                     temp_[param_id]->cpu_data(), param.mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: "
              << regularization_type;
        }
      }
      // Compute the value to history, and then copy it to the blob's diff.
      caffe_cpu_axpby(param.count(), local_rate, param.cpu_diff(), momentum,
                      history_[param_id]->mutable_cpu_data());
      caffe_copy(param.count(), history_[param_id]->cpu_data(),
                 param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = rate * net_params_lr[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      if (local_decay != Dtype(0)) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(param.count(), local_decay, param.gpu_data(),
                         param.mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(param.count(), param.gpu_data(),
                         temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(param.count(), local_decay,
                         temp_[param_id]->gpu_data(),
                         param.mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: "
              << regularization_type;
        }
      }
      // Compute the value to history, and then copy it to the blob's diff.
      caffe_gpu_axpby(param.count(), local_rate, param.gpu_diff(), momentum,
                      history_[param_id]->mutable_gpu_data());
      // NOTE(review): this branch uses caffe_copy (not a gpu-specific copy)
      // on device pointers, as in the original; confirm caffe_copy handles
      // device memory in this code base.
      caffe_copy(param.count(), history_[param_id]->gpu_data(),
                 param.mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Computes the momentum-SGD update value for every parameter blob, using a
// per-parameter learning-rate policy, and leaves it in the blob's diff.
//
// Each parameter's local learning rate starts from
//   base_rate = rate * net_params_lr[param_id]
// and is then shaped by net_params_lr_policy[param_id]:
//   "naive_inv" : base_rate / (iter_ / 500 + 1)
//                 (iter_ / 500 is an integer division, so the rate drops in
//                 steps of 500 iterations)
//   "power_inv" : base_rate * pow(1 + localgamma * iter_, -localpower)
//   "step"      : base_rate * pow(localgamma, iter_ / localstepsize)
//   "nothing"   : base_rate
// Any other policy string is a fatal error.
//
// The local rates are computed once, before the CPU/GPU switch, so both
// modes train with the same schedule. Previously only the GPU branch applied
// the policy and the CPU branch always used base_rate, which made results
// depend on the execution mode. As a consequence an unknown policy string is
// now fatal in CPU mode as well.
//
// With local_decay = weight_decay * net_params_weight_decay[param_id], the
// history buffer is combined with the current diff through axpby (local rate
// and momentum), the weight-decay term (local_decay * local_rate, applied to
// the parameter data) is accumulated through axpy when local_decay is
// non-zero, and the history is copied into the parameter's diff.
void SGDSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<string>& net_params_lr_policy = this->net_->params_lr_policy();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // Get the learning rate.
  const Dtype rate = GetLearningRate();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  const Dtype momentum = this->param_.momentum();
  const Dtype weight_decay = this->param_.weight_decay();

  // Resolve the per-parameter learning rate once, independent of the mode.
  // The vector is value-initialized, so no entry is ever read uninitialized.
  vector<Dtype> local_rates(net_params.size());
  for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
    const string& policy = net_params_lr_policy[param_id];
    const Dtype base_rate = rate * net_params_lr[param_id];
    if (policy == "naive_inv") {
      local_rates[param_id] = base_rate / (this->iter_ / 500 + 1);
    } else if (policy == "power_inv") {
      local_rates[param_id] = base_rate *
          pow(Dtype(1.0) + this->param_.localgamma() * this->iter_,
              -this->param_.localpower());
    } else if (policy == "step") {
      // NOTE(review): a localstepsize() of 0 would divide by zero here --
      // confirm it is validated where the solver parameters are loaded.
      const int current_step = this->iter_ / this->param_.localstepsize();
      local_rates[param_id] =
          base_rate * pow(this->param_.localgamma(), current_step);
    } else if (policy == "nothing") {
      local_rates[param_id] = base_rate;
    } else {
      LOG(FATAL) << "Unknown caffe local policy: " << policy;
    }
  }

  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = local_rates[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      // Compute the value to history ...
      caffe_axpby(param.count(), local_rate, param.cpu_diff(), momentum,
                  history_[param_id]->mutable_cpu_data());
      if (local_decay != Dtype(0)) {
        // ... add weight decay ...
        caffe_axpy(param.count(), local_decay * local_rate, param.cpu_data(),
                   history_[param_id]->mutable_cpu_data());
      }
      // ... and copy it to the blob's diff.
      caffe_copy(param.count(), history_[param_id]->cpu_data(),
                 param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const Dtype local_rate = local_rates[param_id];
      const Dtype local_decay =
          weight_decay * net_params_weight_decay[param_id];
      // Compute the value to history ...
      caffe_gpu_axpby(param.count(), local_rate, param.gpu_diff(), momentum,
                      history_[param_id]->mutable_gpu_data());
      if (local_decay != Dtype(0)) {
        // ... add weight decay ...
        caffe_gpu_axpy(param.count(), local_decay * local_rate,
                       param.gpu_data(),
                       history_[param_id]->mutable_gpu_data());
      }
      // ... and copy it to the blob's diff.
      caffe_gpu_copy(param.count(), history_[param_id]->gpu_data(),
                     param.mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}