void SGDFeedbackSolver<Dtype>::ComputeUpdateValue() {
  // Builds the update value for every parameter blob of the net. For each
  // blob the history buffer is refreshed from the blob's diff (scaled by the
  // per-parameter rate) and the previous history (scaled by momentum), the
  // optional weight-decay term is added, and the resulting history is copied
  // back into the blob's diff.
  vector<shared_ptr<Blob<Dtype> > >& params = this->net_->params();
  vector<float>& lr_mults = this->net_->params_lr();
  vector<float>& decay_mults = this->net_->params_weight_decay();

  // Global learning rate for this iteration.
  Dtype base_rate = GetLearningRate();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << base_rate;
  }
  Dtype momentum = this->param_.momentum();
  Dtype base_decay = this->param_.weight_decay();

  const int num_params = static_cast<int>(params.size());
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int idx = 0; idx != num_params; ++idx) {
      // Per-parameter multipliers scale the global rate and decay.
      Dtype scaled_rate = base_rate * lr_mults[idx];
      Dtype scaled_decay = base_decay * decay_mults[idx];
      // Fold the current diff into the history buffer.
      caffe_cpu_axpby(params[idx]->count(), scaled_rate,
                      params[idx]->cpu_diff(), momentum,
                      history_[idx]->mutable_cpu_data());
      if (scaled_decay != Dtype(0)) {
        // Weight decay: add the scaled parameter values to the history.
        caffe_axpy(params[idx]->count(), scaled_decay * scaled_rate,
                   params[idx]->cpu_data(),
                   history_[idx]->mutable_cpu_data());
      }
      // The history now holds the update; publish it as the blob's diff.
      caffe_copy(params[idx]->count(), history_[idx]->cpu_data(),
                 params[idx]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (int idx = 0; idx != num_params; ++idx) {
      // Same sequence as the CPU branch, using the GPU routines and buffers.
      Dtype scaled_rate = base_rate * lr_mults[idx];
      Dtype scaled_decay = base_decay * decay_mults[idx];
      caffe_gpu_axpby(params[idx]->count(), scaled_rate,
                      params[idx]->gpu_diff(), momentum,
                      history_[idx]->mutable_gpu_data());
      if (scaled_decay != Dtype(0)) {
        caffe_gpu_axpy(params[idx]->count(), scaled_decay * scaled_rate,
                       params[idx]->gpu_data(),
                       history_[idx]->mutable_gpu_data());
      }
      caffe_gpu_copy(params[idx]->count(), history_[idx]->gpu_data(),
                     params[idx]->mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Copies the bottom blob's data to the top blob with caffe_gpu_copy and then
// checks, element by element through the CPU accessors, that both blobs hold
// equal values.
TYPED_TEST(MathFunctionsTest, TestCopyGPU) {
  const int n = this->blob_bottom_->count();

  // Copy via the GPU accessors: bottom -> top.
  const TypeParam* src_gpu = this->blob_bottom_->gpu_data();
  TypeParam* dst_gpu = this->blob_top_->mutable_gpu_data();
  caffe_gpu_copy(n, src_gpu, dst_gpu);

  // Read both blobs back through the CPU accessors and compare.
  const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
  const TypeParam* top_data = this->blob_top_->mutable_cpu_data();
  int i = 0;
  while (i < n) {
    EXPECT_EQ(bottom_data[i], top_data[i]);
    ++i;
  }
}
void SGDSolver<Dtype>::ComputeUpdateValue() {
  // Builds the update value for every parameter blob using one global
  // learning rate, momentum and weight decay. For each blob the history
  // buffer is refreshed from the blob's diff and the previous history, the
  // optional weight-decay term is added, and the history is then copied into
  // the blob's diff.
  vector<shared_ptr<Blob<Dtype> > >& params = this->net_->params();

  // Hyper-parameters for this iteration.
  Dtype lr = GetLearningRate();
  Dtype momentum = this->param_.momentum();
  Dtype decay = this->param_.weight_decay();

  // Loop-invariant pieces of the weight-decay step.
  const bool apply_decay = (decay != Dtype(0));
  const Dtype decay_rate = decay * lr;

  const size_t num_params = params.size();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (size_t idx = 0; idx < num_params; ++idx) {
      // Fold the current diff into the history buffer.
      caffe_axpby(params[idx]->count(), lr, params[idx]->cpu_diff(),
                  momentum, history_[idx]->mutable_cpu_data());
      if (apply_decay) {
        // Weight decay: add the scaled parameter values to the history.
        caffe_axpy(params[idx]->count(), decay_rate,
                   params[idx]->cpu_data(),
                   history_[idx]->mutable_cpu_data());
      }
      // Publish the history as the blob's diff.
      caffe_copy(params[idx]->count(), history_[idx]->cpu_data(),
                 params[idx]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (size_t idx = 0; idx < num_params; ++idx) {
      // Same sequence as the CPU branch, using the GPU routines and buffers.
      caffe_gpu_axpby(params[idx]->count(), lr, params[idx]->gpu_diff(),
                      momentum, history_[idx]->mutable_gpu_data());
      if (apply_decay) {
        caffe_gpu_axpy(params[idx]->count(), decay_rate,
                       params[idx]->gpu_data(),
                       history_[idx]->mutable_gpu_data());
      }
      caffe_gpu_copy(params[idx]->count(), history_[idx]->gpu_data(),
                     params[idx]->mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
void SGDSolver<Dtype>::ComputeUpdateValue() {
  // Builds the update value for every parameter blob of the net and leaves it
  // in that blob's diff.
  //
  // Each parameter has its own learning-rate multiplier, weight-decay
  // multiplier and learning-rate policy string. Supported policies:
  //   "naive_inv" : rate * lr / (iter / 500 + 1)   (integer division, so the
  //                 divisor grows by one every 500 iterations)
  //   "power_inv" : rate * lr * (1 + localgamma * iter) ^ (-localpower)
  //   "step"      : rate * lr * localgamma ^ (iter / localstepsize)
  //   "nothing"   : rate * lr
  // Any other policy string is a fatal error.
  //
  // The per-parameter rate is resolved once, before the device switch, so the
  // CPU and GPU branches apply exactly the same schedule.
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<string>& net_params_lr_policy = this->net_->params_lr_policy();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();

  // Global learning rate for this iteration.
  Dtype rate = GetLearningRate();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype momentum = this->param_.momentum();
  Dtype weight_decay = this->param_.weight_decay();

  // Resolve the effective learning rate of every parameter up front.
  vector<Dtype> local_rates(net_params.size());
  for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
    const string& policy = net_params_lr_policy[param_id];
    const Dtype base_rate = rate * net_params_lr[param_id];
    Dtype local_rate = base_rate;
    if (policy == "naive_inv") {
      // iter_ / 500 is an integer division on purpose: the rate is piecewise
      // constant over blocks of 500 iterations.
      local_rate = base_rate * Dtype(1.0) / (this->iter_ / 500 + 1);
    } else if (policy == "power_inv") {
      local_rate = base_rate *
          pow(Dtype(1.0) + this->param_.localgamma() * this->iter_,
              - this->param_.localpower());
    } else if (policy == "step") {
      // A zero step size would divide by zero below; fail with a clear
      // message instead.
      if (this->param_.localstepsize() == 0) {
        LOG(FATAL) << "localstepsize must be non-zero for the \"step\" "
                   << "local lr policy";
      }
      int current_step = this->iter_ / this->param_.localstepsize();
      local_rate = base_rate * pow(this->param_.localgamma(), current_step);
    } else if (policy == "nothing") {
      local_rate = base_rate;
    } else {
      LOG(FATAL) << "Unknown caffe local policy: " << policy;
    }
    local_rates[param_id] = local_rate;
  }

  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      // Compute the value to history, and then copy them to the blob's diff.
      Dtype local_rate = local_rates[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
      // NOTE(review): axpby is presumably the BLAS-style y = a*x + b*y, i.e.
      // history = local_rate * diff + momentum * history - confirm against
      // the math_functions declaration.
      caffe_axpby(net_params[param_id]->count(), local_rate,
                  net_params[param_id]->cpu_diff(), momentum,
                  history_[param_id]->mutable_cpu_data());
      if (local_decay) {
        // add weight decay
        caffe_axpy(net_params[param_id]->count(), local_decay * local_rate,
                   net_params[param_id]->cpu_data(),
                   history_[param_id]->mutable_cpu_data());
      }
      // copy
      caffe_copy(net_params[param_id]->count(),
                 history_[param_id]->cpu_data(),
                 net_params[param_id]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
    for (size_t param_id = 0; param_id < net_params.size(); ++param_id) {
      // Same sequence as the CPU branch, using the GPU routines and buffers.
      Dtype local_rate = local_rates[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
      caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
                      net_params[param_id]->gpu_diff(), momentum,
                      history_[param_id]->mutable_gpu_data());
      if (local_decay) {
        // add weight decay
        caffe_gpu_axpy(net_params[param_id]->count(),
                       local_decay * local_rate,
                       net_params[param_id]->gpu_data(),
                       history_[param_id]->mutable_gpu_data());
      }
      // copy
      caffe_gpu_copy(net_params[param_id]->count(),
                     history_[param_id]->gpu_data(),
                     net_params[param_id]->mutable_gpu_diff());
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}