void SGDSolver<Dtype>::Regularize(int param_id) { const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params(); const vector<float>& net_params_weight_decay = this->net_->params_weight_decay(); Dtype weight_decay = this->param_.weight_decay(); string regularization_type = this->param_.regularization_type(); Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; switch (Caffe::mode()) { case Caffe::CPU: { if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else if (regularization_type == "L1") { caffe_cpu_sign(net_params[param_id]->count(), net_params[param_id]->cpu_data(), temp_[param_id]->mutable_cpu_data()); caffe_axpy(net_params[param_id]->count(), local_decay, temp_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } break; } case Caffe::GPU: { #ifndef CPU_ONLY if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_gpu_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else if (regularization_type == "L1") { caffe_gpu_sign(net_params[param_id]->count(), net_params[param_id]->gpu_data(), temp_[param_id]->mutable_gpu_data()); caffe_gpu_axpy(net_params[param_id]->count(), local_decay, temp_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } #else NO_GPU; #endif break; } default: LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); } }
void AdaGradSolver<Dtype>::ComputeUpdateValue() { vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params(); vector<float>& net_params_lr = this->net_->params_lr(); vector<float>& net_params_weight_decay = this->net_->params_weight_decay(); // get the learning rate Dtype rate = this->GetLearningRate(); Dtype delta = this->param_.delta(); if (this->param_.display() && this->iter_ % this->param_.display() == 0) { LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; } Dtype weight_decay = this->param_.weight_decay(); string regularization_type = this->param_.regularization_type(); switch (Caffe::mode()) { case Caffe::CPU: for (int param_id = 0; param_id < net_params.size(); ++param_id) { Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else if (regularization_type == "L1") { caffe_cpu_sign(net_params[param_id]->count(), net_params[param_id]->cpu_data(), this->temp_[param_id]->mutable_cpu_data()); caffe_axpy(net_params[param_id]->count(), local_decay, this->temp_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } // compute square of gradient in update caffe_powx(net_params[param_id]->count(), net_params[param_id]->cpu_diff(), Dtype(2), this->update_[param_id]->mutable_cpu_data()); // update history caffe_add(net_params[param_id]->count(), this->update_[param_id]->cpu_data(), this->history_[param_id]->cpu_data(), this->history_[param_id]->mutable_cpu_data()); // prepare update caffe_powx(net_params[param_id]->count(), this->history_[param_id]->cpu_data(), Dtype(0.5), this->update_[param_id]->mutable_cpu_data()); caffe_add_scalar(net_params[param_id]->count(), delta, this->update_[param_id]->mutable_cpu_data()); caffe_div(net_params[param_id]->count(), net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(), this->update_[param_id]->mutable_cpu_data()); // scale and copy caffe_cpu_axpby(net_params[param_id]->count(), local_rate, this->update_[param_id]->cpu_data(), Dtype(0), net_params[param_id]->mutable_cpu_diff()); } break; case Caffe::GPU: #ifndef CPU_ONLY for (int param_id = 0; param_id < net_params.size(); ++param_id) { Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_gpu_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else if (regularization_type == "L1") { caffe_gpu_sign(net_params[param_id]->count(), net_params[param_id]->gpu_data(), this->temp_[param_id]->mutable_gpu_data()); caffe_gpu_axpy(net_params[param_id]->count(), local_decay, this->temp_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } // compute square of gradient in update caffe_gpu_powx(net_params[param_id]->count(), net_params[param_id]->gpu_diff(), Dtype(2), this->update_[param_id]->mutable_gpu_data()); // update history caffe_gpu_add(net_params[param_id]->count(), this->update_[param_id]->gpu_data(), this->history_[param_id]->gpu_data(), this->history_[param_id]->mutable_gpu_data()); // prepare update caffe_gpu_powx(net_params[param_id]->count(), this->history_[param_id]->gpu_data(), Dtype(0.5), this->update_[param_id]->mutable_gpu_data()); caffe_gpu_add_scalar(net_params[param_id]->count(), delta, this->update_[param_id]->mutable_gpu_data()); caffe_gpu_div(net_params[param_id]->count(), net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(), this->update_[param_id]->mutable_gpu_data()); // scale and copy caffe_gpu_axpby(net_params[param_id]->count(), local_rate, this->update_[param_id]->gpu_data(), Dtype(0), net_params[param_id]->mutable_gpu_diff()); } #else NO_GPU; #endif break; default: LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); } }
void NesterovSolver<Dtype>::ComputeUpdateValue() { vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params(); vector<float>& net_params_lr = this->net_->params_lr(); vector<float>& net_params_weight_decay = this->net_->params_weight_decay(); // get the learning rate Dtype rate = this->GetLearningRate(); if (this->param_.display() && this->iter_ % this->param_.display() == 0) { LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; } Dtype momentum = this->param_.momentum(); Dtype weight_decay = this->param_.weight_decay(); string regularization_type = this->param_.regularization_type(); switch (Caffe::mode()) { case Caffe::CPU: for (int param_id = 0; param_id < net_params.size(); ++param_id) { // save history momentum for stepping back caffe_copy(net_params[param_id]->count(), this->history_[param_id]->cpu_data(), this->update_[param_id]->mutable_cpu_data()); Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else if (regularization_type == "L1") { caffe_cpu_sign(net_params[param_id]->count(), net_params[param_id]->cpu_data(), this->temp_[param_id]->mutable_cpu_data()); caffe_axpy(net_params[param_id]->count(), local_decay, this->temp_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } // update history caffe_cpu_axpby(net_params[param_id]->count(), local_rate, net_params[param_id]->cpu_diff(), momentum, this->history_[param_id]->mutable_cpu_data()); // compute udpate: step back then over step caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, this->history_[param_id]->cpu_data(), -momentum, this->update_[param_id]->mutable_cpu_data()); // copy caffe_copy(net_params[param_id]->count(), this->update_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } break; case Caffe::GPU: #ifndef CPU_ONLY for (int param_id = 0; param_id < net_params.size(); ++param_id) { // save history momentum for stepping back caffe_copy(net_params[param_id]->count(), this->history_[param_id]->gpu_data(), this->update_[param_id]->mutable_gpu_data()); Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_gpu_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else if (regularization_type == "L1") { caffe_gpu_sign(net_params[param_id]->count(), net_params[param_id]->gpu_data(), this->temp_[param_id]->mutable_gpu_data()); caffe_gpu_axpy(net_params[param_id]->count(), local_decay, this->temp_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } // update history caffe_gpu_axpby(net_params[param_id]->count(), local_rate, net_params[param_id]->gpu_diff(), momentum, this->history_[param_id]->mutable_gpu_data()); // compute udpate: step back then over step caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum, this->history_[param_id]->gpu_data(), -momentum, this->update_[param_id]->mutable_gpu_data()); // copy caffe_copy(net_params[param_id]->count(), this->update_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } #else NO_GPU; #endif break; default: LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); } }
void SGDSolver<Dtype>::ComputeUpdateValue() { vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params(); vector<float>& net_params_lr = this->net_->params_lr(); vector<float>& net_params_weight_decay = this->net_->params_weight_decay(); // get the learning rate Dtype rate = GetLearningRate(); Dtype momentum = this->param_.momentum(); if (this->param_.momentum_burnin() > this->iter_) { momentum = momentum * this->iter_ / this->param_.momentum_burnin(); } if (this->param_.display() && this->iter_ % this->param_.display() == 0) { LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate << ", mom = " << momentum; } Dtype weight_decay = this->param_.weight_decay(); string regularization_type = this->param_.regularization_type(); switch (Caffe::mode()) { case Caffe::CPU: for (int param_id = 0; param_id < net_params.size(); ++param_id) { // Compute the value to history, and then copy them to the blob's diff. Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else if (regularization_type == "L1") { caffe_cpu_sign(net_params[param_id]->count(), net_params[param_id]->cpu_data(), temp_[param_id]->mutable_cpu_data()); caffe_axpy(net_params[param_id]->count(), local_decay, temp_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } caffe_cpu_axpby(net_params[param_id]->count(), local_rate, net_params[param_id]->cpu_diff(), momentum, history_[param_id]->mutable_cpu_data()); // copy caffe_copy(net_params[param_id]->count(), history_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } break; case Caffe::GPU: #ifndef CPU_ONLY for (int param_id = 0; param_id < net_params.size(); ++param_id) { // Compute the value to history, and then copy them to the blob's diff. Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_gpu_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else if (regularization_type == "L1") { caffe_gpu_sign(net_params[param_id]->count(), net_params[param_id]->gpu_data(), temp_[param_id]->mutable_gpu_data()); caffe_gpu_axpy(net_params[param_id]->count(), local_decay, temp_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else { LOG(FATAL) << "Unknown regularization type: " << regularization_type; } } caffe_gpu_axpby(net_params[param_id]->count(), local_rate, net_params[param_id]->gpu_diff(), momentum, history_[param_id]->mutable_gpu_data()); // copy caffe_copy(net_params[param_id]->count(), history_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } #else NO_GPU; #endif break; default: LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); } }
void SGDSolver<Dtype>::Regularize(int param_id) { const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params(); const vector<float>& net_params_weight_decay = this->net_->params_weight_decay(); Dtype weight_decay = this->param_.weight_decay(); string regularization_type = this->param_.regularization_type(); Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; switch (Caffe::mode()) { case Caffe::CPU: { if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else if (regularization_type == "L1") { caffe_cpu_sign(net_params[param_id]->count(), net_params[param_id]->cpu_data(), temp_[param_id]->mutable_cpu_data()); caffe_axpy(net_params[param_id]->count(), local_decay, temp_[param_id]->cpu_data(), net_params[param_id]->mutable_cpu_diff()); } else { LOG(FATAL)<< "Unknown regularization type: " << regularization_type; } } break; } case Caffe::GPU: { #ifndef CPU_ONLY if (this->device_->backend() == BACKEND_CUDA) { #ifdef USE_CUDA if (local_decay) { if (regularization_type == "L2") { // add weight decay caffe_gpu_axpy(net_params[param_id]->count(), local_decay, net_params[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else if (regularization_type == "L1") { caffe_gpu_sign(net_params[param_id]->count(), net_params[param_id]->gpu_data(), temp_[param_id]->mutable_gpu_data()); caffe_gpu_axpy(net_params[param_id]->count(), local_decay, temp_[param_id]->gpu_data(), net_params[param_id]->mutable_gpu_diff()); } else { LOG(FATAL)<< "Unknown regularization type: " << regularization_type; } } #endif // USE_CUDA } else { #ifdef USE_GREENTEA if (local_decay) { if (regularization_type == "L2") { // add weight decay greentea_gpu_axpy<Dtype>(this->device_->id(), net_params[param_id]->count(), local_decay, (cl_mem)(net_params[param_id]->gpu_data()), 0, (cl_mem)(net_params[param_id]->mutable_gpu_diff()), 0); } else if (regularization_type == "L1") { greentea_gpu_sign<Dtype>(this->device_->id(), net_params[param_id]->count(), (cl_mem)(net_params[param_id]->gpu_data()), 0, (cl_mem)(temp_[param_id]->mutable_gpu_data()), 0); greentea_gpu_axpy<Dtype>(this->device_->id(), net_params[param_id]->count(), local_decay, (cl_mem)(temp_[param_id]->gpu_data()), 0, (cl_mem)(net_params[param_id]->mutable_gpu_diff()), 0); } else { LOG(FATAL)<< "Unknown regularization type: " << regularization_type; } } #endif // USE_GREENTEA } #else NO_GPU; #endif break; } default: { LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode(); } } }