// Forward pass of the triplet ranking hinge loss (CPU).
//
// bottom[0], bottom[1] and bottom[2] are the three feature blobs; following
// the original "F-F+" / "F-F-" comments they are the reference, the similar
// and the dissimilar sample. For every sample n in [0, batch_) the loss is
//     max(margin + sum((F - F+)^2) - sum((F - F-)^2), 0)
// and top[0] receives the sum of these losses divided by bottom[0]->num().
//
// Side effects: overwrites diff_sub_or_si, diff_sub_or_di, diff_pow_or_si,
// diff_pow_or_di, and stores the per-sample loss in both diff_ and dist_sq_
// (the latter is kept for the backward pass).
void TripletRankingHingeLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int dim_v = batch_ * dim_;

  // Element-wise differences and their squares for the whole batch at once.
  caffe_sub(dim_v, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
            diff_sub_or_si.mutable_cpu_data());  // F - F+
  caffe_sub(dim_v, bottom[0]->cpu_data(), bottom[2]->cpu_data(),
            diff_sub_or_di.mutable_cpu_data());  // F - F-
  caffe_powx(dim_v, diff_sub_or_si.cpu_data(), Dtype(2.0),
             diff_pow_or_si.mutable_cpu_data());
  caffe_powx(dim_v, diff_sub_or_di.cpu_data(), Dtype(2.0),
             diff_pow_or_di.mutable_cpu_data());

  // Per-sample hinge loss. The squared values are non-negative, so the
  // absolute sum returned by caffe_cpu_asum is the squared distance.
  Dtype* sample_loss = diff_.mutable_cpu_data();
  for (int n = 0; n < batch_; ++n) {
    const Dtype* sq_similar =
        diff_pow_or_si.cpu_data() + diff_pow_or_si.offset(n);
    const Dtype* sq_dissimilar =
        diff_pow_or_di.cpu_data() + diff_pow_or_di.offset(n);
    const Dtype dist_similar = caffe_cpu_asum(dim_, sq_similar);
    const Dtype dist_dissimilar = caffe_cpu_asum(dim_, sq_dissimilar);
    sample_loss[n] =
        std::max(margin + dist_similar - dist_dissimilar, Dtype(0));
  }

  // Keep a copy of the per-sample loss for back-propagation and accumulate.
  Dtype* saved_loss = dist_sq_.mutable_cpu_data();
  Dtype total_loss(0.0);
  for (int k = 0; k < batch_; ++k) {
    saved_loss[k] = sample_loss[k];
    total_loss += saved_loss[k];
  }

  // NOTE(review): the loops run over batch_ while the average divides by
  // bottom[0]->num(); presumably both are equal - confirm in the setup code.
  top[0]->mutable_cpu_data()[0] =
      total_loss / static_cast<Dtype>(bottom[0]->num());
}
// Computes the structure loss over consecutive frames of each clip.
//
// bottom[0..2] are treated as batch_size clips of frame_num frames, each
// frame holding dim values. For every pair of consecutive frames (j, j + 1)
// of every clip, the sum of squared element differences is accumulated for
// all three bottoms.
//
// Returns the accumulated value divided by batch_size * (frame_num - 1) * 3,
// or 0 when there is no frame pair to compare (frame_num <= 1 or an empty
// batch); without that guard the divisor would be zero.
//
// Side effects: overwrites sub_or / sub_si / sub_di and pow_sub_or /
// pow_sub_si / pow_sub_di at the offsets of the processed pairs.
Dtype TripletClipHingeLossLayer<Dtype>::compute_structureloss(
    const vector<Blob<Dtype>*>& bottom) {
  // Fewer than two frames per clip means there is nothing to compare; this
  // also protects the integer division below from frame_num == 0.
  if (frame_num <= 1) {
    return Dtype(0);
  }
  const int batch_size = bottom[0]->num() / frame_num;
  if (batch_size <= 0) {
    return Dtype(0);
  }
  const int pairs_per_clip = frame_num - 1;

  const Dtype* or_data = bottom[0]->cpu_data();
  const Dtype* si_data = bottom[1]->cpu_data();
  const Dtype* di_data = bottom[2]->cpu_data();

  Dtype Structureloss(0.0);
  for (int i = 0; i < batch_size; ++i) {
    for (int j = 0; j < pairs_per_clip; ++j) {
      // Offsets of frame j and frame j + 1 inside the input blobs.
      const int index_1 = i * frame_num * dim + j * dim;
      const int index_2 = i * frame_num * dim + (j + 1) * dim;
      // Offset of this pair inside the scratch blobs.
      const int direct = i * pairs_per_clip * dim + j * dim;

      // Differences between the two consecutive frames.
      caffe_sub(dim, or_data + index_1, or_data + index_2,
                sub_or.mutable_cpu_data() + direct);
      caffe_sub(dim, si_data + index_1, si_data + index_2,
                sub_si.mutable_cpu_data() + direct);
      caffe_sub(dim, di_data + index_1, di_data + index_2,
                sub_di.mutable_cpu_data() + direct);

      // Squared differences.
      caffe_powx(dim, sub_or.cpu_data() + direct, Dtype(2.0),
                 pow_sub_or.mutable_cpu_data() + direct);
      caffe_powx(dim, sub_si.cpu_data() + direct, Dtype(2.0),
                 pow_sub_si.mutable_cpu_data() + direct);
      caffe_powx(dim, sub_di.cpu_data() + direct, Dtype(2.0),
                 pow_sub_di.mutable_cpu_data() + direct);

      // Accumulate the three sums for this pair.
      Structureloss += (caffe_cpu_asum(dim, pow_sub_or.cpu_data() + direct) +
                        caffe_cpu_asum(dim, pow_sub_si.cpu_data() + direct) +
                        caffe_cpu_asum(dim, pow_sub_di.cpu_data() + direct));
    }
  }
  return Structureloss / (batch_size * pairs_per_clip * 3);
}
// Mean-variance normalization, forward pass (CPU, pointer-to-top overload).
//
// The input is viewed as a num x dim matrix: one row per sample when
// mvn_param().across_channels() is set, otherwise one row per
// (sample, channel). Each row has its mean subtracted; when
// mvn_param().normalize_variance() is set, each row is additionally divided
// by (sqrt(variance) + eps).
//
// The variance is computed from the already centred data as E((X - EX)^2).
// The former E(X^2) - (EX)^2 form can become slightly negative through
// rounding, which turns the square root into NaN.
//
// Side effects: mean_ holds the row means; with variance normalization,
// variance_ holds sqrt(variance) + eps per row and temp_ its broadcast.
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();
  const int dim = bottom[0]->count() / num;
  const Dtype eps = 1e-10;

  // Row means: mean_ = (1 / dim) * X * 1.
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX

  // Broadcast -mean over each row and add it to the input: top = X - EX.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // Variance of the centred data: var(X) = E((X - EX)^2).
    caffe_powx(bottom[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X - EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
        sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X - EX)^2)

    // Standard deviation plus eps, so the division below never hits zero.
    caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
        variance_.mutable_cpu_data());
    caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());

    // Broadcast the per-row divisor and normalize.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  }
}
// Computes the averaged triplet ranking loss from ave_or / ave_si / ave_di.
//
// batchsize: number of samples to process.
// Dimv:      number of elements covered by the element-wise operations.
//
// For each sample the loss is
//     max(margin + sum((or - si)^2) - sum((or - di)^2), FLT_MIN)
// and is stored in diff_ and dist_sq_ (the latter is kept for BP).
// Returns the sum of the per-sample losses divided by batchsize.
Dtype TripletClipHingeLossLayer<Dtype>::compute_tripletloss(int batchsize,
                                                            int Dimv) {
  // Element-wise differences ...
  caffe_sub(Dimv, ave_or.cpu_data(), ave_si.cpu_data(),
            diff_sub_or_si.mutable_cpu_data());  // F - F+
  caffe_sub(Dimv, ave_or.cpu_data(), ave_di.cpu_data(),
            diff_sub_or_di.mutable_cpu_data());  // F - F-
  // ... and their squares.
  caffe_powx(Dimv, diff_sub_or_si.cpu_data(), Dtype(2.0),
             diff_pow_or_si.mutable_cpu_data());
  caffe_powx(Dimv, diff_sub_or_di.cpu_data(), Dtype(2.0),
             diff_pow_or_di.mutable_cpu_data());

  // Per-sample hinge; note that the lower bound is FLT_MIN, not zero.
  for (int sample = 0; sample < batchsize; ++sample) {
    const Dtype* squared_si =
        diff_pow_or_si.cpu_data() + diff_pow_or_si.offset(sample);
    const Dtype* squared_di =
        diff_pow_or_di.cpu_data() + diff_pow_or_di.offset(sample);
    const Dtype dist_si = caffe_cpu_asum(dim, squared_si);
    const Dtype dist_di = caffe_cpu_asum(dim, squared_di);
    diff_.mutable_cpu_data()[sample] =
        std::max(margin + dist_si - dist_di, Dtype(FLT_MIN));
  }

  // Save the per-sample loss for BP and accumulate the total.
  Dtype loss_sum(0.0);
  for (int sample = 0; sample < batchsize; ++sample) {
    dist_sq_.mutable_cpu_data()[sample] = diff_.cpu_data()[sample];
    loss_sum += dist_sq_.cpu_data()[sample];
  }
  return loss_sum / static_cast<Dtype>(batchsize);
}
// RMSProp update for one learnable parameter blob.
//
// param_id: index of the parameter in net_->learnable_params().
// rate:     global learning rate; it is multiplied by the parameter's own
//           multiplier from net_->params_lr().
//
// CPU path: history = (1 - rms_decay) * grad^2 + rms_decay * history, then
// the parameter diff is replaced by
//     local_rate * grad / (sqrt(history) + delta).
// GPU path: delegates to rmsprop_update_gpu (NO_GPU in CPU_ONLY builds).
void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();

  // Solver hyper-parameters and the effective learning rate.
  const Dtype delta = this->param_.delta();
  const Dtype rms_decay = this->param_.rms_decay();
  const Dtype local_rate = rate * net_params_lr[param_id];

  Blob<Dtype>* const param = net_params[param_id];
  const int count = param->count();

  switch (Caffe::mode()) {
  case Caffe::CPU:
    // update = grad^2
    caffe_powx(count, param->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // history = (1 - rms_decay) * update + rms_decay * history
    caffe_cpu_axpby(count, Dtype(1-rms_decay),
        this->update_[param_id]->cpu_data(), rms_decay,
        this->history_[param_id]->mutable_cpu_data());

    // update = grad / (sqrt(history) + delta)
    caffe_powx(count, this->history_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());
    caffe_add_scalar(count, delta,
        this->update_[param_id]->mutable_cpu_data());
    caffe_div(count, param->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // diff = local_rate * update
    caffe_cpu_axpby(count, local_rate,
        this->update_[param_id]->cpu_data(), Dtype(0),
        param->mutable_cpu_diff());
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    rmsprop_update_gpu(count, param->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        rms_decay, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Mean-variance normalization, forward pass (CPU).
//
// The input is viewed as a num x dim matrix: one row per sample when
// mvn_param().across_channels() is set, otherwise one row per
// (sample, channel). Every row has its mean removed; if
// mvn_param().normalize_variance() is set, every row is then divided by
// (sqrt(E((X - EX)^2)) + eps_).
//
// Side effects: mean_ holds the row means; with variance normalization,
// variance_ holds the per-row divisor and temp_ its broadcast.
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  const bool across_channels =
      this->layer_param_.mvn_param().across_channels();
  const int num = across_channels
      ? bottom[0]->num()
      : bottom[0]->num() * bottom[0]->channels();
  const int dim = bottom[0]->count() / num;

  // mean_ = EX (row-wise average).
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
  // temp_ = -EX broadcast along each row; top = X - EX.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);

  if (!this->layer_param_.mvn_param().normalize_variance()) {
    return;  // Mean subtraction only.
  }

  // variance_ = E((X - EX)^2), taken from the centred output.
  caffe_powx(bottom[0]->count(), top_data, Dtype(2),
      temp_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
      sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data());

  // variance_ = sqrt(variance_) + eps_
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
      variance_.mutable_cpu_data());
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());

  // Broadcast the divisor along each row and normalize in place.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
      variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
}
// Forward pass (CPU): multiplies every sample element-wise by the squared
// layer weights, i.e. top[n] = bottom[n] * weight^2 for n in [0, batch_),
// where each sample and the weight vector hold dim_ values.
//
// Side effect: weight_pow_ is overwritten with weight^2.
void WeightPlusLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  // The squared weights do not depend on the sample, so compute them once
  // instead of once per loop iteration.
  caffe_powx(dim_, weight, Dtype(2.0), weight_pow_.mutable_cpu_data());
  const Dtype* weight_sq = weight_pow_.cpu_data();

  for (int n = 0; n < batch_; ++n) {
    const int offset = n * dim_;
    caffe_mul(dim_, bottom_data + offset, weight_sq, top_data + offset);
  }
}
// Backward pass (CPU) of the element-wise power y = x^power_.
//
// When propagate_down[0] is set, writes
//     bottom_diff = power_ * x^(power_ - 1) * top_diff
// into bottom[0]'s diff; otherwise nothing is written.
void MyNeuronLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();

  if (!propagate_down[0]) {
    return;
  }

  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  // d/dx x^p = p * x^(p - 1), then apply the chain rule with top_diff.
  caffe_powx(count, bottom_data, Dtype(power_ - 1), bottom_diff);
  caffe_scal(count, Dtype(power_), bottom_diff);
  caffe_mul(count, bottom_diff, top_diff, bottom_diff);
}
// Mean-variance normalization, backward pass (CPU).
//
// The blobs are viewed as num x dim matrices: one row per sample when
// mvn_param().across_channels() is set, otherwise one row per
// (sample, channel). With y = top_data and dy = top_diff:
//
//  * normalize_variance():
//      bottom_diff = (dy - (rowsum(dy) + y * rowsum(y * dy)) / dim) / v
//    where v is the per-row content of variance_ as left by the forward
//    pass, broadcast along each row.
//  * otherwise:
//      bottom_diff = dy - rowmean(dy)
//
// mean_ and temp_ are used as scratch space. The previous version also wrote
// bottom_data^2 into temp_ right before the final broadcast; that result was
// overwritten by the GEMM with beta = 0 and never read, so it was removed.
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();
  const int dim = bottom[0]->count() / num;

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // bottom_diff = y * rowsum(y * dy)
    caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0., bottom_diff);
    caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

    // bottom_diff += rowsum(dy)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 1., bottom_diff);

    // bottom_diff = dy - bottom_diff / dim
    caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
        bottom_diff);

    // Divide by the per-row divisor stored in variance_.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
  } else {
    // bottom_diff = dy - rowmean(dy)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
  }
}
// Forward pass (CPU): for each of the num rows writes
//     top[n] = 0.5 * sum((bottom[0][n] - bottom[1][n])^2)
// where a row spans count / num elements.
//
// Side effects: diff_ holds the element-wise difference and temp_ its square.
void EuclideanDistLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int total = bottom[0]->count();
  const int rows = bottom[0]->num();
  const int cols = total / rows;

  // diff_ = a - b
  caffe_sub(total, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
            diff_.mutable_cpu_data());
  // temp_ = diff_^2
  caffe_powx(total, diff_.cpu_data(), Dtype(2), temp_.mutable_cpu_data());
  // top = 0.5 * temp_ * sum_multiplier_ (row-wise sum scaled by one half)
  caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, cols, Dtype(0.5),
                        temp_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                        top[0]->mutable_cpu_data());
}
// Backward pass (CPU) of the log layer.
//
// When propagate_down[0] is set, computes
//     bottom_diff = top_diff * backward_num_scale_
//                   / (input_scale_ * bottom_data + input_shift_)
// Scaling / shifting steps whose factor is the identity (1 resp. 0) are
// skipped.
void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  // Build the denominator input_scale_ * x + input_shift_ in bottom_diff.
  caffe_cpu_copy(count, bottom_data, bottom_diff);
  const bool needs_scale = (input_scale_ != Dtype(1));
  if (needs_scale) {
    caffe_scal(count, input_scale_, bottom_diff);
  }
  const bool needs_shift = (input_shift_ != Dtype(0));
  if (needs_shift) {
    caffe_add_scalar(count, input_shift_, bottom_diff);
  }

  // Reciprocal, constant factor, then the chain rule with top_diff.
  caffe_powx(count, bottom_diff, Dtype(-1), bottom_diff);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_scal(count, backward_num_scale_, bottom_diff);
  }
  caffe_mul(count, top_diff, bottom_diff, bottom_diff);
}
// Pre-computes the two per-element terms derived from prob_ (CPU):
//   log_prob_   = log(max(p, FLT_MIN))
//   power_prob_ = alpha_ * (ones_ - p) ^ gamma_
void FocalLossLayer<Dtype>::compute_intermediate_values_of_cpu() {
  const int count = prob_.count();
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* ones_data = ones_.cpu_data();
  Dtype* log_prob_data = log_prob_.mutable_cpu_data();
  Dtype* power_prob_data = power_prob_.mutable_cpu_data();

  // log(p_t): clamp the probability from below (FLT_MIN = 1.17549e-38, the
  // bound can be changed here) so that log never sees a value below it. This
  // replaces a plain caffe_log(count, prob_data, log_prob_data) for
  // stability.
  const Dtype eps = Dtype(FLT_MIN);
  for (int idx = 0; idx < count; ++idx) {
    const Dtype clamped = std::max(prob_data[idx], eps);
    log_prob_data[idx] = log(clamped);
  }

  // alpha * (1 - p_t) ^ gamma
  caffe_sub(count, ones_data, prob_data, power_prob_data);
  caffe_powx(count, power_prob_.cpu_data(), gamma_, power_prob_data);
  caffe_scal(count, alpha_, power_prob_data);
}
// Forward pass (CPU): top = (shift_ + scale_ * bottom) ^ power_.
//
// When diff_scale_ is zero the input is ignored and the output is filled
// with a constant: 1 if power_ is 0, otherwise shift_ ^ power_. Scale, shift
// and power steps whose parameter is the identity are skipped.
void PowerLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();

  // Special case where the input can be ignored: scale or power is 0.
  if (diff_scale_ == Dtype(0)) {
    Dtype fill_value;
    if (power_ == 0) {
      fill_value = Dtype(1);
    } else {
      fill_value = pow(shift_, power_);
    }
    caffe_set(count, fill_value, top_data);
    return;
  }

  // General case: copy the input, then transform it in place.
  const Dtype* bottom_data = bottom[0]->cpu_data();
  caffe_copy(count, bottom_data, top_data);

  const bool apply_scale = (scale_ != Dtype(1));
  const bool apply_shift = (shift_ != Dtype(0));
  const bool apply_power = (power_ != Dtype(1));
  if (apply_scale) {
    caffe_scal(count, scale_, top_data);
  }
  if (apply_shift) {
    caffe_add_scalar(count, shift_, top_data);
  }
  if (apply_power) {
    caffe_powx(count, top_data, power_, top_data);
  }
}
// Forward pass (CPU): element-wise power, top = bottom ^ power_.
void MyNeuronLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* output = top[0]->mutable_cpu_data();
  const int num_elements = bottom[0]->count();
  const Dtype* input = bottom[0]->cpu_data();
  caffe_powx(num_elements, input, Dtype(power_), output);
}
// Batch normalization, forward pass (CPU).
//
// The input is treated as (num x channels_) rows of spatial_dim values.
//  * use_global_stats_: mean_ and variance_ are taken from blobs_[0] and
//    blobs_[1], each scaled by 1 / blobs_[2][0] (or by 0 when that factor
//    is 0).
//  * otherwise: mean_ and variance_ are computed per channel from the
//    current batch, and the running sums in blobs_[0..2] are updated using
//    moving_average_fraction_ (the variance with the m / (m - 1) correction).
// The output is (x - mean) / sqrt(variance + eps_) and is also copied into
// x_norm_.
void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int num = bottom[0]->shape(0);
  const int spatial_dim =
      bottom[0]->count() / (bottom[0]->shape(0) * channels_);

  // For the non in-place case start from a copy of the input.
  const bool in_place = (bottom[0] == top[0]);
  if (!in_place) {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }

  if (use_global_stats_) {
    // Use the stored mean / variance estimates.
    const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
        0 : 1 / this->blobs_[2]->cpu_data()[0];
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data());
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data());
  } else {
    // Per-channel mean: reduce over the spatial axis, then over the batch.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), bottom_data,
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        mean_.mutable_cpu_data());
  }

  // Subtract the mean: broadcast it to (num x channels_), then over space.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, -1, num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 1., top_data);

  if (!use_global_stats_) {
    // Variance via var(X) = E((X - EX)^2) on the centred data.
    caffe_powx(top[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X - EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), temp_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X - EX)^2)

    // Update the running statistics.
    this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_;
    this->blobs_[2]->mutable_cpu_data()[0] += 1;
    caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(),
        moving_average_fraction_, this->blobs_[0]->mutable_cpu_data());
    const int m = bottom[0]->count() / channels_;
    const Dtype bias_correction_factor = m > 1 ? Dtype(m) / (m - 1) : 1;
    caffe_cpu_axpby(variance_.count(), bias_correction_factor,
        variance_.cpu_data(), moving_average_fraction_,
        this->blobs_[1]->mutable_cpu_data());
  }

  // variance_ = sqrt(variance_ + eps_)
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
      variance_.mutable_cpu_data());

  // Replicate the divisor to the input size and divide.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data());
  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);

  // TODO(cdoersch): The caching is only needed because later in-place layers
  //                 might clobber the data.  Can we skip this if they won't?
  caffe_copy(x_norm_.count(), top_data, x_norm_.mutable_cpu_data());
}
// AdaGrad update for all network parameters.
//
// For every parameter blob:
//   1. if its local weight decay is non-zero, adds the regularization
//      gradient to the diff ("L2": decay * w, "L1": decay * sign(w));
//   2. history += grad^2;
//   3. diff = local_rate * grad / (sqrt(history) + delta).
// The same sequence is executed with the CPU or the GPU math routines
// depending on Caffe::mode(); CPU_ONLY builds hit NO_GPU in GPU mode.
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay =
      this->net_->params_weight_decay();

  // Learning rate and the stabilizing constant.
  Dtype rate = this->GetLearningRate();
  Dtype delta = this->param_.delta();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype weight_decay = this->param_.weight_decay();
  string regularization_type = this->param_.regularization_type();

  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const int count = param.count();
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // diff += local_decay * w
          caffe_axpy(count, local_decay, param.cpu_data(),
              param.mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          // diff += local_decay * sign(w)
          caffe_cpu_sign(count, param.cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(count, local_decay,
              this->temp_[param_id]->cpu_data(),
              param.mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: "
              << regularization_type;
        }
      }

      // update = grad^2
      caffe_powx(count, param.cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // history += update
      caffe_add(count, this->update_[param_id]->cpu_data(),
          this->history_[param_id]->cpu_data(),
          this->history_[param_id]->mutable_cpu_data());

      // update = grad / (sqrt(history) + delta)
      caffe_powx(count, this->history_[param_id]->cpu_data(), Dtype(0.5),
          this->update_[param_id]->mutable_cpu_data());
      caffe_add_scalar(count, delta,
          this->update_[param_id]->mutable_cpu_data());
      caffe_div(count, param.cpu_diff(),
          this->update_[param_id]->cpu_data(),
          this->update_[param_id]->mutable_cpu_data());

      // diff = local_rate * update
      caffe_cpu_axpby(count, local_rate,
          this->update_[param_id]->cpu_data(), Dtype(0),
          param.mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      const int count = param.count();
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // diff += local_decay * w
          caffe_gpu_axpy(count, local_decay, param.gpu_data(),
              param.mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          // diff += local_decay * sign(w)
          caffe_gpu_sign(count, param.gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(count, local_decay,
              this->temp_[param_id]->gpu_data(),
              param.mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: "
              << regularization_type;
        }
      }

      // update = grad^2
      caffe_gpu_powx(count, param.gpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_gpu_data());

      // history += update
      caffe_gpu_add(count, this->update_[param_id]->gpu_data(),
          this->history_[param_id]->gpu_data(),
          this->history_[param_id]->mutable_gpu_data());

      // update = grad / (sqrt(history) + delta)
      caffe_gpu_powx(count, this->history_[param_id]->gpu_data(),
          Dtype(0.5), this->update_[param_id]->mutable_gpu_data());
      caffe_gpu_add_scalar(count, delta,
          this->update_[param_id]->mutable_gpu_data());
      caffe_gpu_div(count, param.gpu_diff(),
          this->update_[param_id]->gpu_data(),
          this->update_[param_id]->mutable_gpu_data());

      // diff = local_rate * update
      caffe_gpu_axpby(count, local_rate,
          this->update_[param_id]->gpu_data(), Dtype(0),
          param.mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// AdaDelta update for one learnable parameter blob.
//
// param_id: index of the parameter in net_->learnable_params().
// rate:     global learning rate, multiplied by the parameter's own
//           multiplier from net_->params_lr().
//
// history_[param_id] accumulates squared gradients, while
// history_[net_params.size() + param_id] accumulates squared updates, both
// as running averages weighted by momentum. CPU path:
//   g_hist = (1 - momentum) * grad^2 + momentum * g_hist
//   diff   = grad * sqrt((u_hist + delta) / (g_hist + delta))
//   u_hist = (1 - momentum) * diff^2 + momentum * u_hist
//   diff   = local_rate * diff
// GPU path: delegates to adadelta_update_gpu (NO_GPU in CPU_ONLY builds).
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  // The update histories are stored after the gradient histories.
  size_t update_history_offset = net_params.size();

  Blob<Dtype>* const param = net_params[param_id];
  const int count = param->count();

  switch (Caffe::mode()) {
  case Caffe::CPU: {
    // update = grad^2
    caffe_powx(count, param->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // g_hist = (1 - momentum) * update + momentum * g_hist
    caffe_cpu_axpby(count, Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[param_id]->mutable_cpu_data());

    // Add delta to both histories to guard against dividing by zero later:
    // update = u_hist + delta, temp = g_hist + delta.
    caffe_set(count, delta, this->temp_[param_id]->mutable_cpu_data());
    caffe_add(count, this->temp_[param_id]->cpu_data(),
        this->history_[update_history_offset + param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());
    caffe_add(count, this->temp_[param_id]->cpu_data(),
        this->history_[param_id]->cpu_data(),
        this->temp_[param_id]->mutable_cpu_data());

    // update = sqrt(update / temp): ratio of the two RMS values.
    caffe_div(count, this->update_[param_id]->cpu_data(),
        this->temp_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());
    caffe_powx(count, this->update_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());

    // diff = grad * update
    caffe_mul(count, param->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        param->mutable_cpu_diff());

    // update = diff^2
    caffe_powx(count, param->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // u_hist = (1 - momentum) * update + momentum * u_hist
    caffe_cpu_axpby(count, Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[update_history_offset + param_id]
            ->mutable_cpu_data());

    // Apply the learning rate.
    caffe_cpu_scale(count, local_rate, param->cpu_diff(),
        param->mutable_cpu_diff());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    adadelta_update_gpu(count, param->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        this->history_[update_history_offset + param_id]
            ->mutable_gpu_data(),
        momentum, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Batch normalization with learned slope / bias, backward pass (CPU).
//
// The blobs are treated as (num_ x channels_) rows of height_ * width_
// values; per-channel vectors are broadcast with batch_sum_multiplier_ and
// spatial_sum_multiplier_.
//
//  * frozen_: only the input gradient is produced (if requested), as
//    top_diff * slope / sqrt(blobs_[3] + bn_eps_); parameter gradients are
//    not touched.
//  * otherwise:
//      - slope gradient (blobs_[0] diff) += per-channel sum of
//        x_norm_ * top_diff
//      - bias gradient  (blobs_[1] diff) += per-channel sum of top_diff
//      - input gradient, with d = top_diff * slope and N = num_ * height_ *
//        width_:
//          bottom_diff = (d - (sum(d) + x_norm_ * sum(x_norm_ * d)) / N)
//                        / x_std_
//        where the sums run per channel.
// batch_statistic_, spatial_statistic_ and broadcast_buffer_ are scratch.
void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const int rows = num_ * channels_;       // one row per (sample, channel)
  const int row_size = height_ * width_;   // spatial extent of a row

  if (frozen_) {
    if (!propagate_down[0]) {
      return;
    }
    const Dtype* const_top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    // batch_statistic_ = slope / sqrt(moving average variance + eps)
    // NOTE(review): Forward_cpu adds var_eps_ before the square root while
    // this branch adds bn_eps_ - confirm that both hold the same value.
    caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
    caffe_add_scalar(batch_statistic_.count(), bn_eps_,
        batch_statistic_.mutable_cpu_data());
    caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
        Dtype(0.5), batch_statistic_.mutable_cpu_data());
    caffe_div(batch_statistic_.count(), this->blobs_[0]->cpu_data(),
        batch_statistic_.cpu_data(), batch_statistic_.mutable_cpu_data());

    // Broadcast to the full blob size.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
        Dtype(1), batch_sum_multiplier_.cpu_data(),
        batch_statistic_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, row_size, 1,
        Dtype(1), spatial_statistic_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        broadcast_buffer_.mutable_cpu_data());

    // bottom_diff = top_diff * (slope / std)
    caffe_mul(broadcast_buffer_.count(), const_top_diff,
        broadcast_buffer_.cpu_data(), bottom_diff);
    return;
  }

  // Gradient w.r.t. slope.
  if (this->param_propagate_down_[0]) {
    const Dtype* const_top_diff = top[0]->cpu_diff();
    Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
    caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(), const_top_diff,
        broadcast_buffer_.mutable_cpu_data());
    // Reduce over space, then accumulate over the batch (beta = 1).
    caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, row_size, Dtype(1),
        broadcast_buffer_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(1), scale_diff);
  }

  // Gradient w.r.t. bias.
  if (this->param_propagate_down_[1]) {
    const Dtype* const_top_diff = top[0]->cpu_diff();
    Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
    caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, row_size, Dtype(1),
        const_top_diff, spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(1), shift_diff);
  }

  // Gradient w.r.t. normalized inputs.
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* const_top_diff = top[0]->cpu_diff();
  const Dtype* const_bottom_diff = bottom[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* scale_data = this->blobs_[0]->cpu_data();

  // broadcast_buffer_ = top_diff * slope  (dl / dx_hat)
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, row_size, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_mul(broadcast_buffer_.count(), const_top_diff,
      broadcast_buffer_.cpu_data(), broadcast_buffer_.mutable_cpu_data());

  // Per-channel sum of x_hat * (dl / dx_hat).
  caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(),
      broadcast_buffer_.cpu_data(), bottom_diff);
  caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, row_size, Dtype(1),
      const_bottom_diff, spatial_sum_multiplier_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());

  // x_hat times that sum.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, row_size, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0), bottom_diff);
  caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(),
      const_bottom_diff, bottom_diff);

  // Add the per-channel sum of dl / dx_hat, then subtract the average of
  // the accumulated term from dl / dx_hat.
  caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, row_size, Dtype(1),
      broadcast_buffer_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, row_size, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff);
  caffe_cpu_axpby(broadcast_buffer_.count(), Dtype(1),
      broadcast_buffer_.cpu_data(),
      Dtype(-1) / (num_ * height_ * width_), bottom_diff);

  // Divide by the std saved by the forward pass.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), x_std_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, row_size, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_div(broadcast_buffer_.count(), const_bottom_diff,
      broadcast_buffer_.cpu_data(), bottom_diff);
}
// CPU forward pass of batch normalization:
//   top = scale * (bottom - mean) / sqrt(variance + var_eps_) + shift
// where mean and variance are per-channel statistics reduced over the batch
// and spatial axes. While training, the running statistics in blobs_[2]
// (mean) and blobs_[3] (variance) are blended with the batch statistics; at
// test time with moving_average_ set, the running statistics replace the
// batch ones. x_norm_ and x_std_ are stored for the backward pass.
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  // Two handles onto top[0]: results written through `output` are read back
  // through `output_ro` in the following steps.
  const Dtype* output_ro = top[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();
  const Dtype* gamma = this->blobs_[0]->cpu_data();
  const Dtype* beta = this->blobs_[1]->cpu_data();

  const bool training = (this->phase_ == TRAIN);
  const bool use_history = (this->phase_ == TEST && moving_average_);

  // ---------------- mean subtraction ----------------
  // Average over the spatial axis: one value per (sample, channel).
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
      Dtype(1. / (height_ * width_)), input,
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  // Average over the batch axis: one value per channel.
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());

  if (training) {
    // history_mean = decay_ * batch_mean + (1 - decay_) * history_mean
    caffe_cpu_axpby(batch_statistic_.count(), decay_,
        batch_statistic_.cpu_data(), Dtype(1) - decay_,
        this->blobs_[2]->mutable_cpu_data());
  } else if (use_history) {
    // Replace the batch mean with the stored moving-average mean.
    caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  }

  // Broadcast -mean to the full blob shape (channel -> sample -> spatial).
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(-1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  // top = bottom + (-mean)
  caffe_add(buffer_blob_.count(), input, buffer_blob_.cpu_data(), output);

  // ---------------- variance normalization ----------------
  // Square the centered values into the scratch buffer.
  caffe_powx(buffer_blob_.count(), output_ro, Dtype(2),
      buffer_blob_.mutable_cpu_data());
  // Average the squares over the spatial axis, then over the batch axis.
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
      Dtype(1. / (height_ * width_)), buffer_blob_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());

  if (training) {
    // history_var = decay_ * batch_var + (1 - decay_) * history_var
    // (stored before var_eps_ is added).
    caffe_cpu_axpby(batch_statistic_.count(), decay_,
        batch_statistic_.cpu_data(), Dtype(1) - decay_,
        this->blobs_[3]->mutable_cpu_data());
  } else if (use_history) {
    // Replace the batch variance with the stored moving-average variance.
    caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  }

  // std = sqrt(variance + var_eps_)
  caffe_add_scalar(batch_statistic_.count(), var_eps_,
      batch_statistic_.mutable_cpu_data());
  caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
      Dtype(0.5), batch_statistic_.mutable_cpu_data());

  // Broadcast std to the full blob shape and divide.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  caffe_div(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(), output);

  // ---------------- keep x_norm and x_std ----------------
  caffe_copy(buffer_blob_.count(), output_ro, x_norm_.mutable_cpu_data());
  caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
      x_std_.mutable_cpu_data());

  // ---------------- scale ----------------
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), gamma, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  caffe_mul(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(), output);

  // ---------------- shift ----------------
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), beta, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  caffe_add(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(), output);
}
// CPU backward pass of the mean-variance normalization layer.
//
// Rows are samples (across_channels) or (sample, channel) pairs; each row has
// `dim` elements and is normalized independently.
//
// With normalize_variance set, for y = top_data and dy = top_diff:
//   bottom_diff = (dy - (y * sum(y * dy) + sum(dy)) / dim) / (std + eps)
// where the sums run over one row and std is recomputed here from
// bottom_data via var(X) = E(X^2) - (EX)^2.
//
// Without variance normalization the gradient is dy - mean(dy) per row.
// NOTE(review): this assumes the forward pass subtracts the per-row mean in
// that mode as well (the forward is not visible from this block) -- confirm.
// The previous code copied top_diff through unchanged, which ignores the
// mean subtraction that the normalize_variance branch does account for.
//
// @param top             top[0] supplies the output data and its gradient.
// @param propagate_down  not consulted by this implementation.
// @param bottom          (*bottom)[0] receives the input gradient.
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = (*bottom)[0]->num();
  else
    num = (*bottom)[0]->num() * (*bottom)[0]->channels();

  int dim = (*bottom)[0]->count() / num;

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // Added to the standard deviation to avoid division by zero.
    Dtype eps = 1e-10;

    // bottom_diff = y * rowsum(y * dy)
    caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        bottom_diff);
    caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

    // bottom_diff += rowsum(dy)   (beta = 1 accumulates into bottom_diff)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
        bottom_diff);

    // bottom_diff = dy - bottom_diff / dim
    caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
        bottom_diff);

    // put the squares of bottom into temp_
    caffe_powx(temp_.count(), bottom_data, Dtype(2),
        temp_.mutable_cpu_data());

    // computes variance using var(X) = E(X^2) - (EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
        sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E(X^2)
    caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
        temp_.mutable_cpu_data());  // (EX)^2
    caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
        variance_.mutable_cpu_data());  // variance

    // normalize variance: divide by (sqrt(variance) + eps), broadcast per row
    caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
        variance_.mutable_cpu_data());
    caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
  } else {
    // Mean-only normalization: bottom_diff = dy - rowmean(dy).
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
  }
}
// CPU backward pass of batch normalization, dispatched on bn_mode.
//
// LEARN: with dy' = scale * top_diff and x_norm_ saved by the forward pass,
//   scale_diff  = sum over (N, H, W) of x_norm * top_diff
//   shift_diff  = sum over (N, H, W) of top_diff
//   bottom_diff = (dy' - (x_norm * sum(x_norm * dy') + sum(dy'))
//                        / (N * H * W)) / batch_variance_
//   NOTE(review): batch_variance_ is divided by directly, so it is expected
//   to already hold the per-channel standard deviation left there by the
//   forward pass -- confirm against the forward implementation in use.
// INFERENCE: the layer is a per-channel affine map, so
//   scale_diff  = sum of bottom_data * top_diff
//   shift_diff  = sum of top_diff
//   bottom_diff = scale * top_diff
//
// The earlier version squared bottom_data into buffer_blob_ right before the
// final broadcast; that result was overwritten (gemm with beta = 0) before
// being read, so the dead full-tensor pow has been removed.
//
// @param top             top[0] supplies the output gradient.
// @param propagate_down  not consulted by this implementation.
// @param bottom          bottom[0] supplies the input data and receives the
//                        input gradient.
void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
  Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
  const Dtype* scale_data = this->blobs_[0]->cpu_data();

  switch (this->layer_param_.bn_param().bn_mode()) {
  case BNParameter_BNMode_LEARN:
    // ---- Propagate to parameters ----
    // gradient w.r.t. scale: reduce x_norm * top_diff over spatial, then batch
    caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(), top_diff,
        buffer_blob_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
        buffer_blob_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_variance_.mutable_cpu_diff());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
        spatial_variance_.cpu_diff(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), scale_diff);

    // gradient w.r.t. shift: reduce top_diff over spatial, then batch
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
        top_diff, spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_diff());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
        spatial_mean_.cpu_diff(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), shift_diff);

    // ---- Propagate down ----
    // buffer_blob_ = scale * top_diff (scale broadcast to the full shape)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
        buffer_blob_.mutable_cpu_data());

    // From here on buffer_blob_ plays the role of the top diff.
    // bottom_diff = x_norm * sum(x_norm * buffer) per channel
    caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
        buffer_blob_.cpu_data(), bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
        bottom_diff, spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
        spatial_mean_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_mean_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0), bottom_diff);
    caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(), bottom_diff,
        bottom_diff);

    // bottom_diff += sum(buffer) per channel (beta = 1 accumulates)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
        buffer_blob_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
        spatial_mean_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_mean_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff);

    // bottom_diff = buffer - bottom_diff / (N * H * W)
    caffe_cpu_axpby(buffer_blob_.count(), Dtype(1), buffer_blob_.cpu_data(),
        Dtype(-1. / (N_ * H_ * W_)), bottom_diff);

    // Broadcast batch_variance_ to the full shape and divide by it.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_variance_.cpu_data(),
        Dtype(0), spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_div(buffer_blob_.count(), bottom_diff, buffer_blob_.cpu_data(),
        bottom_diff);
    break;
  case BNParameter_BNMode_INFERENCE:
    // ---- Propagate to parameters ----
    // gradient w.r.t. scale: reduce bottom_data * top_diff
    caffe_mul(buffer_blob_.count(), bottom_data, top_diff,
        buffer_blob_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
        buffer_blob_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_variance_.mutable_cpu_diff());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
        spatial_variance_.cpu_diff(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), scale_diff);

    // gradient w.r.t. shift: reduce top_diff
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
        top_diff, spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_diff());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
        spatial_mean_.cpu_diff(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), shift_diff);

    // ---- Propagate down ----
    // bottom_diff = scale * top_diff (scale broadcast to the full shape)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
        bottom_diff);
    break;
  default:
    LOG(FATAL) << "Unknown BN mode.";
  }
}
// CPU forward pass of batch normalization, dispatched on bn_mode.
//
// LEARN: computes per-channel mean and variance over (N, H, W) using
//   var(X) = E(X^2) - (EX)^2, optionally exports them to top[1] / top[2],
//   then writes top[0] = scale * (x - mean) / sqrt(var + var_eps_) + shift.
//   On exit batch_variance_ holds sqrt(var + var_eps_) and x_norm_ holds the
//   normalized input.
// INFERENCE: applies only the per-channel affine map
//   top[0] = scale * x + shift.
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  // Writable and read-only handles onto top[0]; intermediate results written
  // through `output` are read back through `output_ro`.
  Dtype* output = top[0]->mutable_cpu_data();
  const Dtype* output_ro = top[0]->cpu_data();

  const Dtype* gamma = this->blobs_[0]->cpu_data();
  const Dtype* beta = this->blobs_[1]->cpu_data();

  switch (this->layer_param_.bn_param().bn_mode()) {
  case BNParameter_BNMode_LEARN: {
    // Scratch buffer <- x^2
    caffe_powx(bottom[0]->count(), input, Dtype(2),
        buffer_blob_.mutable_cpu_data());

    // E(X): average over spatial positions, then over the batch.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
        Dtype(1. / (H_ * W_)), input,
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
        spatial_mean_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_mean_.mutable_cpu_data());

    // E(X^2): same two reductions applied to the squared values.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
        Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
        spatial_variance_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_variance_.mutable_cpu_data());

    // variance = E(X^2) - (EX)^2; the head of buffer_blob_ holds (EX)^2.
    caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
        buffer_blob_.mutable_cpu_data());
    caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
        buffer_blob_.cpu_data(), batch_variance_.mutable_cpu_data());

    // Optional extra outputs: top[1] = batch mean, top[2] = batch variance
    // (the variance is exported before var_eps_ is added).
    if (top.size() > 1) {
      caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
          top[1]->mutable_cpu_data());
    }
    if (top.size() > 2) {
      caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
          top[2]->mutable_cpu_data());
    }

    // Subtract the mean: broadcast -mean to the full shape and add.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_mean_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(-1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_add(buffer_blob_.count(), input, buffer_blob_.cpu_data(), output);

    // Divide by std = sqrt(variance + var_eps_), broadcast to the full shape.
    caffe_add_scalar(batch_variance_.count(), var_eps_,
        batch_variance_.mutable_cpu_data());
    caffe_powx(batch_variance_.count(), batch_variance_.cpu_data(),
        Dtype(0.5), batch_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_variance_.cpu_data(),
        Dtype(0), spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_div(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
        output);

    // Keep the normalized input for the backward pass.
    caffe_copy(buffer_blob_.count(), output_ro, x_norm_.mutable_cpu_data());

    // Scale: multiply by the per-channel scale, broadcast to the full shape.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), gamma, Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_mul(buffer_blob_.count(), output, buffer_blob_.cpu_data(), output);

    // Shift: add the per-channel shift, broadcast to the full shape.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), beta, Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_add(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
        output);
    break;
  }
  case BNParameter_BNMode_INFERENCE: {
    // Scale the raw input by the broadcast per-channel scale.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), gamma, Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_mul(buffer_blob_.count(), input, buffer_blob_.cpu_data(), output);

    // Add the broadcast per-channel shift.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), beta, Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
        Dtype(1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_add(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
        output);
    break;
  }
  default:
    LOG(FATAL) << "Unknown BN mode.";
  }
}
// CPU forward pass of batch normalization with frozen / moving-average
// support:
//   top = scale * (bottom - mean) * inv_std + shift
// When the layer is frozen or running in the TEST phase, mean and inverse
// standard deviation are read from blobs_[2] and blobs_[3]. Otherwise they
// are computed from the current batch and blended into those blobs as
//   stored = (1 - bn_momentum_) * batch + bn_momentum_ * stored.
// Note that blobs_[3] stores the inverse std, (var + bn_eps_)^(-1/2), not
// the variance. Unless frozen, x_norm_ and x_inv_std_ are saved for backprop.
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  // Read-only and writable handles onto top[0]; intermediate results written
  // through `output` are read back through `output_ro`.
  const Dtype* output_ro = top[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();

  const Dtype* gamma = this->blobs_[0]->cpu_data();
  const Dtype* beta = this->blobs_[1]->cpu_data();

  const bool use_stored_stats = (frozen_ || this->phase_ == TEST);

  // ---- Mean normalization ----
  if (!use_stored_stats) {
    // Batch mean: average over spatial positions, then over the batch.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1) / (height_ * width_), input,
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());
    // Blend into the moving-average mean.
    if (!frozen_) {
      caffe_cpu_axpby(batch_statistic_.count(), Dtype(1) - bn_momentum_,
          batch_statistic_.cpu_data(), bn_momentum_,
          this->blobs_[2]->mutable_cpu_data());
    }
  } else {
    // Take the moving-average mean as is.
    caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  }

  // Broadcast -mean to the full shape and add it to the input.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(-1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_add(broadcast_buffer_.count(), input, broadcast_buffer_.cpu_data(),
      output);

  // ---- Variance normalization ----
  if (!use_stored_stats) {
    // Mean of the squared, centered values: spatial axis, then batch axis.
    caffe_powx(broadcast_buffer_.count(), output_ro, Dtype(2),
        broadcast_buffer_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1) / (height_ * width_), broadcast_buffer_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());
    // inv_std = (variance + bn_eps_)^(-1/2)
    caffe_add_scalar(batch_statistic_.count(), bn_eps_,
        batch_statistic_.mutable_cpu_data());
    caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
        Dtype(-0.5), batch_statistic_.mutable_cpu_data());
    // Blend into the moving-average inverse std.
    if (!frozen_) {
      caffe_cpu_axpby(batch_statistic_.count(), Dtype(1) - bn_momentum_,
          batch_statistic_.cpu_data(), bn_momentum_,
          this->blobs_[3]->mutable_cpu_data());
    }
  } else {
    // Take the stored moving-average value as is.
    caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  }

  // Broadcast inv_std to the full shape and multiply.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_mul(broadcast_buffer_.count(), output_ro,
      broadcast_buffer_.cpu_data(), output);

  // Keep the normalized input and inverse std for the backward pass.
  if (!frozen_) {
    caffe_copy(broadcast_buffer_.count(), output_ro,
        x_norm_.mutable_cpu_data());
    caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
        x_inv_std_.mutable_cpu_data());
  }

  // ---- Scale ----
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), gamma, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_mul(broadcast_buffer_.count(), output_ro,
      broadcast_buffer_.cpu_data(), output);

  // ---- Shift ----
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), beta, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_add(broadcast_buffer_.count(), output_ro,
      broadcast_buffer_.cpu_data(), output);
}
// AdaGrad step for a single learnable parameter blob. With g the current
// gradient (the blob's diff):
//   history += g^2
//   diff     = local_rate * g / (sqrt(history) + delta)
// where local_rate = rate * params_lr[param_id] and delta comes from the
// solver parameters. update_[param_id] is used as scratch space. The same
// sequence is issued on the CPU, CUDA or GreenTea (OpenCL) backend depending
// on the active mode and build flags.
void AdaGradSolver<Dtype>::ComputeUpdateValue(uint_tp param_id, Dtype rate) {
  CHECK(Caffe::root_solver());
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype stabilizer = this->param_.delta();
  Dtype step = rate * net_params_lr[param_id];
  // The parameter blob being updated.
  Blob<Dtype>* const weights = net_params[param_id];

  switch (Caffe::mode()) {
  case Caffe::CPU: {
    // update <- g^2
    caffe_powx(weights->count(), weights->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // history <- history + g^2
    caffe_add(weights->count(), this->update_[param_id]->cpu_data(),
        this->history_[param_id]->cpu_data(),
        this->history_[param_id]->mutable_cpu_data());

    // update <- g / (sqrt(history) + delta)
    caffe_powx(weights->count(), this->history_[param_id]->cpu_data(),
        Dtype(0.5), this->update_[param_id]->mutable_cpu_data());
    caffe_add_scalar(weights->count(), stabilizer,
        this->update_[param_id]->mutable_cpu_data());
    caffe_div(weights->count(), weights->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // diff <- local_rate * update (beta = 0 overwrites the old diff)
    caffe_cpu_axpby(weights->count(), step,
        this->update_[param_id]->cpu_data(), Dtype(0),
        weights->mutable_cpu_diff());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
      // update <- g^2
      caffe_gpu_powx(weights->count(), weights->gpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_gpu_data());

      // history <- history + g^2
      caffe_gpu_add(weights->count(), this->update_[param_id]->gpu_data(),
          this->history_[param_id]->gpu_data(),
          this->history_[param_id]->mutable_gpu_data());

      // update <- g / (sqrt(history) + delta)
      caffe_gpu_powx(weights->count(), this->history_[param_id]->gpu_data(),
          Dtype(0.5), this->update_[param_id]->mutable_gpu_data());
      caffe_gpu_add_scalar(weights->count(), stabilizer,
          this->update_[param_id]->mutable_gpu_data());
      caffe_gpu_div(weights->count(), weights->gpu_diff(),
          this->update_[param_id]->gpu_data(),
          this->update_[param_id]->mutable_gpu_data());

      // diff <- local_rate * update
      caffe_gpu_axpby(weights->count(), step,
          this->update_[param_id]->gpu_data(), Dtype(0),
          weights->mutable_gpu_diff());
#endif  // USE_CUDA
    } else {
#ifdef USE_GREENTEA
      // update <- g^2
      greentea_gpu_powx<Dtype>(
          this->device_->id(), weights->count(),
          (cl_mem) (weights->gpu_diff()), 0, Dtype(2),
          (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

      // history <- history + g^2
      greentea_gpu_add<Dtype>(
          this->device_->id(), weights->count(),
          (cl_mem) (this->update_[param_id]->gpu_data()), 0,
          (cl_mem) (this->history_[param_id]->gpu_data()), 0,
          (cl_mem) (this->history_[param_id]->mutable_gpu_data()), 0);

      // update <- g / (sqrt(history) + delta)
      greentea_gpu_powx<Dtype>(
          this->device_->id(), weights->count(),
          (cl_mem) (this->history_[param_id]->gpu_data()), 0, Dtype(0.5),
          (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);
      greentea_gpu_add_scalar<Dtype>(
          this->device_->id(), weights->count(), stabilizer,
          (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);
      greentea_gpu_div<Dtype>(
          this->device_->id(), weights->count(),
          (cl_mem) (weights->gpu_diff()), 0,
          (cl_mem) (this->update_[param_id]->gpu_data()), 0,
          (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

      // diff <- local_rate * update
      greentea_gpu_axpby<Dtype>(
          this->device_->id(), weights->count(), step,
          (cl_mem) (this->update_[param_id]->gpu_data()), 0, Dtype(0),
          (cl_mem) (weights->mutable_gpu_diff()), 0);
#endif  // USE_GREENTEA
    }
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL)<< "Unknown caffe mode: " << Caffe::mode();
  }
}