template <typename Dtype>
void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    const bool is_eltwise = (inner_dim_ == 1 && outer_dim_ == 1);
    Dtype* product = is_eltwise ?
        bottom[1]->mutable_cpu_diff() : bottom[0]->mutable_cpu_diff();
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        *scalar_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            bottom[1]->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        if (scalar_dim_ == 1) {
          *scalar_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scalar_dim_, Dtype(1),
                         sum_result, sum_mult, Dtype(0), scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scalar_data = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scalar_dim_; ++d) {
        const Dtype factor = scalar_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
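// A minimal sketch (not part of the layer) of what the dot/gemv cascade above
// reduces to, useful for checking the scalar gradient with plain loops.
// It assumes the usual (outer, scalar, inner) broadcasting layout used by the
// layer; ScalarDiffReference and scalar_diff_ref are hypothetical names.
template <typename Dtype>
void ScalarDiffReference(const Dtype* top_diff, const Dtype* bottom_data,
    int outer_dim, int scalar_dim, int inner_dim, Dtype* scalar_diff_ref) {
  for (int d = 0; d < scalar_dim; ++d) { scalar_diff_ref[d] = Dtype(0); }
  for (int n = 0; n < outer_dim; ++n) {
    for (int d = 0; d < scalar_dim; ++d) {
      for (int k = 0; k < inner_dim; ++k) {
        // d(loss)/d(scalar[d]) sums top_diff * bottom_data over the
        // broadcast axes n and k.
        const int idx = (n * scalar_dim + d) * inner_dim + k;
        scalar_diff_ref[d] += top_diff[idx] * bottom_data[idx];
      }
    }
  }
}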
template <typename Dtype>
void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0] && bottom[0] != top[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
#ifdef USE_MLSL
    this->on_delinp_ready(propagate_down);
#endif
  }
  // in-place, we don't need to do anything with the data diff
  const bool bias_param = (bottom.size() == 1);
  if ((!bias_param && propagate_down[1]) ||
      (bias_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
        ->mutable_cpu_diff();
    bool accum = bias_param;
    for (int n = 0; n < outer_dim_; ++n) {
      caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1), top_diff,
                     bias_multiplier_.cpu_data(), Dtype(accum), bias_diff);
      top_diff += dim_;
      accum = true;
    }
  }
}
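// Hedged reference sketch of the gemv loop above: since the bias is broadcast
// over the outer and inner axes, bias_diff[d] is the sum of top_diff over all
// samples n and inner positions k. The real code additionally accumulates
// into the existing parameter diff when bias_param is true; this sketch
// simply writes a fresh result. BiasDiffReference is a hypothetical name.
template <typename Dtype>
void BiasDiffReference(const Dtype* top_diff, int outer_dim, int bias_dim,
    int inner_dim, Dtype* bias_diff_ref) {
  for (int d = 0; d < bias_dim; ++d) { bias_diff_ref[d] = Dtype(0); }
  for (int n = 0; n < outer_dim; ++n) {
    for (int d = 0; d < bias_dim; ++d) {
      for (int k = 0; k < inner_dim; ++k) {
        bias_diff_ref[d] += top_diff[(n * bias_dim + d) * inner_dim + k];
      }
    }
  }
}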
template <typename Dtype>
void ZJQContextLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* const bottom_data = bottom[0]->cpu_data();
  const Dtype* const context = bottom[1]->cpu_data();
  const Dtype* const all_ones = all_ones_.cpu_data();
  const Dtype* const weight = this->blobs_[0]->cpu_data();
  Dtype* const w_multi_context = w_multi_context_.mutable_cpu_data();
  Dtype* const top_data = (*top)[0]->mutable_cpu_data();
  // w_multi_context = weight * context
  caffe_cpu_gemv(CblasNoTrans, num_feat_map_, context_dim_, (Dtype)1.,
                 weight, context, (Dtype)0., w_multi_context);
  const int num = bottom[0]->num();
  const int dim = bottom[0]->count() / num;
  // Broadcast w_multi_context over the height_ * width_ spatial positions.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_feat_map_,
                        height_ * width_, 1, (Dtype)1., w_multi_context,
                        all_ones, (Dtype)0., tmp_.mutable_cpu_data());
  // Start from the input, then add the context term as a rank-1 update.
  memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, (Dtype)1.,
                        tmp_.cpu_data(), all_ones_sample_.cpu_data(),
                        (Dtype)1., top_data);
}
void Forward_cpu_GEMV_L(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data_a = bottom[0]->cpu_data();
  const Dtype* bottom_data_b = bottom[1]->cpu_data();
  const int num = bottom[0]->count(0, bottom[0]->num_axes() - 1);
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int _m = bottom[1]->shape(-2);
  const int _n = bottom[1]->shape(-1);
  const int _stride_a = _m;
  const int _stride_b = _m * _n;
  const int _stride_c = _n;
  for (int i = 0; i < num; ++i) {
    caffe_cpu_gemv(CblasTrans, _m, _n, (Dtype)1.,
                   bottom_data_b + i * _stride_b,
                   bottom_data_a + i * _stride_a, (Dtype)0.,
                   top_data + i * _stride_c);
  }
}
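// Hedged sketch of what each caffe_cpu_gemv call above computes per batch
// item i, assuming the usual row-major Caffe blob layout:
//   y_i = B_i^T * a_i,  with B_i of shape (m, n) from bottom[1] and a_i of
//   length m from bottom[0]. GemvLForwardReference is a hypothetical name.
template <typename Dtype>
void GemvLForwardReference(const Dtype* a, const Dtype* B, int num, int m,
    int n, Dtype* y) {
  for (int i = 0; i < num; ++i) {
    for (int col = 0; col < n; ++col) {
      Dtype acc = Dtype(0);
      for (int row = 0; row < m; ++row) {
        acc += B[(i * m + row) * n + col] * a[i * m + row];
      }
      y[i * n + col] = acc;
    }
  }
}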
void Backward_cpu_GEMV_L(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* bottom_data_a = bottom[0]->cpu_data();
  const Dtype* bottom_data_b = bottom[1]->cpu_data();
  const int num = bottom[0]->count(0, bottom[0]->num_axes() - 1);
  const int _m = bottom[1]->shape(-2);
  const int _n = bottom[1]->shape(-1);
  const int _stride_a = _m;
  const int _stride_b = _m * _n;
  const int _stride_c = _n;
  if (propagate_down[0]) {
    Dtype* bottom_diff_a = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < num; ++i) {
      caffe_cpu_gemv(CblasNoTrans, _m, _n, (Dtype)1.,
                     bottom_data_b + i * _stride_b,
                     top_diff + i * _stride_c, (Dtype)0.,
                     bottom_diff_a + i * _stride_a);
    }
  }
  if (propagate_down[1]) {
    Dtype* bottom_diff_b = bottom[1]->mutable_cpu_diff();
    for (int i = 0; i < num; ++i) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
                            _m,                              // M
                            _n,                              // N
                            1,                               // K
                            (Dtype)1,                        // alpha
                            bottom_data_a + i * _stride_a,   // x
                            top_diff + i * _stride_c,        // y
                            (Dtype)0,                        // beta
                            bottom_diff_b + i * _stride_b);  // a
    }
  }
}
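// Hedged sketch of the two gradient branches above for y_i = B_i^T a_i:
//   d a_i = B_i * d y_i          (the gemv branch)
//   d B_i = a_i * d y_i^T        (the K = 1 gemm, i.e. an outer product)
// written as plain loops. GemvLBackwardReference is a hypothetical name.
template <typename Dtype>
void GemvLBackwardReference(const Dtype* a, const Dtype* B, const Dtype* dy,
    int num, int m, int n, Dtype* da, Dtype* dB) {
  for (int i = 0; i < num; ++i) {
    for (int row = 0; row < m; ++row) {
      Dtype acc = Dtype(0);
      for (int col = 0; col < n; ++col) {
        acc += B[(i * m + row) * n + col] * dy[i * n + col];
        dB[(i * m + row) * n + col] = a[i * m + row] * dy[i * n + col];
      }
      da[i * m + row] = acc;
    }
  }
}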
template <typename Dtype>
Dtype ClassEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  const Dtype* data = bottom[0]->cpu_data();
  const Dtype* avg_data = avg_.cpu_data();
  Dtype* class_prob_data = class_prob_.mutable_cpu_data();
  Dtype* log_class_prob_data = log_class_prob_.mutable_cpu_data();
  alpha_ = alpha_ * discount_coeff_ + Dtype(1);
  // Exponentially smoothed class distribution:
  // class_prob = (1/alpha) * data^T * avg + ((alpha - 1)/alpha) * class_prob
  caffe_cpu_gemv(CblasTrans, num_, channels_, Dtype(1) / alpha_, data,
                 avg_data, (alpha_ - Dtype(1)) / alpha_, class_prob_data);
  for (int i = 0; i < channels_; i++) {
    LOG(INFO) << "class_prob_data[" << i << "]=" << class_prob_data[i];
  }
  for (int i = 0; i < channels_; i++) {
    log_class_prob_data[i] =
        log(std::max(class_prob_data[i], Dtype(FLT_MIN)));
  }
  Dtype loss = coeff_ *
      caffe_cpu_dot<Dtype>(channels_, log_class_prob_data, class_prob_data);
  LOG(INFO) << "loss=" << loss;
  return loss;
}
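// Hedged sketch of the final reduction above: given the smoothed class
// distribution p (class_prob_), the returned value is
//   coeff * sum_c p_c * log(max(p_c, FLT_MIN)),
// i.e. coeff times the negative entropy of p. EntropyLossReference is a
// hypothetical helper; <cmath>, <cfloat>, and <algorithm> are assumed.
template <typename Dtype>
Dtype EntropyLossReference(const Dtype* p, int channels, Dtype coeff) {
  Dtype loss = Dtype(0);
  for (int c = 0; c < channels; ++c) {
    loss += p[c] * std::log(std::max(p[c], Dtype(FLT_MIN)));
  }
  return coeff * loss;
}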
template <typename Dtype>
void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bias_layer_ &&
      this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
    bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
  }
  const bool scale_param = (bottom.size() == 1);
  Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
  if ((!scale_param && propagate_down[1]) ||
      (scale_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scale diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scale->count());
    Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scale_diff += result;
        } else {
          *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_dim_ == 1) {
          if (scale_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scale_diff += result;
          } else {
            *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_, Dtype(1),
                         sum_result, sum_mult, Dtype(scale_param),
                         scale_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scale_data = scale->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scale_dim_; ++d) {
        const Dtype factor = scale_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
template <typename Dtype>
void CRFWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // Backward flow is split into two paths: one to the local parameters,
    // the other to the lower layer through the diff blob of bottom[0].
    Dtype* ptr_pi_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* ptr_tr_diff = this->blobs_[1]->mutable_cpu_diff();
    Dtype* ptr_mu_diff = this->blobs_[2]->mutable_cpu_diff();
    Dtype* ptr_bottom_diff = bottom[0]->mutable_cpu_diff();
    Dtype* ptr_state_err = gamma_.mutable_cpu_data();
    Dtype* ptr_trans_err = epsilon_.mutable_cpu_data();
    // some data needed
    const Dtype* state_err = gamma_.cpu_data();
    const Dtype* trans_err = epsilon_.cpu_data();
    const Dtype* feature_table = bottom[0]->cpu_data();
    const Dtype* label = bottom[1]->cpu_data();
    const Dtype* mu = this->blobs_[2]->cpu_data();
    const Dtype* pi_diff = this->blobs_[0]->cpu_diff();
    // per-instance strides
    int ts = max_seq_length_ * feature_num_;
    int gs = max_seq_length_ * state_num_;
    int eps = max_seq_length_ * state_num_ * state_num_;
    for (int i = 0; i < num_; ++i) {
      // seq length of each instance may differ; needs to be reconsidered here
      int sl = max_seq_length_;
      // compute the state energy err and the state transition err at each
      // position of each instance
      for (int j = 0; j < sl; ++j) {
        int idx = *(label + i * max_seq_length_ + j);
        if (idx >= 0 && idx < state_num_) {
          *(ptr_state_err + i * gs + j * state_num_ + idx) += 1;
        } else {
          // TODO
        }
        if (j >= sl - 1) continue;
        int idx_next = *(label + i * max_seq_length_ + j + 1);
        if (idx >= 0 && idx < state_num_ &&
            idx_next >= 0 && idx_next < state_num_) {
          // index into epsilon_ with the transition-table stride (eps),
          // not the state-table stride
          *(ptr_trans_err + i * eps + j * state_num_ * state_num_
              + idx * state_num_ + idx_next) += 1;
        } else {
          // TODO
        }
      }
      // Backward to input blob, bottom_diff = Mu' dot state_err'
      caffe_cpu_gemm(CblasTrans, CblasTrans, feature_num_, sl, state_num_,
                     (Dtype)1., mu, state_err + i * gs, (Dtype)0.,
                     ptr_bottom_diff + i * ts);
      // Backward to pi, pi_diff += state_err(0)
      caffe_add(state_num_, pi_diff, state_err + i * gs, ptr_pi_diff);
      // Backward to mu, mu_diff += state_err' dot feature_table'
      // (feature_table is addressed with its own per-instance stride ts)
      caffe_cpu_gemm(CblasTrans, CblasTrans, state_num_, feature_num_, sl,
                     (Dtype)1., state_err + i * gs, feature_table + i * ts,
                     (Dtype)1., ptr_mu_diff);
      // Backward to tr, tr_diff += sum_t(trans_err(t)), accumulated across
      // instances like the mu update above
      caffe_cpu_gemv(CblasNoTrans, state_num_ * state_num_, sl, (Dtype)1.,
                     trans_err + i * eps, multiplier_seq_len_.cpu_data(),
                     (Dtype)1., ptr_tr_diff);
    }
  }
}