void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int dim = prob_.count() / outer_num_;
    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
          ++count;
        }
      }
    }
    // Scale gradient. Guard count against zero, which happens when every
    // position carries the ignore label and would otherwise divide by zero.
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (normalize_) {
      caffe_scal(prob_.count(), loss_weight / std::max(count, 1), bottom_diff);
    } else {
      caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff);
    }
  }
}
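// Aside: a minimal standalone sketch (not part of the layer above) checking
// the identity the backward pass relies on: for L = -log(softmax(z)[y]),
// dL/dz_k = p_k - 1{k == y}, which is why the code copies the probabilities
// and subtracts 1 at the label index. All names below are local to the
// sketch; it assumes nothing from Caffe.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

static std::vector<double> softmax(const std::vector<double>& z) {
  double m = *std::max_element(z.begin(), z.end()), s = 0.0;
  std::vector<double> p(z.size());
  for (size_t i = 0; i < z.size(); ++i) { p[i] = std::exp(z[i] - m); s += p[i]; }
  for (size_t i = 0; i < z.size(); ++i) { p[i] /= s; }
  return p;
}

int main() {
  const std::vector<double> z = {0.5, -1.2, 2.0};
  const int y = 2;  // label index
  const double h = 1e-6;
  const std::vector<double> p = softmax(z);
  for (size_t k = 0; k < z.size(); ++k) {
    std::vector<double> zp = z, zm = z;
    zp[k] += h; zm[k] -= h;
    const double numeric =
        (-std::log(softmax(zp)[y]) + std::log(softmax(zm)[y])) / (2.0 * h);
    const double analytic = p[k] - (static_cast<int>(k) == y ? 1.0 : 0.0);
    std::printf("k=%zu analytic=%+.6f numeric=%+.6f\n", k, analytic, numeric);
  }
  return 0;
}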
void HingeLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* label = bottom[1]->cpu_data();
    int num = bottom[0]->num();
    int count = bottom[0]->count();
    int dim = count / num;
    for (int i = 0; i < num; ++i) {
      bottom_diff[i * dim + static_cast<int>(label[i])] *= -1;
    }
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    switch (this->layer_param_.hinge_loss_param().norm()) {
    case HingeLossParameter_Norm_L1:
      caffe_cpu_sign(count, bottom_diff, bottom_diff);
      caffe_scal(count, loss_weight / num, bottom_diff);
      break;
    case HingeLossParameter_Norm_L2:
      caffe_scal(count, loss_weight * 2 / num, bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown Norm";
    }
  }
}
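// Aside: a standalone sketch of the one-sample L1 hinge subgradient the
// backward pass above produces. It assumes (as in stock Caffe) that
// Forward_cpu left h_k = max(0, 1 + s_k * x_k) in bottom_diff, with
// s_k = -1 at the true class and +1 otherwise; the backward then recovers
// the signed subgradient s_k * 1{h_k > 0}. Names are local to the sketch.
#include <cmath>
#include <cstdio>

int main() {
  const int dim = 3, label = 1;
  const double x[dim] = {0.4, 0.9, -1.5};
  double h[dim];  // what Forward_cpu is assumed to have stored in bottom_diff
  for (int k = 0; k < dim; ++k) {
    const double s = (k == label) ? -1.0 : 1.0;
    h[k] = std::fmax(0.0, 1.0 + s * x[k]);
  }
  // Backward: flip the sign at the label, then take the sign function.
  h[label] *= -1.0;
  for (int k = 0; k < dim; ++k) {
    const double g = (h[k] > 0) - (h[k] < 0);  // caffe_cpu_sign analogue
    std::printf("k=%d grad=%+.1f\n", k, g);    // expected: +1, -1, 0
  }
  return 0;
}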
void Blob<Dtype>::scale_diff(Dtype scale_factor) {
  Dtype* diff;
  if (!diff_) { return; }
  switch (diff_->head()) {
  case SyncedMemory::SYNCED_PRV:
  case SyncedMemory::HEAD_AT_PRV:
    diff = mutable_prv_diff();
    caffe_scal(prv_diff_count(), scale_factor, diff);
    break;
  case SyncedMemory::HEAD_AT_CPU:
    diff = mutable_cpu_diff();
    caffe_scal(count_, scale_factor, diff);
    return;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    diff = mutable_gpu_diff();
    caffe_gpu_scal(count_, scale_factor, diff);
    return;
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
}
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* top_diff = top[0]->mutable_cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    if (this->phase_ == TRAIN) {
      if (drop_batch_) {
        Dtype drop = rand_vec_->cpu_data()[0];
        // scale + mask (note: the top diff is modified in place)
        caffe_scal(top[0]->count(), Dtype(scale_ * drop), top_diff);
        caffe_copy(top[0]->count(), top_diff, bottom_diff);
      } else {
        // scale (in place on the top diff)
        caffe_scal(top[0]->count(), scale_, top_diff);
        // multiply mask
        vector<Blob<Dtype>*> scale_bottom(2, NULL);
        scale_bottom[0] = bottom[0];
        scale_bottom[1] = rand_vec_;
        const vector<Blob<Dtype>*> scale_top(1, top[0]);
        vector<bool> prop_down(2, true);
        prop_down[1] = false;
        scale_layer_->Backward(scale_top, prop_down, scale_bottom);
      }
    } else {
      caffe_copy(top[0]->count(), top_diff, bottom_diff);
    }
  }
}
void PowerLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    const Dtype* top_diff = top[0]->cpu_diff();
    if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
      caffe_set(count, diff_scale_, bottom_diff);
    } else {
      const Dtype* bottom_data = bottom[0]->cpu_data();
      // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
      //               = diff_scale * y / (shift + scale * x)
      if (power_ == Dtype(2)) {
        // Special case for y = (shift + scale * x)^2
        //     -> dy/dx = 2 * scale * (shift + scale * x)
        //              = diff_scale * shift + diff_scale * scale * x
        caffe_cpu_axpby(count, diff_scale_ * scale_, bottom_data,
            Dtype(0), bottom_diff);
        if (shift_ != Dtype(0)) {
          caffe_add_scalar(count, diff_scale_ * shift_, bottom_diff);
        }
      } else if (shift_ == Dtype(0)) {
        // Special case for y = (scale * x)^power
        //     -> dy/dx = scale * power * (scale * x)^(power - 1)
        //              = scale * power * (scale * x)^power * (scale * x)^(-1)
        //              = power * y / x
        const Dtype* top_data = top[0]->cpu_data();
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_scal(count, power_, bottom_diff);
      } else {
        caffe_copy(count, bottom_data, bottom_diff);
        if (scale_ != Dtype(1)) {
          caffe_scal(count, scale_, bottom_diff);
        }
        if (shift_ != Dtype(0)) {
          caffe_add_scalar(count, shift_, bottom_diff);
        }
        const Dtype* top_data = top[0]->cpu_data();
        caffe_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
        if (diff_scale_ != Dtype(1)) {
          caffe_scal(count, diff_scale_, bottom_diff);
        }
      }
    }
    if (diff_scale_ != Dtype(0)) {
      caffe_mul(count, top_diff, bottom_diff, bottom_diff);
    }
  }
}
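// Aside: a standalone finite-difference check of the derivative used above,
// dy/dx = scale * power * (shift + scale * x)^(power - 1) for
// y = (shift + scale * x)^power. All values are arbitrary test constants
// local to the sketch.
#include <cmath>
#include <cstdio>

int main() {
  const double scale = 1.5, shift = 0.3, power = 2.7, x = 0.8, h = 1e-6;
  const double analytic =
      scale * power * std::pow(shift + scale * x, power - 1.0);
  const double numeric = (std::pow(shift + scale * (x + h), power) -
                          std::pow(shift + scale * (x - h), power)) / (2.0 * h);
  std::printf("analytic=%.6f numeric=%.6f\n", analytic, numeric);
  return 0;
}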
void TripletRankingHingeLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* original_code;
  const Dtype* similar_code;
  const Dtype* different_code;
  if (propagate_down[0]) {
    for (int i = 0; i < 3; ++i) {
      for (int j = 0; j < batch_; ++j) {
        Dtype* bout = bottom[i]->mutable_cpu_diff();
        original_code = bottom[0]->cpu_data() + bottom[0]->offset(j);
        similar_code = bottom[1]->cpu_data() + bottom[1]->offset(j);
        different_code = bottom[2]->cpu_data() + bottom[2]->offset(j);
        if (i == 0) {
          if (dist_sq_.cpu_data()[j] > Dtype(0.0)) {
            // difference of F- and F+
            caffe_sub(dim_, different_code, similar_code,
                gradient.mutable_cpu_data());
            caffe_scal(dim_, Dtype(2) / Dtype(batch_),
                gradient.mutable_cpu_data());
          } else {
            caffe_set(dim_, Dtype(0.0), gradient.mutable_cpu_data());
          }
        }
        if (i == 1) {
          if (dist_sq_.cpu_data()[j] > Dtype(0.0)) {
            // difference of F+ and F
            caffe_sub(dim_, similar_code, original_code,
                gradient.mutable_cpu_data());
            caffe_scal(dim_, Dtype(2) / Dtype(batch_),
                gradient.mutable_cpu_data());
          } else {
            caffe_set(dim_, Dtype(0.0), gradient.mutable_cpu_data());
          }
        }
        if (i == 2) {
          if (dist_sq_.cpu_data()[j] > Dtype(0.0)) {
            // difference of F and F-
            caffe_sub(dim_, original_code, different_code,
                gradient.mutable_cpu_data());
            caffe_scal(dim_, Dtype(2) / Dtype(batch_),
                gradient.mutable_cpu_data());
          } else {
            caffe_set(dim_, Dtype(0.0), gradient.mutable_cpu_data());
          }
        }
        caffe_scal(dim_, Dtype(2.0), gradient.mutable_cpu_data());
        caffe_copy(dim_, gradient.cpu_data(), bout + (j * dim_));
      }
    }
  }
}
void BilateralFilterLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  bilateral_interface_cpu_->Backward(propagate_down[0], propagate_down[1],
      bottom[0], bottom[1], top[0]);
  // Scale gradient
  const Dtype loss_weight = top[0]->cpu_diff()[0];
  if (propagate_down[0]) {
    caffe_scal(bottom[0]->count(), loss_weight,
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    caffe_scal(bottom[1]->count(), loss_weight,
        bottom[1]->mutable_cpu_diff());
  }
}
void FocalLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // data
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* label = bottom[1]->cpu_data();
    // intermediate buffers
    const Dtype* log_prob_data = log_prob_.cpu_data();
    const Dtype* power_prob_data = power_prob_.cpu_data();
    int count = 0;
    int channels = bottom[0]->shape(softmax_axis_);
    int dim = prob_.count() / outer_num_;
    const Dtype eps = 1e-10;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        // label
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        // ignore label
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < channels; ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
          continue;
        }
        // the gradient from FL w.r.t. p_t; the sign is folded in below
        int ind_i = i * dim + label_value * inner_num_ + j;  // index of the
                                                             // ground-truth label
        Dtype grad = 0
            - gamma_ * (power_prob_data[ind_i]
                / std::max(1 - prob_data[ind_i], eps)) * log_prob_data[ind_i]
            + power_prob_data[ind_i] / prob_data[ind_i];
        // the gradient w.r.t. the input data x
        for (int c = 0; c < channels; ++c) {
          int ind_j = i * dim + c * inner_num_ + j;
          if (c == label_value) {
            // diagonal term of the softmax Jacobian (i == j in the
            // softmax-derivative sense)
            CHECK_EQ(ind_i, ind_j);
            bottom_diff[ind_j] = grad * prob_data[ind_i]
                * (prob_data[ind_i] - 1);
          } else {
            // off-diagonal term of the softmax Jacobian (i != j)
            bottom_diff[ind_j] = grad * prob_data[ind_i] * prob_data[ind_j];
          }
        }
        ++count;
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0]
        / get_normalizer(normalization_, count);
    caffe_scal(prob_.count(), loss_weight, bottom_diff);
  }
}
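// Aside: `grad` above reads (sign apart) as the derivative of the focal loss
// FL(p) = -(1 - p)^gamma * log(p) with respect to p_t, assuming
// power_prob_data stores (1 - p)^gamma and log_prob_data stores log(p) at
// the label index (any alpha weighting would scale both sides equally):
//   -dFL/dp = -gamma * (1 - p)^(gamma - 1) * log(p) + (1 - p)^gamma / p.
// A standalone finite-difference check under that assumption:
#include <cmath>
#include <cstdio>

static double focal_loss(double p, double gamma) {
  return -std::pow(1.0 - p, gamma) * std::log(p);
}

int main() {
  const double gamma = 2.0, p = 0.3, h = 1e-7;
  // `grad` exactly as formed in the layer code above.
  const double grad =
      -gamma * std::pow(1.0 - p, gamma) / (1.0 - p) * std::log(p)
      + std::pow(1.0 - p, gamma) / p;
  const double numeric =
      (focal_loss(p + h, gamma) - focal_loss(p - h, gamma)) / (2.0 * h);
  std::printf("grad=%.6f -dFL/dp=%.6f\n", grad, -numeric);
  return 0;
}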
void SigmoidCrossEntropyWithValidLabelLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    if (bottom.size() == 3) {
      // a valid-label mask is specified
      const Dtype* valid = bottom[2]->cpu_data();
      for (int i = 0; i < count; ++i) {
        if (!valid[i]) {
          bottom_diff[i] = 0;
        }
      }
    }
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
  double regularization =
      this->layer_param_.inner_product_param().regularization() / 2;
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
    // Gradient with respect to weight
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
    if (regularization > 0) {
      caffe_scal(this->blobs_[0]->count(), Dtype(1.0 + regularization),
          this->blobs_[0]->mutable_cpu_diff());
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)0.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
        top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
        (*bottom)[0]->mutable_cpu_diff());
  }
}
void NormalizeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    for (int i = 0; i < bottom.size(); ++i) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      const Dtype* top_diff = top[i]->cpu_diff();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      Dtype scal;
      for (int n = 0; n < num_; ++n) {
        switch (op_) {
        case NormalizeParameter_NormalizeOp_DEMEAN:
          caffe_copy(imSz_, top_diff + n * imSz_, bottom_diff + n * imSz_);
          break;
        case NormalizeParameter_NormalizeOp_SDSCALE:
          caffe_copy(imSz_, bottom_data + n * imSz_,
              this->blobs_[0]->mutable_cpu_data());
          caffe_copy(imSz_, top_diff + n * imSz_,
              this->blobs_[0]->mutable_cpu_diff());
          // Find the scaling factor
          caffe_cpu_zero_mean(imSz_, this->blobs_[0]->mutable_cpu_data());
          scal = caffe_cpu_dot<Dtype>(imSz_, this->blobs_[0]->cpu_data(),
              this->blobs_[0]->cpu_data());
          // Apply the scaling to the gradients
          caffe_scal(imSz_, Dtype(1.0 / scal),
              this->blobs_[0]->mutable_cpu_diff());
          caffe_copy(imSz_, this->blobs_[0]->cpu_diff(),
              bottom_diff + n * imSz_);
          break;
        default:
          LOG(FATAL) << "Unknown elementwise operation.";
        }
      }
    }
  }
}
void NormalizeLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    Dtype scal;
    for (int n = 0; n < this->num_; ++n) {
      caffe_copy(imSz_, bottom_data + n * imSz_,
          this->blobs_[0]->mutable_cpu_data());
      switch (op_) {
      case NormalizeParameter_NormalizeOp_DEMEAN:
        caffe_cpu_zero_mean(imSz_, this->blobs_[0]->mutable_cpu_data());
        break;
      case NormalizeParameter_NormalizeOp_SDSCALE:
        caffe_cpu_zero_mean(imSz_, this->blobs_[0]->mutable_cpu_data());
        scal = caffe_cpu_dot<Dtype>(imSz_, this->blobs_[0]->cpu_data(),
            this->blobs_[0]->cpu_data());
        caffe_scal(imSz_, Dtype(1.0 / scal),
            this->blobs_[0]->mutable_cpu_data());
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
      caffe_copy(imSz_, this->blobs_[0]->cpu_data(), top_data + n * imSz_);
    }
  }
}
void Softmax2WithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type_name()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* top_prob_data = top_prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = (*bottom)[1]->cpu_data();
    int num = prob_.num();
    int dim = prob_.count() / num;
    int top_dim = top_prob_.count() / num;
    int spatial_dim = prob_.height() * prob_.width();
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < spatial_dim; ++j) {
        int label_v = static_cast<int>(label[i * spatial_dim + j]);
        int top_label_v = top_dict_.cpu_data()[label_v];
        bottom_diff[i * dim + label_v * spatial_dim + j] -= lambda_;
        for (int k = 0; k < prob_.channels(); ++k) {
          if (top_label_v == top_dict_.cpu_data()[k]) {
            bottom_diff[i * dim + k * spatial_dim + j] -=
                (1 - lambda_) * prob_data[i * dim + k * spatial_dim + j]
                / top_prob_data[i * top_dim + top_label_v * spatial_dim + j];
          }
        }
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
  }
}
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type_name()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int num = prob_.num();
    int dim = prob_.count() / num;
    int spatial_dim = prob_.height() * prob_.width();
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < spatial_dim; ++j) {
        bottom_diff[i * dim
            + static_cast<int>(label[i * spatial_dim + j]) * spatial_dim
            + j] -= 1;
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
  }
}
void WeightPlusLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_scal(dim_, Dtype(2.0), weight_two_.mutable_cpu_data());
  // gradient with respect to the weight
  for (int n = 0; n < batch_; ++n) {
    int offset = n * dim_;
    caffe_mul(dim_, weight_two_.cpu_data(), bottom_data + offset,
        data_meta_.mutable_cpu_data() + offset);
    caffe_mul(dim_, top_diff + offset, data_meta_.cpu_data() + offset,
        data_meta_.mutable_cpu_data() + offset);
    caffe_axpy(dim_, Dtype(1.0), data_meta_.cpu_data() + offset,
        this->blobs_[0]->mutable_cpu_diff());
  }
  // gradient with respect to the bottom data
  if (propagate_down[0]) {
    for (int n = 0; n < batch_; ++n) {
      int offset = n * dim_;
      caffe_mul(dim_, top_diff + offset, weight_two_.cpu_data(),
          bottom_diff + offset);
    }
  }
}
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    Dtype normalizer = Dtype(num);
    if (this->layer_param_.loss_param().normalization() ==
        LossParameter_NormalizationMode_NONE) {
      normalizer = 1.f;
    }
#ifdef USE_MLSL
    else {
      // We assume the local batch size is the same across all nodes
      normalizer *= mn::get_group_size();
    }
#endif
    caffe_scal(count, loss_weight / normalizer, bottom_diff);
  }
}
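// Aside: a standalone check that the diff computed above, sigmoid(x) - target,
// is indeed the derivative of the sigmoid cross-entropy loss
// -(t * log(s) + (1 - t) * log(1 - s)) with s = sigmoid(x). Names are local
// to the sketch.
#include <cmath>
#include <cstdio>

static double sigmoid_(double x) { return 1.0 / (1.0 + std::exp(-x)); }

static double xent_(double x, double t) {
  const double s = sigmoid_(x);
  return -(t * std::log(s) + (1.0 - t) * std::log(1.0 - s));
}

int main() {
  const double x = 0.3, t = 1.0, h = 1e-6;
  std::printf("analytic=%.6f numeric=%.6f\n", sigmoid_(x) - t,
              (xent_(x + h, t) - xent_(x - h, t)) / (2.0 * h));
  return 0;
}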
void ReductionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* mult_data = NULL;
  if (sum_multiplier_.count() > 0) {
    mult_data = sum_multiplier_.cpu_data();
  }
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int i = 0; i < num_; ++i) {
    switch (op_) {
    case ReductionParameter_ReductionOp_SUM:
    case ReductionParameter_ReductionOp_MEAN:
      *top_data = caffe_cpu_dot(dim_, mult_data, bottom_data);
      break;
    case ReductionParameter_ReductionOp_ASUM:
      *top_data = caffe_cpu_asum(dim_, bottom_data);
      break;
    case ReductionParameter_ReductionOp_SUMSQ:
      *top_data = caffe_cpu_dot(dim_, bottom_data, bottom_data);
      break;
    default:
      LOG(FATAL) << "Unknown reduction op: "
                 << ReductionParameter_ReductionOp_Name(op_);
    }
    bottom_data += dim_;
    ++top_data;
  }
  if (coeff_ != Dtype(1)) {
    // Reset the top_data pointer.
    top_data = top[0]->mutable_cpu_data();
    caffe_scal(num_, coeff_, top_data);
  }
}
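// Aside: SUM and MEAN share a code path above because a sum is a dot product
// with a vector of ones; MEAN is assumed to fold the 1/dim factor into
// coeff_ during setup, so the single trailing caffe_scal covers both cases.
// A standalone sketch of that decomposition, with local names:
#include <cstdio>

int main() {
  const int dim = 4;
  const double x[dim] = {1.0, 2.0, 3.0, 4.0};
  const double ones[dim] = {1.0, 1.0, 1.0, 1.0};
  double dot = 0.0;
  for (int i = 0; i < dim; ++i) { dot += x[i] * ones[i]; }  // caffe_cpu_dot analogue
  const double coeff = 1.0 / dim;  // what MEAN would fold into coeff_
  std::printf("sum=%.1f mean=%.2f\n", dot, dot * coeff);
  return 0;
}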
void SigmoidCrossEntropyLossMaskLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Zero the diff at masked-out positions (and everywhere when negsig
    // is set).
    const Dtype* use_mask = mask_.cpu_data();
    for (int i = 0; i < count; ++i) {
      if (negsig) { bottom_diff[i] = 0; }
      if (use_mask[i] == 0) { bottom_diff[i] = 0; }
    }
    // Scale down gradient, normalizing by the number of unmasked entries.
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / mask_.asum_data(), bottom_diff);
  }
}
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* mask = rand_vec_->mutable_cpu_data();
  const int count = rand_vec_->count();
  if (this->phase_ == TRAIN) {
    switch (drop_type_) {
    case DropoutParameter_DropType_BERNOULLI: {
      // Create random numbers
      caffe_rng_bernoulli(count, Dtype(1. - threshold_), mask);
      break;
    }
    case DropoutParameter_DropType_GAUSSIAN: {
      caffe_rng_gaussian(count, Dtype(mu_), Dtype(sigma_), mask);
      // clip to be in [0, 1]
      for (int i = 0; i < count; ++i) {
        Dtype m = mask[i];
        mask[i] = m > 1 ? 1 : (m < 0 ? 0 : m);
      }
      break;
    }
    case DropoutParameter_DropType_UNIFORM: {
      caffe_rng_uniform(count, Dtype(a_), Dtype(b_), mask);
      break;
    }
    }
    if (drop_batch_) {
      Dtype drop = mask[0];
      caffe_copy(top[0]->count(), bottom_data, top_data);
      caffe_scal(top[0]->count(), Dtype(scale_ * drop), top_data);
    } else {
      vector<Blob<Dtype>*> scale_bottom(2, NULL);
      scale_bottom[0] = bottom[0];
      scale_bottom[1] = rand_vec_;
      const vector<Blob<Dtype>*> scale_top(1, top[0]);
      scale_layer_->Forward(scale_bottom, scale_top);
      caffe_scal(top[0]->count(), scale_, top_data);
    }
  } else {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }
}
void BatchContrastiveLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* label = bottom[1]->cpu_data();
    int num = bottom[0]->num();
    caffe_set(num * num, Dtype(0), bottom_diff);
    if (max_only_) {
      if (max_positive_1_ >= 0 && max_positive_2_ >= 0) {
        bottom_diff[max_positive_1_ * num + max_positive_2_] =
            positive_weight_;
      }
      if (min_negative_1_ >= 0 && min_negative_2_ >= 0) {
        bottom_diff[min_negative_1_ * num + min_negative_2_] =
            -negative_weight_;
      }
    } else {
      for (int i = 0; i < num; ++i) {
        for (int j = i + 1; j < num; ++j) {
          if (label[i] == label[j]) {
            if (bottom_data[i * num + j] > positive_margin_) {
              bottom_diff[i * num + j] = positive_weight_;
            }
          } else {
            if (bottom_data[i * num + j] < negative_margin_) {
              bottom_diff[i * num + j] = -negative_weight_;
            }
          }
        }
      }
    }
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (max_only_) {
      caffe_scal(bottom[0]->count(), loss_weight / 2, bottom_diff);
    } else {
      caffe_scal(bottom[0]->count(), loss_weight / num, bottom_diff);
    }
  }
}
void IslandLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Gradient with respect to centers
  if (this->param_propagate_down_[0]) {
    const Dtype* label = bottom[1]->cpu_data();
    Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
    const Dtype* center_data = this->blobs_[0]->cpu_data();
    Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
    const Dtype* distance_data = distance_.cpu_data();
    // \sum_{y_i == j}
    caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data());
    for (int n = 0; n < N_; n++) {
      int count = 0;
      for (int m = 0; m < M_; m++) {
        const int label_value = static_cast<int>(label[m]);
        if (label_value == n) {
          count++;
          caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_,
              variation_sum_data + n * K_);
        }
      }
      caffe_axpy(K_, (Dtype)1. / (count + (Dtype)1.),
          variation_sum_data + n * K_, center_diff + n * K_);
    }
    // Backward pass of the pairwise (island) term between centers.
    for (int n = 0; n < N_; ++n) {
      Dtype double_center_module_n = center_module_[n] * center_module_[n];
      for (int i = 0; i < N_; ++i) {
        if (i == n) {
          continue;
        }
        // Update center n against center i.
        Dtype alpha = center_module_[n] * center_module_[i];
        Dtype beta = center_dot_[n][i] / (alpha * double_center_module_n);
        // alpha * c_i - beta * c_n. Each pair is visited twice over the
        // double loop, hence the factor of 2; contributions from every i
        // are accumulated into center_diff.
        for (int k = 0; k < K_; ++k) {
          center_diff[n * K_ + k] += 2 * lambda_ / (N_ - 1)
              * (alpha * center_data[i * K_ + k]
                 - beta * center_data[n * K_ + k]);
        }
      }
    }
  }
  // Gradient with respect to bottom data
  if (propagate_down[0]) {
    caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
    caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_,
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
}
void EuclideanLossWithIgnoreLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Number of elements per sample in the prediction and label blobs.
  int bCount = bottom[0]->count(1, bottom[0]->num_axes());
  int lbCount = bottom[1]->count(1, bottom[1]->num_axes());
  int N = bottom[0]->shape(0);
  if (lCount_ == Dtype(0)) {
    LOG(INFO) << "EuclideanLossWithIgnore was silent for this batch";
    return;
  }
  // Compute the gradients
  for (int i = 0; i < 2; ++i) {
    const Dtype sign = (i == 0) ? 1 : -1;
    const Dtype alpha = sign * top[0]->cpu_diff()[0] / lCount_;
    Dtype Z;
    const Dtype* botZData = bottom[nc_]->cpu_data();
    Dtype* botDiff = bottom[i]->mutable_cpu_diff();
    const Dtype* labels = bottom[1]->cpu_data();
    Dtype* diff = diff_.mutable_cpu_data();
    const Dtype* diffC = diff_.cpu_data();
    if (propagate_down[i]) {
      for (int n = 0; n < N; ++n) {
        // labels[bCount] is read as the per-sample validity flag, assuming
        // the label blob carries one extra element per sample.
        if (labels[bCount] == Dtype(1)) {
          if (is_normalize_) {
            Z = caffe_cpu_dot(bCount, botZData, botZData);
            if (Z > 0) {
              // Scale only this sample's slice of diff_; scaling the whole
              // buffer from this offset would run past its end.
              caffe_scal(bCount, Z, diff);
            }
          }
          caffe_cpu_axpby(bCount,    // count
                          alpha,     // alpha
                          diffC,     // x
                          Dtype(0),  // beta
                          botDiff);  // y
        }
        labels += lbCount;
        diff += bCount;
        diffC += bCount;
        botZData += (nc_ == 0) ? bCount : lbCount;
        botDiff += (i == 0) ? bCount : lbCount;
      }
    }
  }
}
void WeightedSoftmaxWithLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    const Dtype* sample_weight = bottom[2]->cpu_data();
    int num = prob_.num();
    int dim = prob_.count() / num;
    int spatial_dim = prob_.height() * prob_.width();
    int count = 0;
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < spatial_dim; ++j) {
        const int label_value = static_cast<int>(label[i * spatial_dim + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->channels(); ++c) {
            bottom_diff[i * dim + c * spatial_dim + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
          // Scale every class entry at this position by the sample weight.
          Dtype w = sample_weight[i * spatial_dim + j];
          for (int k = 0; k < bottom[0]->channels(); ++k) {
            bottom_diff[i * dim + k * spatial_dim + j] *= w;
          }
          ++count;
        }
      }
    }
    // Scale gradient. Guard count against zero when every position is
    // ignored.
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (normalize_) {
      caffe_scal(prob_.count(), loss_weight / std::max(count, 1), bottom_diff);
    } else {
      caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
    }
  }
}
void SigmoidWeightedCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to weight inputs.";
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    const Dtype* weight = bottom[2]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    Dtype* tmp = new Dtype[count << 1];
    Dtype* tmp1 = tmp + count;
    // diff: 1/2
    caffe_set(count, (Dtype)0.5, bottom_diff);
    // diff: 1/2 * \hat{p}
    caffe_mul(count, bottom_diff, sigmoid_output_data, bottom_diff);
    // diff: 1/2 * (1 - p) * \hat{p}
    caffe_set(count, (Dtype)1, tmp1);
    caffe_sub(count, tmp1, target, tmp);
    caffe_mul(count, bottom_diff, tmp, bottom_diff);
    // diff: (1 - p) * \hat{p} / (2 * (1 - w))
    caffe_sub(count, tmp1, weight, tmp);
    caffe_div(count, bottom_diff, tmp, bottom_diff);
    // tmp: 1 - \hat{p}
    caffe_sub(count, tmp1, sigmoid_output_data, tmp);
    // tmp: p * (1 - \hat{p})
    caffe_mul(count, tmp, target, tmp);
    // tmp: -1/2 * p * (1 - \hat{p})
    caffe_set(count, (Dtype)-0.5, tmp1);
    caffe_mul(count, tmp, tmp1, tmp);
    // tmp: -p * (1 - \hat{p}) / (2 * w)
    caffe_div(count, tmp, weight, tmp);
    // diff: -(p * (1 - \hat{p}) / (2w) - (1 - p) * \hat{p} / (2(1 - w)))
    caffe_add(count, bottom_diff, tmp, bottom_diff);
    delete[] tmp;
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
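// Aside: the chain of kernel calls above evaluates what appears to be the
// derivative of a class-balanced cross entropy, assuming the forward loss is
//   L = -( p * log(q) / (2w) + (1 - p) * log(1 - q) / (2(1 - w)) )
// with q = sigmoid(x), so that
//   dL/dx = (1 - p) * q / (2(1 - w)) - p * (1 - q) / (2w).
// A standalone finite-difference check under that assumption; names local.
#include <cmath>
#include <cstdio>

static double wxent(double x, double p, double w) {
  const double q = 1.0 / (1.0 + std::exp(-x));
  return -(p * std::log(q) / (2.0 * w) +
           (1.0 - p) * std::log(1.0 - q) / (2.0 * (1.0 - w)));
}

int main() {
  const double x = 0.4, p = 1.0, w = 0.7, h = 1e-6;
  const double q = 1.0 / (1.0 + std::exp(-x));
  const double analytic =
      (1.0 - p) * q / (2.0 * (1.0 - w)) - p * (1.0 - q) / (2.0 * w);
  std::printf("analytic=%.6f numeric=%.6f\n", analytic,
              (wxent(x + h, p, w) - wxent(x - h, p, w)) / (2.0 * h));
  return 0;
}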
void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_cpu_copy(count, bottom_data, bottom_diff);
  if (input_scale_ != Dtype(1)) {
    caffe_scal(count, input_scale_, bottom_diff);
  }
  if (input_shift_ != Dtype(0)) {
    caffe_add_scalar(count, input_shift_, bottom_diff);
  }
  caffe_powx(count, bottom_diff, Dtype(-1), bottom_diff);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_scal(count, backward_num_scale_, bottom_diff);
  }
  caffe_mul(count, top_diff, bottom_diff, bottom_diff);
}
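// Aside: for y = log_b(scale * x + shift) the derivative is
// dy/dx = scale / ((scale * x + shift) * ln(b)), which matches the pipeline
// above if backward_num_scale_ is assumed to hold input_scale_ / ln(base).
// A standalone finite-difference check under that assumption; names local.
#include <cmath>
#include <cstdio>

int main() {
  const double base = 10.0, scale = 2.0, shift = 0.5, x = 1.2, h = 1e-6;
  const double back_num_scale = scale / std::log(base);  // assumed value of
                                                         // backward_num_scale_
  const double analytic = back_num_scale / (scale * x + shift);
  const double y_p = std::log(scale * (x + h) + shift) / std::log(base);
  const double y_m = std::log(scale * (x - h) + shift) / std::log(base);
  std::printf("analytic=%.6f numeric=%.6f\n", analytic,
              (y_p - y_m) / (2.0 * h));
  return 0;
}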
void MyNeuronLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    // d(x^power)/dx = power * x^(power - 1), then chain with the top diff.
    caffe_powx(count, bottom_data, Dtype(power_ - 1), bottom_diff);
    caffe_scal(count, Dtype(power_), bottom_diff);
    caffe_mul(count, bottom_diff, top_diff, bottom_diff);
  }
}
void ApplyUpdate(shared_ptr<Net<Dtype> > net, Dtype lr, int node_id) {
  const vector<shared_ptr<Blob<Dtype> > >& net_params = net->params();
  for (size_t i = 0; i < net_params.size(); i++) {
    auto param = net_params[i];
    recv_diff(param.get(), node_id);
    caffe_scal(param->count(), lr, param->mutable_cpu_diff());
    param->Update();
    send_data(param.get(), node_id);
  }
}
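// Aside: assuming Blob::Update() applies data -= diff (as in stock Caffe),
// pre-scaling the received gradient by lr turns the loop above into a plain
// SGD step, param = param - lr * grad. A scalar sketch with local names:
#include <cstdio>

int main() {
  double param = 1.0, grad = 0.4;
  const double lr = 0.1;
  grad *= lr;     // caffe_scal(count, lr, diff) analogue
  param -= grad;  // Blob::Update() analogue
  std::printf("param=%.2f\n", param);  // 1.0 - 0.1 * 0.4 = 0.96
  return 0;
}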
void ExpLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_mul(count, top_data, top_diff, bottom_diff);
  if (inner_scale_ != Dtype(1)) {
    caffe_scal(count, inner_scale_, bottom_diff);
  }
}
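// Aside: for y = base^(scale * x + shift) = exp(inner_scale * x + c) with
// inner_scale = scale * ln(base), the derivative is dy/dx = inner_scale * y,
// which is exactly the top_data * top_diff * inner_scale_ product above.
// A standalone check using a generic inner_scale; names local to the sketch.
#include <cmath>
#include <cstdio>

int main() {
  const double inner_scale = 0.7, x = 1.1, h = 1e-6;
  const double y = std::exp(inner_scale * x);
  const double analytic = inner_scale * y;
  const double numeric =
      (std::exp(inner_scale * (x + h)) - std::exp(inner_scale * (x - h))) /
      (2.0 * h);
  std::printf("analytic=%.6f numeric=%.6f\n", analytic, numeric);
  return 0;
}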
void LogLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  if (input_scale_ == Dtype(1) && input_shift_ == Dtype(0)) {
    caffe_log(count, bottom_data, top_data);
  } else {
    caffe_cpu_copy(count, bottom_data, top_data);
    if (input_scale_ != Dtype(1)) {
      caffe_scal(count, input_scale_, top_data);
    }
    if (input_shift_ != Dtype(0)) {
      caffe_add_scalar(count, input_shift_, top_data);
    }
    caffe_log(count, top_data, top_data);
  }
  if (base_scale_ != Dtype(1)) {
    caffe_scal(count, base_scale_, top_data);
  }
}
void InfogainLossLayer<Dtype, MItype, MOtype>::Backward_cpu(
    const vector<Blob<MOtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<MItype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down.size() > 2 && propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to infogain inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    const Dtype* infogain_mat = NULL;
    if (bottom.size() < 3) {
      infogain_mat = infogain_.cpu_data();
    } else {
      infogain_mat = bottom[2]->cpu_data();
      // H is provided as a "bottom" and might change; sum rows every time.
      sum_rows_of_H(bottom[2]);
    }
    const Dtype* sum_rows_H = sum_rows_H_.cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int_tp dim = bottom[0]->count() / outer_num_;
    int_tp count = 0;
    for (int_tp i = 0; i < outer_num_; ++i) {
      for (int_tp j = 0; j < inner_num_; ++j) {
        const int_tp label_value =
            static_cast<int_tp>(bottom_label[i * inner_num_ + j]);
        DCHECK_GE(label_value, 0);
        DCHECK_LT(label_value, num_labels_);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] = 0;
          }
        } else {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] =
                prob_data[i * dim + l * inner_num_ + j]
                    * sum_rows_H[label_value]
                - infogain_mat[label_value * num_labels_ + l];
          }
          ++count;
        }
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0]
        / get_normalizer(normalization_, count);
    caffe_scal(bottom[0]->count(), loss_weight, bottom_diff);
  }
}