template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    // Start from the softmax output p; the gradient is p - 1 at the
    // ground-truth class and p elsewhere.
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int dim = prob_.count() / outer_num_;
    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          // Zero the gradient at ignored positions.
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
          ++count;
        }
      }
    }
    // Scale gradient by the top loss weight divided by the normalizer.
    Dtype norm = get_normalizer(normalization_, count);
    if (norm > 1e-8) {
      Dtype loss_weight = top[0]->cpu_diff()[0] / norm;
      caffe_scal(prob_.count(), loss_weight, bottom_diff);
    }
  }
}
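// get_normalizer() is used throughout this section but not shown. The sketch
// below follows the standard Caffe LossLayer normalization modes (FULL, VALID,
// BATCH_SIZE, NONE); treat it as an assumption about this branch rather than
// a verbatim copy of its code.
template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::get_normalizer(
    LossParameter_NormalizationMode normalization_mode, int valid_count) {
  Dtype normalizer;
  switch (normalization_mode) {
    case LossParameter_NormalizationMode_FULL:
      normalizer = Dtype(outer_num_ * inner_num_);
      break;
    case LossParameter_NormalizationMode_VALID:
      // Fall back to FULL when no valid count was accumulated.
      normalizer = (valid_count == -1) ?
          Dtype(outer_num_ * inner_num_) : Dtype(valid_count);
      break;
    case LossParameter_NormalizationMode_BATCH_SIZE:
      normalizer = Dtype(outer_num_);
      break;
    case LossParameter_NormalizationMode_NONE:
      normalizer = Dtype(1);
      break;
    default:
      LOG(FATAL) << "Unknown normalization mode: "
                 << LossParameter_NormalizationMode_Name(normalization_mode);
  }
  // Clamp to 1 so an all-ignored batch does not produce NaN gradients.
  return std::max(Dtype(1.0), normalizer);
}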
template <typename Dtype>
void FocalLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // data
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* label = bottom[1]->cpu_data();
    // intermediate values cached by the forward pass
    const Dtype* log_prob_data = log_prob_.cpu_data();
    const Dtype* power_prob_data = power_prob_.cpu_data();

    int count = 0;
    int channels = bottom[0]->shape(softmax_axis_);
    int dim = prob_.count() / outer_num_;
    const Dtype eps = 1e-10;

    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        // label
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        // ignore label
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < channels; ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
          continue;
        }
        // the gradient of FL w.r.t. p_t, with the sign ignored
        int ind_i = i * dim + label_value * inner_num_ + j;  // index of the ground-truth label
        Dtype grad = 0 - gamma_ * (power_prob_data[ind_i] / std::max(1 - prob_data[ind_i], eps))
                         * log_prob_data[ind_i]
                     + power_prob_data[ind_i] / prob_data[ind_i];
        // the gradient w.r.t. the input data x, via the softmax Jacobian
        for (int c = 0; c < channels; ++c) {
          int ind_j = i * dim + c * inner_num_ + j;
          if (c == label_value) {
            CHECK_EQ(ind_i, ind_j);
            // c == label_value in the softmax derivative
            bottom_diff[ind_j] = grad * prob_data[ind_i] * (prob_data[ind_i] - 1);
          } else {
            // c != label_value in the softmax derivative
            bottom_diff[ind_j] = grad * prob_data[ind_i] * prob_data[ind_j];
          }
        }
        // count valid (non-ignored) positions
        ++count;
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] / get_normalizer(normalization_, count);
    caffe_scal(prob_.count(), loss_weight, bottom_diff);
  }
}
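// Derivation behind the two branches above, assuming the forward pass caches
// log_prob_ = log(p) and power_prob_ = alpha * (1 - p)^gamma (see Forward_cpu
// below):
//   FL(p_t)  = -alpha * (1 - p_t)^gamma * log(p_t)
//   dFL/dp_t =  alpha * gamma * (1 - p_t)^(gamma - 1) * log(p_t)
//              - alpha * (1 - p_t)^gamma / p_t
// `grad` is -dFL/dp_t expressed with the cached blobs. The softmax Jacobian
// contributes dp_t/dx_c = p_t * (1 - p_t) for c == label and -p_t * p_c
// otherwise; the dropped signs cancel, which is why the c == label branch
// multiplies by (p_t - 1) and the other branch by +p_c.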
template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  int_tp dim = prob_.count() / outer_num_;
  int_tp count = 0;
  Dtype loss = 0;
  for (int_tp i = 0; i < outer_num_; ++i) {
    for (int_tp j = 0; j < inner_num_; j++) {
      const int_tp label_value =
          static_cast<int_tp>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, prob_.shape(softmax_axis_));
      loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
                           Dtype(FLT_MIN)));
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}
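// The loop above evaluates the multinomial logistic loss on the softmax
// output, i.e. L = -(1/N) * sum_i log(p_{i, y_i}), where y_i is the
// ground-truth label at position i, ignored positions are skipped, and N is
// the value returned by get_normalizer().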
template <typename Dtype>
void SmoothL1LossOHEMLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_sub(count,
            bottom[0]->cpu_data(),
            bottom[1]->cpu_data(),
            diff_.mutable_cpu_data());    // d := b0 - b1
  if (has_weights_) {
    caffe_mul(count,
              bottom[2]->cpu_data(),
              diff_.cpu_data(),
              diff_.mutable_cpu_data());  // d := w * (b0 - b1)
  }

  // Element-wise smooth L1 on the (weighted) difference.
  const Dtype* diff_data = diff_.cpu_data();
  Dtype* error_data = errors_.mutable_cpu_data();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int index = 0; index < count; index++) {
    Dtype val = diff_data[index];
    Dtype abs_val = fabs(val);
    if (abs_val < 1) {
      error_data[index] = 0.5 * val * val;
    } else {
      error_data[index] = abs_val - 0.5;
    }
  }

  Dtype loss = caffe_cpu_asum(count, errors_.cpu_data());
  Dtype pre_fixed_normalizer =
      this->layer_param_.loss_param().pre_fixed_normalizer();
  top[0]->mutable_cpu_data()[0] =
      loss / get_normalizer(normalization_, pre_fixed_normalizer);

  // Output per-instance loss: sum the element-wise errors over channels.
  if (top.size() >= 2) {
    Dtype* per_instance = top[1]->mutable_cpu_data();
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; j++) {
        Dtype sum = 0;
        for (int c = 0; c < bottom[0]->channels(); ++c) {
          sum += error_data[(i * bottom[0]->channels() + c) * inner_num_ + j];
        }
        per_instance[i * inner_num_ + j] = sum;
      }
    }
  }
}
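// The element-wise smooth L1 computed above, with its derivative for
// reference (the matching Backward_cpu is not shown in this section):
//   f(x)  = 0.5 * x^2    if |x| < 1
//           |x| - 0.5    otherwise
//   f'(x) = x            if |x| < 1
//           sign(x)      otherwise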
template<typename Dtype, typename MItype, typename MOtype>
void InfogainLossLayer<Dtype, MItype, MOtype>::Backward_cpu(
    const vector<Blob<MOtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<MItype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down.size() > 2 && propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to infogain inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    const Dtype* infogain_mat = NULL;
    if (bottom.size() < 3) {
      infogain_mat = infogain_.cpu_data();
    } else {
      infogain_mat = bottom[2]->cpu_data();
      // H is provided as a "bottom" and might change; sum its rows every time.
      sum_rows_of_H(bottom[2]);
    }
    const Dtype* sum_rows_H = sum_rows_H_.cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int_tp dim = bottom[0]->count() / outer_num_;
    int_tp count = 0;
    for (int_tp i = 0; i < outer_num_; ++i) {
      for (int_tp j = 0; j < inner_num_; ++j) {
        const int_tp label_value =
            static_cast<int_tp>(bottom_label[i * inner_num_ + j]);
        DCHECK_GE(label_value, 0);
        DCHECK_LT(label_value, num_labels_);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] = 0;
          }
        } else {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] =
                prob_data[i * dim + l * inner_num_ + j] * sum_rows_H[label_value]
                - infogain_mat[label_value * num_labels_ + l];
          }
          ++count;
        }
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0]
        / get_normalizer(normalization_, count);
    caffe_scal(bottom[0]->count(), loss_weight, bottom_diff);
  }
}
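// sum_rows_of_H() is referenced above but not shown. A minimal sketch,
// modeled on the upstream Caffe InfogainLossLayer; treat it as an assumption
// about this branch rather than its exact code:
template<typename Dtype, typename MItype, typename MOtype>
void InfogainLossLayer<Dtype, MItype, MOtype>::sum_rows_of_H(
    const Blob<MItype>* H) {
  CHECK_EQ(H->count(), num_labels_ * num_labels_)
      << "H must be " << num_labels_ << "x" << num_labels_;
  const Dtype* infogain_mat = H->cpu_data();
  Dtype* sum = sum_rows_H_.mutable_cpu_data();
  for (int_tp row = 0; row < num_labels_; ++row) {
    sum[row] = 0;
    for (int_tp col = 0; col < num_labels_; ++col) {
      sum[row] += infogain_mat[row * num_labels_ + col];
    }
  }
}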
template <typename Dtype>
void FocalLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  // compute all needed intermediate values (log_prob_, power_prob_)
  compute_intermediate_values_of_cpu();
  const Dtype* label = bottom[1]->cpu_data();
  const Dtype* log_prob_data = log_prob_.cpu_data();
  const Dtype* power_prob_data = power_prob_.cpu_data();

  // compute loss
  int count = 0;
  int channels = prob_.shape(softmax_axis_);
  int dim = prob_.count() / outer_num_;
  Dtype loss = 0;
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; j++) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, channels);
      const int index = i * dim + label_value * inner_num_ + j;
      // FL(p_t) = -(1 - p_t)^gamma * log(p_t)
      // loss -= std::max(power_prob_data[index] * log_prob_data[index],
      //                  Dtype(log(Dtype(FLT_MIN))));
      loss -= power_prob_data[index] * log_prob_data[index];
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}
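// compute_intermediate_values_of_cpu() is not shown in this section. Below is
// a hypothetical sketch consistent with how log_prob_ and power_prob_ are
// consumed above (log_prob_ = log(p), power_prob_ = alpha * (1 - p)^gamma);
// the member name alpha_ follows common focal-loss ports and is an
// assumption, and the exact form in this branch may differ:
template <typename Dtype>
void FocalLossLayer<Dtype>::compute_intermediate_values_of_cpu() {
  const int count = prob_.count();
  const Dtype* prob_data = prob_.cpu_data();
  Dtype* log_prob_data = log_prob_.mutable_cpu_data();
  Dtype* power_prob_data = power_prob_.mutable_cpu_data();
  const Dtype eps = Dtype(1e-10);
  for (int i = 0; i < count; ++i) {
    // log(p), clamped away from log(0)
    log_prob_data[i] = log(std::max(prob_data[i], eps));
    // alpha * (1 - p)^gamma, the focal modulating factor
    power_prob_data[i] = alpha_ * pow(Dtype(1) - prob_data[i], gamma_);
  }
}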
template<typename Dtype, typename MItype, typename MOtype>
void InfogainLossLayer<Dtype, MItype, MOtype>::Forward_cpu(
    const vector<Blob<MItype>*>& bottom, const vector<Blob<MOtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* bottom_label = bottom[1]->cpu_data();
  const Dtype* infogain_mat = NULL;
  if (bottom.size() < 3) {
    infogain_mat = infogain_.cpu_data();
  } else {
    infogain_mat = bottom[2]->cpu_data();
  }
  int_tp count = 0;
  Dtype loss = 0;
  for (int_tp i = 0; i < outer_num_; ++i) {
    for (int_tp j = 0; j < inner_num_; j++) {
      const int_tp label_value =
          static_cast<int_tp>(bottom_label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, num_labels_);
      for (int_tp l = 0; l < num_labels_; l++) {
        loss -= infogain_mat[label_value * num_labels_ + l] *
            std::log(std::max(
                prob_data[i * inner_num_ * num_labels_ + l * inner_num_ + j],
                Dtype(kLOG_THRESHOLD)));
      }
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}
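// The loop above computes the infogain loss
//   L = -(1/N) * sum_i sum_l H[y_i, l] * log(p_{i, l})
// where H is the num_labels_ x num_labels_ infogain matrix and N is the
// normalizer. With H set to the identity matrix this reduces to the
// multinomial logistic loss of SoftmaxWithLossLayer above.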