template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int dim = prob_.count() / outer_num_;
    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
          ++count;
        }
      }
    }
    // Scale gradient
    Dtype norm = get_normalizer(normalization_, count);
    if (norm > 1e-8) {
      Dtype loss_weight = top[0]->cpu_diff()[0] / norm;
      caffe_scal(prob_.count(), loss_weight, bottom_diff);
    }
  }
}
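For reference, a sketch of the gradient this backward pass writes before the final scaling (p is the softmax output cached in prob_, y_i the label at sample/position i, w the incoming loss weight top[0]->cpu_diff()[0], and N the value returned by get_normalizer):

\[
\frac{\partial L}{\partial x_{i,c}} = \frac{w}{N}\left(p_{i,c} - \mathbf{1}[c = y_i]\right)
\]

Positions carrying the ignore label are zeroed instead of receiving this update.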
Example No. 2
template <typename Dtype>
void FocalLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) 
{
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }

  if (propagate_down[0]) {
    // data
    Dtype* bottom_diff     = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* label     = bottom[1]->cpu_data();
    // intermediate values computed during the forward pass
    const Dtype* log_prob_data   = log_prob_.cpu_data();
    const Dtype* power_prob_data = power_prob_.cpu_data();

    int count       = 0;
    int channels    = bottom[0]->shape(softmax_axis_);
    int dim         = prob_.count() / outer_num_;
    const Dtype eps = 1e-10;

    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        // label
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        
        // ignore label
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < channels; ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
          continue;
        }

        // gradient of FL w.r.t. p_t, with the sign dropped; it is restored when
        // multiplying by the softmax derivative below
        int ind_i  = i * dim + label_value * inner_num_ + j;  // index of the ground-truth class
        Dtype grad = 0 - gamma_ * (power_prob_data[ind_i] / std::max(1 - prob_data[ind_i], eps)) * log_prob_data[ind_i]
                       + power_prob_data[ind_i] / prob_data[ind_i];
        // the gradient w.r.t input data x
        for (int c = 0; c < channels; ++c) {
          int ind_j = i * dim + c * inner_num_ + j;
          if (c == label_value) {
            CHECK_EQ(ind_i, ind_j);
            // diagonal term of the softmax derivative (c == label)
            bottom_diff[ind_j] = grad * prob_data[ind_i] * (prob_data[ind_i] - 1);
          } else {
            // off-diagonal term of the softmax derivative (c != label)
            bottom_diff[ind_j] = grad * prob_data[ind_i] * prob_data[ind_j];
          }
        }
        // count                    
        ++count;
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] / get_normalizer(normalization_, count);
    caffe_scal(prob_.count(), loss_weight, bottom_diff);
  }
}
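A sketch of the chain rule the two branches above implement, assuming (as the variable names and the forward pass suggest) that power_prob_ caches \(\alpha(1-p)^{\gamma}\) and log_prob_ caches \(\log p\); p_t denotes the probability of the ground-truth class:

\[
FL(p_t) = -\alpha(1-p_t)^{\gamma}\log p_t,
\qquad
\texttt{grad} = -\frac{\partial FL}{\partial p_t}
             = -\gamma\,\frac{\alpha(1-p_t)^{\gamma}}{1-p_t}\,\log p_t
               + \frac{\alpha(1-p_t)^{\gamma}}{p_t}.
\]

Multiplying by the softmax derivative \(\partial p_t/\partial x_c = p_t(\mathbf{1}[c=t] - p_c)\) gives \(\texttt{grad}\cdot p_t(p_t-1)\) on the ground-truth channel and \(\texttt{grad}\cdot p_t\,p_c\) on the others, which is exactly what the if/else writes into bottom_diff.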
Example No. 3
template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  int_tp dim = prob_.count() / outer_num_;
  int_tp count = 0;
  Dtype loss = 0;
  for (int_tp i = 0; i < outer_num_; ++i) {
    for (int_tp j = 0; j < inner_num_; j++) {
      const int_tp label_value = static_cast<int_tp>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, prob_.shape(softmax_axis_));
      loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
                           Dtype(FLT_MIN)));
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}
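In equation form, the loop accumulates the multinomial logistic loss over all positions whose label is not the ignore label, then normalizes (p is the softmax output, y_i the label, N the normalizer returned by get_normalizer):

\[
L = \frac{1}{N}\sum_{i\,:\,y_i \neq \text{ignore}} -\log\bigl(\max(p_{i,y_i}, \texttt{FLT\_MIN})\bigr)
\]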
Example No. 4
template <typename Dtype>
void SmoothL1LossOHEMLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    int count = bottom[0]->count();
  
    caffe_sub(
      count,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());    // d := b0 - b1
    if (has_weights_) {
      caffe_mul(
        count,
        bottom[2]->cpu_data(),
        diff_.cpu_data(),
        diff_.mutable_cpu_data());  // d := w * (b0 - b1)
    }

    // Hoist the raw pointers out of the loop: calling cpu_data()/mutable_cpu_data()
    // from every OpenMP thread touches the SyncedMemory state concurrently.
    const Dtype* diff_data = diff_.cpu_data();
    Dtype* errors_data = errors_.mutable_cpu_data();
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (int index = 0; index < count; index++) {
      Dtype val = diff_data[index];
      Dtype abs_val = std::abs(val);  // std::abs, not the integer abs()
      if (abs_val < 1) {
        errors_data[index] = 0.5 * val * val;
      } else {
        errors_data[index] = abs_val - 0.5;
      }
    }

    Dtype loss = caffe_cpu_asum(count, errors_.cpu_data());

    Dtype pre_fixed_normalizer =
      this->layer_param_.loss_param().pre_fixed_normalizer();
    top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_,
      pre_fixed_normalizer);

    // Output per-instance loss
    if (top.size() >= 2) {
      const int channels = bottom[0]->channels();
      Dtype* per_instance_loss = top[1]->mutable_cpu_data();
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
      for (int i = 0; i < outer_num_; ++i) {
        for (int j = 0; j < inner_num_; ++j) {
          Dtype sum = 0;
          for (int c = 0; c < channels; ++c) {
            sum += errors_data[(i * channels + c) * inner_num_ + j];
          }
          per_instance_loss[i * inner_num_ + j] = sum;
        }
      }
    }
}
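The per-element error filled in by the parallel loop is the smooth-L1 (Huber-style) penalty on the difference, d = b0 - b1, or d = w * (b0 - b1) when per-element weights are provided:

\[
\text{smooth}_{L1}(d) =
\begin{cases}
0.5\,d^{2} & \text{if } |d| < 1,\\
|d| - 0.5 & \text{otherwise.}
\end{cases}
\]

top[0] then receives the sum of these errors divided by the normalizer, and the optional top[1] receives the per-location sum over channels.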
Example No. 5
template<typename Dtype, typename MItype, typename MOtype>
void InfogainLossLayer<Dtype, MItype, MOtype>::Backward_cpu(
    const vector<Blob<MOtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<MItype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down.size() > 2 && propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to infogain inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    const Dtype* infogain_mat = NULL;
    if (bottom.size() < 3) {
      infogain_mat = infogain_.cpu_data();
    } else {
      infogain_mat = bottom[2]->cpu_data();
      // H is provided as a "bottom" and might change. sum rows every time.
      sum_rows_of_H(bottom[2]);
    }
    const Dtype* sum_rows_H = sum_rows_H_.cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int_tp dim = bottom[0]->count() / outer_num_;
    int_tp count = 0;
    for (int_tp i = 0; i < outer_num_; ++i) {
      for (int_tp j = 0; j < inner_num_; ++j) {
        const int_tp label_value =
          static_cast<int_tp>(bottom_label[i * inner_num_ + j]);
        DCHECK_GE(label_value, 0);
        DCHECK_LT(label_value, num_labels_);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] = 0;
          }
        } else {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] =
               prob_data[i*dim + l*inner_num_ + j]*sum_rows_H[label_value]
               - infogain_mat[label_value * num_labels_ + l];
          }
          ++count;
        }
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] /
                        get_normalizer(normalization_, count);
    caffe_scal(bottom[0]->count(), loss_weight, bottom_diff);
  }
}
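A sketch of the per-element gradient the inner loop writes (H is the infogain matrix, y_i the label, p the softmax output; sum_rows_H_ caches the row sums of H):

\[
\frac{\partial L}{\partial x_{i,l}} = p_{i,l}\sum_{k} H_{y_i,k} - H_{y_i,l},
\]

after which the whole buffer is scaled by top[0]->cpu_diff()[0] / get_normalizer(...); positions with the ignore label are zeroed instead.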
Example No. 6
template <typename Dtype>
void FocalLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) 
{
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);

  // compute all needed values
  compute_intermediate_values_of_cpu();
  const Dtype* label           = bottom[1]->cpu_data();
  const Dtype* log_prob_data   = log_prob_.cpu_data();
  const Dtype* power_prob_data = power_prob_.cpu_data();

  // compute loss
  int count    = 0;
  int channels = prob_.shape(softmax_axis_);
  int dim      = prob_.count() / outer_num_;

  Dtype loss = 0;
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; j++) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, channels);
      const int index = i * dim + label_value * inner_num_ + j;
      // FL(p_t) = -(1 - p_t) ^ gamma * log(p_t)
      // loss -= std::max(power_prob_data[index] * log_prob_data[index],
      //                      Dtype(log(Dtype(FLT_MIN))));
      loss -= power_prob_data[index] * log_prob_data[index];
      ++count;
    }
  }

  // prob
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}
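The quantity accumulated per non-ignored position is the focal loss itself; a sketch, under the same assumption as before that power_prob_ caches \(\alpha(1-p)^{\gamma}\) and log_prob_ caches \(\log p\):

\[
L = \frac{1}{N}\sum_{i\,:\,y_i \neq \text{ignore}} -\alpha\,(1 - p_{i,y_i})^{\gamma}\,\log p_{i,y_i}
\]

With \(\gamma = 0\) and \(\alpha = 1\) this reduces to the plain softmax loss of the SoftmaxWithLossLayer example.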
Example No. 7
template<typename Dtype, typename MItype, typename MOtype>
void InfogainLossLayer<Dtype, MItype, MOtype>::Forward_cpu(
    const vector<Blob<MItype>*>& bottom,
    const vector<Blob<MOtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* bottom_label = bottom[1]->cpu_data();
  const Dtype* infogain_mat = NULL;
  if (bottom.size() < 3) {
    infogain_mat = infogain_.cpu_data();
  } else {
    infogain_mat = bottom[2]->cpu_data();
  }
  int_tp count = 0;
  Dtype loss = 0;
  for (int_tp i = 0; i < outer_num_; ++i) {
    for (int_tp j = 0; j < inner_num_; j++) {
      const int_tp label_value =
        static_cast<int_tp>(bottom_label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, num_labels_);
      for (int_tp l = 0; l < num_labels_; l++) {
        loss -= infogain_mat[label_value * num_labels_ + l] *
          std::log(std::max(
                  prob_data[i * inner_num_*num_labels_ + l * inner_num_ + j],
                  Dtype(kLOG_THRESHOLD)));
      }
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}
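In equation form, the double loop accumulates the infogain-weighted log loss (H is the infogain matrix, K = num_labels_, and kLOG_THRESHOLD is the clamping constant):

\[
L = \frac{1}{N}\sum_{i\,:\,y_i \neq \text{ignore}}\;\sum_{l=1}^{K} -H_{y_i,l}\,\log\bigl(\max(p_{i,l}, \texttt{kLOG\_THRESHOLD})\bigr)
\]

When H is the identity matrix this reduces to the usual softmax cross-entropy loss.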