Example 1
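// Softmax-with-loss backward: the gradient of the cross-entropy w.r.t. the softmax
// input is prob - 1 at the ground-truth class and prob elsewhere, so the code copies
// the probabilities, subtracts 1 at each label index (skipping ignored labels), and
// rescales by the loss weight over the chosen normalizer.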
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int dim = prob_.count() / outer_num_;
    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
          ++count;
        }
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (normalize_) {
      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
    } else {
      caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff);
    }
  }
}
Example 2
void HingeLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
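  // bottom_diff still holds the per-class hinge terms computed in Forward_cpu;
  // flip the sign at each ground-truth class, then apply the L1 (sign) or L2 (x2)
  // gradient and scale by loss_weight / num.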
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* label = bottom[1]->cpu_data();
    int num = bottom[0]->num();
    int count = bottom[0]->count();
    int dim = count / num;

    for (int i = 0; i < num; ++i) {
      bottom_diff[i * dim + static_cast<int>(label[i])] *= -1;
    }

    const Dtype loss_weight = top[0]->cpu_diff()[0];
    switch (this->layer_param_.hinge_loss_param().norm()) {
    case HingeLossParameter_Norm_L1:
      caffe_cpu_sign(count, bottom_diff, bottom_diff);
      caffe_scal(count, loss_weight / num, bottom_diff);
      break;
    case HingeLossParameter_Norm_L2:
      caffe_scal(count, loss_weight * 2 / num, bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown Norm";
    }
  }
}
Example 3
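// Scale the stored gradient (diff) in place, dispatching on where the up-to-date
// copy currently lives: private (prv) memory, CPU memory, or GPU memory.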
void Blob<Dtype>::scale_diff(Dtype scale_factor) {
  Dtype* diff;
  if (!diff_) { return; }
  switch (diff_->head()) {
  case SyncedMemory::SYNCED_PRV:
  case SyncedMemory::HEAD_AT_PRV:
    diff = mutable_prv_diff();
    caffe_scal(prv_diff_count(), scale_factor, diff);
    break;
  case SyncedMemory::HEAD_AT_CPU:
    diff = mutable_cpu_diff();
    caffe_scal(count_, scale_factor, diff);
    return;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    diff = mutable_gpu_diff();
    caffe_gpu_scal(count_, scale_factor, diff);
    return;
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
}
Example 4
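// Dropout backward: during TRAIN the incoming gradient is rescaled by scale_ and
// multiplied by the dropout mask (either one value for the whole batch or
// element-wise via the internal Scale layer); during TEST it is passed through.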
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* top_diff = top[0]->mutable_cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    if (this->phase_ == TRAIN) {
		if (drop_batch_){
			Dtype drop = rand_vec_->cpu_data()[0];
			// scale + mask
			caffe_scal(top[0]->count(), Dtype(scale_ * drop), top_diff);
			caffe_copy(top[0]->count(), top_diff, bottom_diff);
		}
		else{
			// scale
			caffe_scal(top[0]->count(), scale_, top_diff);
			// multiply mask
			vector<Blob<Dtype>*> scale_bottom(2, NULL);
			scale_bottom[0] = bottom[0];
			scale_bottom[1] = rand_vec_;
			const vector<Blob<Dtype>*> scale_top(1, top[0]);
			vector<bool> prop_down(2, true);
			prop_down[1] = false;
			scale_layer_->Backward(scale_top, prop_down, scale_bottom);
		}
    } else {
      caffe_copy(top[0]->count(), top_diff, bottom_diff);
    }
  }
}
Example 5
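// Power layer backward: y = (shift + scale * x)^power, so
// dy/dx = scale * power * (shift + scale * x)^(power - 1); the branches below
// handle the cheap special cases power == 1 (constant), power == 2, and shift == 0.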
void PowerLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    const Dtype* top_diff = top[0]->cpu_diff();
    if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
      caffe_set(count, diff_scale_, bottom_diff);
    } else {
      const Dtype* bottom_data = bottom[0]->cpu_data();
      // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
      //               = diff_scale * y / (shift + scale * x)
      if (power_ == Dtype(2)) {
        // Special case for y = (shift + scale * x)^2
        //     -> dy/dx = 2 * scale * (shift + scale * x)
        //              = diff_scale * shift + diff_scale * scale * x
        caffe_cpu_axpby(
            count,
            diff_scale_ * scale_,
            bottom_data,
            Dtype(0),
            bottom_diff);

        if (shift_ != Dtype(0)) {
          caffe_add_scalar(count, diff_scale_ * shift_, bottom_diff);
        }
      } else if (shift_ == Dtype(0)) {
        // Special case for y = (scale * x)^power
        //     -> dy/dx = scale * power * (scale * x)^(power - 1)
        //              = scale * power * (scale * x)^power * (scale * x)^(-1)
        //              = power * y / x
        const Dtype* top_data = top[0]->cpu_data();
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_scal(count, power_, bottom_diff);
      } else {
        caffe_copy(count, bottom_data, bottom_diff);
        if (scale_ != Dtype(1)) {
          caffe_scal(count, scale_, bottom_diff);
        }
        if (shift_ != Dtype(0)) {
          caffe_add_scalar(count, shift_, bottom_diff);
        }
        const Dtype* top_data = top[0]->cpu_data();
        caffe_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
        if (diff_scale_ != Dtype(1)) {
          caffe_scal(count, diff_scale_, bottom_diff);
        }
      }
    }
    if (diff_scale_ != Dtype(0)) {
      caffe_mul(count, top_diff, bottom_diff, bottom_diff);
    }
  }
}
Example 6
void TripletRankingHingeLossLayer<Dtype>::Backward_cpu(
	const vector<Blob<Dtype>*>& top, const vector<bool> &propagate_down,
	const vector<Blob<Dtype>*>& bottom){
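	// Triplet ranking hinge loss: when the hinge is active (dist_sq_ > 0) the
	// gradients for the anchor, positive, and negative codes are proportional to
	// (F- - F+), (F+ - F), and (F - F-) respectively, scaled by 2 / batch_ and
	// doubled again before being copied into the bottom diffs.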
	const Dtype* orignalcode;
	const Dtype* similarcode;
	const Dtype* diffrcode;
	if (propagate_down[0]) {
		for (int i = 0; i < 3; ++i) {
			for (int j = 0; j < batch_; ++j){
				Dtype* bout = bottom[i]->mutable_cpu_diff();
				orignalcode = bottom[0]->cpu_data() + bottom[0]->offset(j);
				similarcode = bottom[1]->cpu_data() + bottom[1]->offset(j);
				diffrcode = bottom[2]->cpu_data() + bottom[2]->offset(j);
				if (i == 0){
					if (dist_sq_.cpu_data()[j]>Dtype(0.0)){
						caffe_sub(dim_, diffrcode, similarcode,
							gradient.mutable_cpu_data());// the distance of F- and F+
						caffe_scal(dim_, Dtype(2) / Dtype(batch_),
							gradient.mutable_cpu_data());
					}
					else
						caffe_set(dim_, Dtype(0.0), gradient.mutable_cpu_data());
				}
				if (i == 1){
					if (dist_sq_.cpu_data()[j] > Dtype(0.0)){
						caffe_sub(dim_, similarcode, orignalcode, 
							gradient.mutable_cpu_data());// the distance of F+ and F
						caffe_scal(dim_, Dtype(2) / Dtype(batch_),
							gradient.mutable_cpu_data());
					}
					else
						caffe_set(dim_, Dtype(0.0), gradient.mutable_cpu_data());
				}
				if (i == 2){
					if (dist_sq_.cpu_data()[j] > Dtype(0.0)){
						caffe_sub(dim_, orignalcode, diffrcode,
							gradient.mutable_cpu_data()); // the distance of F and F-
						caffe_scal(dim_, Dtype(2) / Dtype(batch_),
							gradient.mutable_cpu_data());
					}
					else
						caffe_set(dim_, Dtype(0.0), gradient.mutable_cpu_data());
				}
				caffe_scal(dim_, Dtype(2.0), gradient.mutable_cpu_data());
				caffe_copy(dim_, gradient.cpu_data(), bout + (j*dim_));
			}
		}
	}
}
Example 7
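// Delegate the gradient computation to the CPU bilateral-filter implementation,
// then multiply both input gradients by the loss weight stored in top[0]'s diff.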
void BilateralFilterLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  bilateral_interface_cpu_->Backward(
                                propagate_down[0], propagate_down[1],
                                bottom[0], bottom[1], top[0]);
  // Scale gradient
  const Dtype loss_weight = top[0]->cpu_diff()[0];
  if(propagate_down[0]) {
    caffe_scal(bottom[0]->count(), loss_weight, bottom[0]->mutable_cpu_diff());
  }
  if(propagate_down[1]) {
    caffe_scal(bottom[1]->count(), loss_weight, bottom[1]->mutable_cpu_diff());
  }
}
Example 8
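// Focal loss FL = -(1 - p_t)^gamma * log(p_t). The loop below computes dFL/dp_t
// with its sign dropped and chains it through the (negated) softmax Jacobian, which
// restores the correct sign of the gradient w.r.t. the logits.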
void FocalLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) 
{
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }

  if (propagate_down[0]) {
    // data
    Dtype* bottom_diff     = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* label     = bottom[1]->cpu_data();
    // intermediate
    const Dtype* log_prob_data   = log_prob_.cpu_data();
    const Dtype* power_prob_data = power_prob_.cpu_data();

    int count       = 0;
    int channels    = bottom[0]->shape(softmax_axis_);
    int dim         = prob_.count() / outer_num_;
    const Dtype eps = 1e-10;

    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        // label
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        
        // ignore label
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < channels; ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
          continue;
        }

        // the gradient of FL w.r.t. p_t (the sign is ignored here)
        int ind_i  = i * dim + label_value * inner_num_ + j; // index of ground-truth label
        Dtype grad = 0 - gamma_ * (power_prob_data[ind_i] / std::max(1 - prob_data[ind_i], eps)) * log_prob_data[ind_i] 
                       + power_prob_data[ind_i] / prob_data[ind_i];
        // the gradient w.r.t input data x
        for (int c = 0; c < channels; ++c) {
          int ind_j = i * dim + c * inner_num_ + j;
          if(c == label_value) {
            CHECK_EQ(ind_i, ind_j);
            // if i == j (here i, j refer to the softmax derivative indices)
            bottom_diff[ind_j] = grad * prob_data[ind_i] * (prob_data[ind_i] - 1);
          } else {
            // if i != j (here i, j refer to the softmax derivative indices)
            bottom_diff[ind_j] = grad * prob_data[ind_i] * prob_data[ind_j];
          }
        }
        // count                    
        ++count;
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] / get_normalizer(normalization_, count);
    caffe_scal(prob_.count(), loss_weight, bottom_diff);
  }
}
Example 9
void SigmoidCrossEntropyWithValidLabelLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
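  // Sigmoid cross-entropy gradient: sigmoid(x) - target, with entries whose valid
  // flag (optional third bottom) is zero suppressed, then scaled by loss_weight / num.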
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    if (bottom.size() == 3) { // a valid label is specified
      const Dtype* valid = bottom[2]->cpu_data();
      for (int i = 0; i < count; i++) {
        if (! valid[i]) {
          bottom_diff[i] = 0;
        }
      }
    }
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
Example 10
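// Inner-product backward: weight grad = top_diff^T * bottom_data (inflated here by
// 1 + regularization), bias grad = top_diff summed over the batch via the bias
// multiplier, and bottom grad = top_diff * W.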
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    vector<Blob<Dtype>*>* bottom) {

  double regularization = this->layer_param_.inner_product_param().regularization()/2;
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
    // Gradient with respect to weight
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
    if (regularization > 0) {
      caffe_scal(this->blobs_[0]->count(), Dtype(1.0 + regularization),
          this->blobs_[0]->mutable_cpu_diff());
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)0.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
        top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
        (*bottom)[0]->mutable_cpu_diff());
  }
}
Example 11
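// Backward for the per-image normalize ops: DEMEAN passes the gradient straight
// through, while SDSCALE recomputes the zero-mean energy of the input and divides
// the incoming gradient by it before copying it to bottom_diff.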
void NormalizeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
	if (propagate_down[0]) {
		for (int i = 0; i < bottom.size(); ++i) {
			const Dtype* bottom_data = bottom[i]->cpu_data();
			const Dtype* top_diff    = top[i]->cpu_diff();
			Dtype* bottom_diff       = bottom[i]->mutable_cpu_diff();
			Dtype scal;
			for (int n=0; n <num_; n++){
				switch (op_) { 
				case NormalizeParameter_NormalizeOp_DEMEAN:
					caffe_copy(imSz_, top_diff + n * imSz_, bottom_diff + n * imSz_);
					break;
				case NormalizeParameter_NormalizeOp_SDSCALE:
					caffe_copy(imSz_, bottom_data + n * imSz_, this->blobs_[0]->mutable_cpu_data());
					caffe_copy(imSz_, top_diff + n * imSz_, this->blobs_[0]->mutable_cpu_diff());
					//Find the Scaling Factor
					caffe_cpu_zero_mean(imSz_, this->blobs_[0]->mutable_cpu_data());
					scal = caffe_cpu_dot<Dtype>(imSz_, this->blobs_[0]->cpu_data(), 
												this->blobs_[0]->cpu_data()); 
					//Apply the scaling to the gradients
					caffe_scal(imSz_, Dtype(1.0 / scal), this->blobs_[0]->mutable_cpu_diff());  
					caffe_copy(imSz_, this->blobs_[0]->cpu_diff() , bottom_diff + n * imSz_);
					break;
				default:
					LOG(FATAL) << "Unknown elementwise operation.";
				}
			}
    }
	}
}
Example 12
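// Forward: each image is staged in blobs_[0]; DEMEAN subtracts its mean, SDSCALE
// additionally divides by the sum of squares of the zero-meaned values.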
void NormalizeLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
	for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
   	Dtype scal;
		for (int n = 0; n < this->num_; ++n) {
			caffe_copy(imSz_, bottom_data + n * imSz_, this->blobs_[0]->mutable_cpu_data());
			switch (op_) { 
			case NormalizeParameter_NormalizeOp_DEMEAN:
				caffe_cpu_zero_mean(imSz_, this->blobs_[0]->mutable_cpu_data());
				break;
			case NormalizeParameter_NormalizeOp_SDSCALE:
				caffe_cpu_zero_mean(imSz_, this->blobs_[0]->mutable_cpu_data());
				scal = caffe_cpu_dot<Dtype>(imSz_, this->blobs_[0]->cpu_data(), 
											this->blobs_[0]->cpu_data()); 
				caffe_scal(imSz_, Dtype(1.0 / scal), this->blobs_[0]->mutable_cpu_data());  
				break;
			default:
				LOG(FATAL) << "Unknown elementwise operation.";
			}
			caffe_copy(imSz_, this->blobs_[0]->cpu_data(), top_data + n * imSz_); 
    }
  }
}
Example 13
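// Two-level softmax loss backward: as in the plain softmax loss the probabilities
// are copied into bottom_diff, then lambda_ is subtracted at the fine label and
// (1 - lambda_) * prob / top_prob is subtracted from every class that shares the
// same top-level label.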
void Softmax2WithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type_name()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* top_prob_data = top_prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = (*bottom)[1]->cpu_data();
    int num = prob_.num();
    int dim = prob_.count() / num;
    int top_dim = top_prob_.count() / num;
    int spatial_dim = prob_.height() * prob_.width();
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < spatial_dim; ++j) {
        int label_v = static_cast<int>(label[i * spatial_dim + j]);
        int top_label_v = top_dict_.cpu_data()[label_v];
        bottom_diff[i * dim + label_v * spatial_dim + j] -= lambda_;
        for (int k = 0; k < prob_.channels(); ++k) {
          if (top_label_v == top_dict_.cpu_data()[k]) {
            bottom_diff[i * dim + k * spatial_dim + j] -=
                (1 - lambda_) * prob_data[i * dim + k * spatial_dim + j] /
                top_prob_data[i * top_dim + top_label_v * spatial_dim + j];
          }
        }
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
  }
}
Example 14
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type_name()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int num = prob_.num();
    int dim = prob_.count() / num;
    int spatial_dim = prob_.height() * prob_.width();
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < spatial_dim; ++j) {
        bottom_diff[i * dim + static_cast<int>(label[i * spatial_dim + j])
            * spatial_dim + j] -= 1;
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
  }
}
Example 15
void WeightPlusLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
	const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){

	const Dtype* bottom_data = bottom[0]->cpu_data();
	const Dtype* top_diff = top[0]->cpu_diff();
	const Dtype* weight = this->blobs_[0]->cpu_data();
	Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

	caffe_scal(dim_, Dtype(2.0), weight_two_.mutable_cpu_data());

	// gradient with respect to weight
	for (int n = 0; n < batch_; ++n){
		int offset = n*dim_;
		caffe_mul(dim_, weight_two_.cpu_data(), bottom_data + offset, data_meta_.mutable_cpu_data() + offset);
		caffe_mul(dim_, top_diff + offset, data_meta_.cpu_data() + offset, data_meta_.mutable_cpu_data() + offset);
		caffe_axpy(dim_, Dtype(1.0), data_meta_.cpu_data() + offset, blobs_[0]->mutable_cpu_diff());
	}

	// gradient with respect to bottom data
	if (propagate_down[0]){
		for (int n = 0; n < batch_; ++n){
			int offset = n*dim_;
			caffe_mul(dim_, top_diff + offset, weight_two_.cpu_data(), bottom_diff + offset);
		}
	}

}
Example 16
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
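  // Standard sigmoid cross-entropy gradient: sigmoid(x) - target, scaled by
  // loss_weight over the batch size (or by 1 when normalization is NONE; under
  // MLSL multi-node training the normalizer also includes the group size).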
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    Dtype normalizer = Dtype(num);
    if (this->layer_param_.loss_param().normalization() == LossParameter_NormalizationMode_NONE)
      normalizer = 1.f;
#ifdef USE_MLSL
    else {
      // We assume local bs is same across all nodes
      normalizer *= mn::get_group_size();
    }
#endif
    caffe_scal(count, loss_weight / normalizer, bottom_diff);
  }
}
Example 17
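// Reduce each dim_-sized slice of the input to one output value: SUM and MEAN use a
// dot product with an all-ones multiplier, ASUM the absolute sum, SUMSQ the dot of
// the slice with itself; coeff_ (which carries the 1/dim_ factor for MEAN) is
// applied at the end.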
void ReductionLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* mult_data = NULL;
  if (sum_multiplier_.count() > 0) {
    mult_data = sum_multiplier_.cpu_data();
  }
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int i = 0; i < num_; ++i) {
    switch (op_) {
    case ReductionParameter_ReductionOp_SUM:
    case ReductionParameter_ReductionOp_MEAN:
      *top_data = caffe_cpu_dot(dim_, mult_data, bottom_data);
      break;
    case ReductionParameter_ReductionOp_ASUM:
      *top_data = caffe_cpu_asum(dim_, bottom_data);
      break;
    case ReductionParameter_ReductionOp_SUMSQ:
      *top_data = caffe_cpu_dot(dim_, bottom_data, bottom_data);
      break;
    default:
      LOG(FATAL) << "Unknown reduction op: "
          << ReductionParameter_ReductionOp_Name(op_);
    }
    bottom_data += dim_;
    ++top_data;
  }
  if (coeff_ != Dtype(1)) {
    // Reset the top_data pointer.
    top_data = top[0]->mutable_cpu_data();
    caffe_scal(num_, coeff_, top_data);
  }
}
Example 18
void SigmoidCrossEntropyLossMaskLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
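  // Same sigmoid cross-entropy gradient as above, but entries outside the mask (and
  // all entries when negsig is set) are zeroed, and the gradient is normalized by
  // the number of active mask elements instead of the batch size.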
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
   // const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    //////////////////////////////// my change /////////////////////////////////////
    const Dtype* use_mask = mask_.cpu_data();
    for (int i = 0; i < count; ++i){
      if(negsig){bottom_diff[i]=0;}
      if(use_mask[i]==0){bottom_diff[i]=0;}
    }
 //   LOG(INFO) << "Backward use_mask num" << mask_.asum_data();
  /////////////////////////////// my change /////////////////////////////////////
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / mask_.asum_data(), bottom_diff);
  }
}
Example 19
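// Dropout forward: draw the mask from a Bernoulli, clipped Gaussian, or uniform
// distribution; either one mask value scales the whole batch, or the internal Scale
// layer applies the mask element-wise, followed by the inverted-dropout rescaling.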
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* mask = rand_vec_->mutable_cpu_data();
  const int count = rand_vec_->count();
  if (this->phase_ == TRAIN) {
	  switch (drop_type_){
	  case DropoutParameter_DropType_BERNOULLI:
	  {
	    // Create random numbers
	    caffe_rng_bernoulli(count, Dtype(1. - threshold_), mask);
	    break;
	  }
	  case DropoutParameter_DropType_GAUSSIAN:
	  {
	   caffe_rng_gaussian(count, Dtype(mu_), Dtype(sigma_), mask);
	   // clip to be in [0,1]
	   for (int i = 0; i < rand_vec_->count(); ++i){
	  	 Dtype m = mask[i];
	  	 mask[i] = m > 1 ? 1 : (m < 0 ? 0 : m);
	   }
	   break;
	  }
	  case DropoutParameter_DropType_UNIFORM:
	  {
	    caffe_rng_uniform(count, Dtype(a_), Dtype(b_), mask);
		break;
	  }
	  }
	  if (drop_batch_){
		  Dtype drop = mask[0];
		  caffe_copy(top[0]->count(), bottom_data, top_data);
		  caffe_scal(top[0]->count(), Dtype(scale_ * drop), top_data);
	  }
	  else{
		  vector<Blob<Dtype>*> scale_bottom(2, NULL);
		  scale_bottom[0] = bottom[0];
		  scale_bottom[1] = rand_vec_;
		  const vector<Blob<Dtype>*> scale_top(1, top[0]);
		  scale_layer_->Forward(scale_bottom, scale_top);
		  caffe_scal(top[0]->count(), scale_, top_data);
	  }
  } else {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }
}
Example 20
void BatchContrastiveLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
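  // bottom[0] is a num x num pairwise similarity matrix: positive pairs above
  // positive_margin_ get +positive_weight_, negative pairs below negative_margin_
  // get -negative_weight_ (or only the single hardest pair of each kind when
  // max_only_ is set), then everything is scaled by the loss weight.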
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* label = bottom[1]->cpu_data();
    int num = bottom[0]->num();

    caffe_set(num*num, Dtype(0), bottom_diff);
    if (max_only_) {
      if (max_positive_1_ >= 0 && max_positive_2_ >= 0) {
        bottom_diff[max_positive_1_ * num + max_positive_2_] = positive_weight_;
      }
      if (min_negative_1_ >= 0 && min_negative_2_ >= 0) {
        bottom_diff[min_negative_1_ * num + min_negative_2_] = -negative_weight_;
      }
    }
    else {
      for (int i = 0; i < num; ++i) {
        for (int j = i + 1; j < num; ++j) {
          if (label[i] == label[j]) {
            if (bottom_data[i*num + j] > positive_margin_) {
              bottom_diff[i*num + j] = positive_weight_;
            }
          }
          else {
            if (bottom_data[i*num + j] < negative_margin_) {
              bottom_diff[i*num + j] = -negative_weight_;
            }
          }
        }
      }
    }

    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (max_only_) {
      caffe_scal(bottom[0]->count(), loss_weight / 2, bottom_diff);
    }
    else {
      caffe_scal(bottom[0]->count(), loss_weight / num, bottom_diff);
    }
  }
}
Example 21
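// Island loss backward: the first block accumulates the (negated) per-class sums of
// the distance_ rows into the center gradients, the second block writes the pairwise
// term that pushes the centers apart, and the bottom gradient is distance_ scaled by
// loss_weight / M_.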
	void IslandLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down,
		const vector<Blob<Dtype>*>& bottom) {
		// Gradient with respect to centers
		if (this->param_propagate_down_[0]) {
			const Dtype* label = bottom[1]->cpu_data();
			Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
			const Dtype* center_data = this->blobs_[0]->cpu_data();
			Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
			const Dtype* distance_data = distance_.cpu_data();

			// \sum_{y_i==j}
			caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data());
			for (int n = 0; n < N_; n++) {
				int count = 0;
				for (int m = 0; m < M_; m++) {
					const int label_value = static_cast<int>(label[m]);
					if (label_value == n) {
						count++;
						caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_, variation_sum_data + n * K_);
					}
				}
				caffe_axpy(K_, (Dtype)1. / (count + (Dtype)1.), variation_sum_data + n * K_, center_diff + n * K_);
			}


			//xcenter_loss backward
			for (int n = 0; n < N_; ++n){
				Dtype double_center_module_n = center_module_[n] * center_module_[n];
				for (int i = 0; i < N_; ++i){
					if (i == n){
						continue;
					}
					else {  // update with center i
						Dtype alpha = center_module_[n] * center_module_[i];
						Dtype belta = center_dot_[n][i] / (alpha*double_center_module_n);
						//alpha*c_i-beta*c_n
						for (int k = 0; k < K_; ++k){
							// each pair is computed twice, so the center_diff value needs to be multiplied by 2
							center_diff[n*K_ + k] = 2*lambda_/(N_-1)*(alpha*center_data[i*K_ + k] - belta*center_data[n*K_ + k]);
						}
					}
				}
			}
		}

		

		// Gradient with respect to bottom data 
		if (propagate_down[0]) {
			caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
			caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_, bottom[0]->mutable_cpu_diff());
		}
		if (propagate_down[1]) {
			LOG(FATAL) << this->type()
				<< " Layer cannot backpropagate to label inputs.";
		}
	}
Example 22
void EuclideanLossWithIgnoreLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
	int count  = bottom[0]->count();
	// Number of elements per batch item
	int bCount  = bottom[0]->count(1, bottom[0]->num_axes());
	int lbCount = bottom[1]->count(1, bottom[1]->num_axes());
	int N      = bottom[0]->shape(0);
	if (lCount_ == Dtype(0)){
		LOG(INFO) << "EuclideanLossWithIgnore was Silent for this batch";
		return;
	}
	//Compute the gradients
	for (int i = 0; i < 2; ++i) {
		const Dtype sign = (i == 0) ? 1 : -1;
		const Dtype alpha = sign * top[0]->cpu_diff()[0] / lCount_;
		Dtype Z;
		const Dtype* botZData = bottom[nc_]->cpu_data();
		Dtype* botDiff       = bottom[i]->mutable_cpu_diff(); 
		const Dtype* labels  = bottom[1]->cpu_data();       
		Dtype* diff          = diff_.mutable_cpu_data();
		const Dtype* diffC   = diff_.cpu_data();
		if (propagate_down[i]) {
			for (int n=0; n < N; ++n){
				if (labels[bCount] == Dtype(1)){ 
					if (is_normalize_){
						Z = caffe_cpu_dot(bCount, botZData, botZData);
						if (Z>0){
							caffe_scal(count, Z, diff);
						}
					}

					caffe_cpu_axpby(
						bCount,     // count
						alpha,      // alpha
						diffC,      // a
						Dtype(0),   // beta
						botDiff);   // b
				}
				labels += lbCount;
				diff   += bCount;
				diffC  += bCount;
				if (nc_==0){
					botZData += bCount;
				}else{
					botZData += lbCount;
				}
				if (i==0){
					botDiff  += bCount;
				}else {
					botDiff  += lbCount; 
				} 
			}
		}
	}
}
Example 23
void WeightedSoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
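  // Weighted softmax loss: the usual (prob - one_hot) gradient is computed per
  // position, every channel is then multiplied by that position's sample weight from
  // bottom[2], and the result is scaled by loss_weight over count or num.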
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    const Dtype* sample_weight = bottom[2]->cpu_data();
    int num = prob_.num();
    int dim = prob_.count() / num;
    int spatial_dim = prob_.height() * prob_.width();
    int count = 0;
    for (int i = 0; i < num; ++i) {
      for (int j = 0; j < spatial_dim; ++j) {
        const int label_value = static_cast<int>(label[i * spatial_dim + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->channels(); ++c) {
            bottom_diff[i * dim + c * spatial_dim + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
          Dtype w = sample_weight[i * spatial_dim + j];
          for (int k = 0; k < dim; ++k) {
            bottom_diff[i * dim + k * spatial_dim + j] *= w;
          }
          ++count;
        }
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (normalize_) {
      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
    } else {
      caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
    }
  }
}
Example 24
void SigmoidWeightedCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to weight inputs.";
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    const Dtype* weight = bottom[2]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    Dtype* tmp = new Dtype[count << 1];
    Dtype* tmp1 = tmp + count;

    // diff: 1/2
    caffe_set(count, (Dtype)0.5, bottom_diff);
    // diff: 1/2 * \hat{p}
    caffe_mul(count, bottom_diff, sigmoid_output_data, bottom_diff);
    // diff: 1/2 * (1-p) * \hat{p}
    caffe_set(count, (Dtype)1, tmp1);
    caffe_sub(count, tmp1, target, tmp);
    caffe_mul(count, bottom_diff, tmp, bottom_diff);
    // diff: 1/2(1-w) * (1-p) * \hat{p}
    caffe_sub(count, tmp1, weight, tmp);
    caffe_div(count, bottom_diff, tmp, bottom_diff);

    // tmp: 1-\hat{p}
    caffe_sub(count, tmp1, sigmoid_output_data, tmp);
    // tmp: p * (1-\hat{p})
    caffe_mul(count, tmp, target, tmp);
    // tmp: -1/2 * p * (1-\hat{p})
    caffe_set(count, (Dtype)-0.5, tmp1);
    caffe_mul(count, tmp, tmp1, tmp);
    // tmp: -1/2w * p * (1-\hat{p})
    caffe_div(count, tmp, weight, tmp);
    // diff: -(1/2w * p * (1-\hat{p}) - 1/2(1-w) * (1-p) * \hat{p})
    caffe_add(count, bottom_diff, tmp, bottom_diff);

    delete[] tmp;

    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
Example 25
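// Log layer backward: the code rebuilds (input_scale * x + input_shift), takes its
// reciprocal, scales by backward_num_scale_, and multiplies by the incoming top_diff.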
void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_cpu_copy(count, bottom_data, bottom_diff);
  if (input_scale_ != Dtype(1)) {
    caffe_scal(count, input_scale_, bottom_diff);
  }
  if (input_shift_ != Dtype(0)) {
    caffe_add_scalar(count, input_shift_, bottom_diff);
  }
  caffe_powx(count, bottom_diff, Dtype(-1), bottom_diff);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_scal(count, backward_num_scale_, bottom_diff);
  }
  caffe_mul(count, top_diff, bottom_diff, bottom_diff);
}
Example 26
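// Gradient of y = x^power_: dy/dx = power_ * x^(power_ - 1), chained with top_diff.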
void MyNeuronLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  if(propagate_down[0]){
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_powx(count, bottom_data, Dtype(power_ - 1), bottom_diff);
    caffe_scal(count, Dtype(power_), bottom_diff);
    caffe_mul(count, bottom_diff, top_diff, bottom_diff);
  }

}
Example 27
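// Parameter-server style update: receive each parameter's gradient from the worker
// node, fold in the learning rate, apply the update, and send the new weights back.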
void ApplyUpdate(shared_ptr<Net<Dtype> > net, Dtype lr, int node_id)
{
    const vector<shared_ptr<Blob<Dtype> > >& net_params = net->params();

    for (int i = 0; i < net_params.size(); i++) {
        auto param = net_params[i];
        recv_diff(param.get(), node_id);
        caffe_scal(param->count(), lr, param->mutable_cpu_diff());
        param->Update();
        send_data(param.get(), node_id);
    }
}
Example 28
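// Exp layer backward: the output is an exponential of a scaled input, so dy/dx is
// proportional to y itself; bottom_diff = top_diff * top_data * inner_scale_.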
void ExpLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_mul(count, top_data, top_diff, bottom_diff);
  if (inner_scale_ != Dtype(1)) {
    caffe_scal(count, inner_scale_, bottom_diff);
  }
}
Example 29
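// Log layer forward: y = base_scale * log(input_scale * x + input_shift), with the
// scale/shift applied in place only when they differ from the identity values.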
void LogLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  if (input_scale_ == Dtype(1) && input_shift_ == Dtype(0)) {
    caffe_log(count, bottom_data, top_data);
  } else {
    caffe_cpu_copy(count, bottom_data, top_data);
    if (input_scale_ != Dtype(1)) {
      caffe_scal(count, input_scale_, top_data);
    }
    if (input_shift_ != Dtype(0)) {
      caffe_add_scalar(count, input_shift_, top_data);
    }
    caffe_log(count, top_data, top_data);
  }
  if (base_scale_ != Dtype(1)) {
    caffe_scal(count, base_scale_, top_data);
  }
}
Example 30
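// Infogain loss backward: bottom_diff[l] = prob[l] * sum_l' H[label][l'] -
// H[label][l] per spatial position, with ignored labels zeroed out and the result
// scaled by loss_weight over the normalizer.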
void InfogainLossLayer<Dtype, MItype, MOtype>::Backward_cpu(
    const vector<Blob<MOtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<MItype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down.size() > 2 && propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to infogain inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* prob_data = prob_.cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    const Dtype* infogain_mat = NULL;
    if (bottom.size() < 3) {
      infogain_mat = infogain_.cpu_data();
    } else {
      infogain_mat = bottom[2]->cpu_data();
      // H is provided as a "bottom" and might change. sum rows every time.
      sum_rows_of_H(bottom[2]);
    }
    const Dtype* sum_rows_H = sum_rows_H_.cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int_tp dim = bottom[0]->count() / outer_num_;
    int_tp count = 0;
    for (int_tp i = 0; i < outer_num_; ++i) {
      for (int_tp j = 0; j < inner_num_; ++j) {
        const int_tp label_value =
          static_cast<int_tp>(bottom_label[i * inner_num_ + j]);
        DCHECK_GE(label_value, 0);
        DCHECK_LT(label_value, num_labels_);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] = 0;
          }
        } else {
          for (int_tp l = 0; l < num_labels_; ++l) {
            bottom_diff[i * dim + l * inner_num_ + j] =
               prob_data[i*dim + l*inner_num_ + j]*sum_rows_H[label_value]
               - infogain_mat[label_value * num_labels_ + l];
          }
          ++count;
        }
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] /
                        get_normalizer(normalization_, count);
    caffe_scal(bottom[0]->count(), loss_weight, bottom_diff);
  }
}