void TripletRankingHingeLossLayer<Dtype>::Forward_cpu(
	const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top){
	int dim_v = batch_*dim_;
	const Dtype* sub_or_si;
	const Dtype* sub_or_di;
	Dtype Tripletlosstotal(0.0);

	//The triplet ranking loss
	caffe_sub(dim_v, bottom[0]->cpu_data(), bottom[1]->cpu_data(), diff_sub_or_si.mutable_cpu_data()); // F-F+
	caffe_sub(dim_v, bottom[0]->cpu_data(), bottom[2]->cpu_data(), diff_sub_or_di.mutable_cpu_data()); // F-F-
	caffe_powx(dim_v, diff_sub_or_si.cpu_data(), Dtype(2.0), diff_pow_or_si.mutable_cpu_data());		  //Pow
	caffe_powx(dim_v, diff_sub_or_di.cpu_data(), Dtype(2.0), diff_pow_or_di.mutable_cpu_data());       //Pow

	for (int n = 0; n < batch_; n++){
		sub_or_si = diff_pow_or_si.cpu_data() + diff_pow_or_si.offset(n);
		sub_or_di = diff_pow_or_di.cpu_data() + diff_pow_or_di.offset(n);
		Dtype result1 = caffe_cpu_asum(dim_, sub_or_si);  // ||F - F+||^2
		Dtype result2 = caffe_cpu_asum(dim_, sub_or_di);  // ||F - F-||^2
		Dtype loss = std::max(margin + result1 - result2, Dtype(0));  // hinge loss
		diff_.mutable_cpu_data()[n] = loss; // save the loss[i]
	}
	for (int k = 0; k < batch_; k++){
		dist_sq_.mutable_cpu_data()[k] = diff_.cpu_data()[k];// save the loss[i] for BP
		Tripletlosstotal += dist_sq_.cpu_data()[k];
	}
	Tripletlosstotal = Tripletlosstotal / static_cast<Dtype>(bottom[0]->num()); //get the average loss
	top[0]->mutable_cpu_data()[0] = Tripletlosstotal;
}
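Stripped of the blob plumbing, the snippet above computes the standard triplet hinge per anchor: loss = max(margin + ||F - F+||^2 - ||F - F-||^2, 0). A minimal scalar sketch of that per-triplet math (the function name and raw-pointer interface are hypothetical, not part of the layer):

#include <algorithm>
#include <cstddef>

// Squared L2 distances anchor/positive and anchor/negative, then a margin hinge;
// this is what the caffe_sub / caffe_powx / caffe_cpu_asum chain computes per sample.
float triplet_hinge(const float* fa, const float* fp, const float* fn,
                    std::size_t dim, float margin) {
  float d_ap = 0.f, d_an = 0.f;
  for (std::size_t i = 0; i < dim; ++i) {
    float dp = fa[i] - fp[i];
    float dn = fa[i] - fn[i];
    d_ap += dp * dp;  // ||F - F+||^2
    d_an += dn * dn;  // ||F - F-||^2
  }
  return std::max(margin + d_ap - d_an, 0.f);
}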
	Dtype TripletClipHingeLossLayer<Dtype>::
		compute_structureloss(const vector<Blob<Dtype>*>& bottom){

			Dtype Structureloss(0.0);
			int batch_size = bottom[0]->num() / frame_num;
			for (int i = 0; i < batch_size; ++i){
				for (int j = 0; j < frame_num - 1; ++j){
					int index_1 = i*frame_num*dim + j*dim;
					int index_2 = i*frame_num*dim + (j + 1)*dim;
					int direct = i*(frame_num - 1)*dim + j*dim;
					caffe_sub(dim, bottom[0]->cpu_data() + index_1,
						bottom[0]->cpu_data() + index_2, sub_or.mutable_cpu_data() + direct);
					caffe_sub(dim, bottom[1]->cpu_data() + index_1,
						bottom[1]->cpu_data() + index_2, sub_si.mutable_cpu_data() + direct);
					caffe_sub(dim, bottom[2]->cpu_data() + index_1,
						bottom[2]->cpu_data() + index_2, sub_di.mutable_cpu_data() + direct);
					// pow 
					caffe_powx(dim, sub_or.cpu_data() + direct, Dtype(2.0), pow_sub_or.mutable_cpu_data() + direct);
					caffe_powx(dim, sub_si.cpu_data() + direct, Dtype(2.0), pow_sub_si.mutable_cpu_data() + direct);
					caffe_powx(dim, sub_di.cpu_data() + direct, Dtype(2.0), pow_sub_di.mutable_cpu_data() + direct);
					// plus
					Structureloss += (caffe_cpu_asum(dim, pow_sub_or.cpu_data() + direct)
						+ caffe_cpu_asum(dim, pow_sub_si.cpu_data() + direct)
						+ caffe_cpu_asum(dim, pow_sub_di.cpu_data() + direct));
				}
			}
			return Structureloss / (batch_size*(frame_num - 1) * 3);
		}
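The structure term above penalizes feature change between consecutive frames of each clip, summed over the three streams and averaged per frame pair. A hedged scalar sketch of one stream's contribution, assuming the same clips x frames x dim layout (the helper is hypothetical):

#include <cstddef>

// Per-pair average of squared differences between consecutive frames, one stream.
float clip_smoothness(const float* data, std::size_t clips,
                      std::size_t frames, std::size_t dim) {
  float total = 0.f;
  for (std::size_t c = 0; c < clips; ++c)
    for (std::size_t f = 0; f + 1 < frames; ++f)
      for (std::size_t d = 0; d < dim; ++d) {
        float diff = data[(c * frames + f) * dim + d]
                   - data[(c * frames + f + 1) * dim + d];
        total += diff * diff;
      }
  return total / (clips * (frames - 1));
}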
Example No. 3
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                  vector<Blob<Dtype>*>* top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = (*top)[0]->mutable_cpu_data();
    int num;
    if (this->layer_param_.mvn_param().across_channels())
        num = bottom[0]->num();
    else
        num = bottom[0]->num() * bottom[0]->channels();

    int dim = bottom[0]->count() / num;
    Dtype eps = 1e-10;

    if (this->layer_param_.mvn_param().normalize_variance()) {
        // put the squares of bottom into temp_
        caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
                   temp_.mutable_cpu_data());

        // computes variance using var(X) = E(X^2) - (EX)^2
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
                              sum_multiplier_.cpu_data(), 0.,
                              variance_.mutable_cpu_data());  // E(X^2)
        caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
                   temp_.mutable_cpu_data());  // (EX)^2
        caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
                  variance_.mutable_cpu_data());  // variance

        // do mean and variance normalization
        // subtract mean
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);

        // normalize variance
        caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
                   variance_.mutable_cpu_data());

        caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());

        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
                              variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
    } else {
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX

        // subtract mean
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
    }
}
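This older MVN variant estimates variance with the one-pass identity var(X) = E(X^2) - (EX)^2. A scalar sketch of that identity for a single row (hypothetical helper, not the layer API):

#include <cstddef>

// Mean and variance of one row via E(X^2) - (EX)^2, mirroring the two gemv calls.
void row_mean_var(const float* x, std::size_t dim, float* mean, float* var) {
  float ex = 0.f, ex2 = 0.f;
  for (std::size_t i = 0; i < dim; ++i) {
    ex  += x[i];
    ex2 += x[i] * x[i];
  }
  ex /= dim; ex2 /= dim;
  *mean = ex;
  *var  = ex2 - ex * ex;  // can go slightly negative in floating point
}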
	Dtype TripletClipHingeLossLayer<Dtype>::compute_tripletloss(int batchsize,
		int Dimv){
		Dtype Tripletlosstotal(0.0);
		const Dtype* sub_or_si;
		const Dtype* sub_or_di;
		//The triplet ranking loss
		caffe_sub(Dimv, ave_or.cpu_data(), ave_si.cpu_data(), diff_sub_or_si.mutable_cpu_data()); // F-F+
		caffe_sub(Dimv, ave_or.cpu_data(), ave_di.cpu_data(), diff_sub_or_di.mutable_cpu_data()); // F-F-
		caffe_powx(Dimv, diff_sub_or_si.cpu_data(), Dtype(2.0), diff_pow_or_si.mutable_cpu_data());		  //Pow
		caffe_powx(Dimv, diff_sub_or_di.cpu_data(), Dtype(2.0), diff_pow_or_di.mutable_cpu_data());       //Pow
		for (int n = 0; n < batchsize; n++)
		{
			sub_or_si = diff_pow_or_si.cpu_data() + diff_pow_or_si.offset(n);
			sub_or_di = diff_pow_or_di.cpu_data() + diff_pow_or_di.offset(n);
			Dtype result1 = caffe_cpu_asum(dim, sub_or_si);  // ||F - F+||^2
			Dtype result2 = caffe_cpu_asum(dim, sub_or_di);  // ||F - F-||^2
			Dtype loss = std::max(margin + result1 - result2, Dtype(FLT_MIN));  // hinge, floored at FLT_MIN
			diff_.mutable_cpu_data()[n] = loss; // save the loss[i]
		}
		for (int k = 0; k < batchsize; k++)
		{
			dist_sq_.mutable_cpu_data()[k] = diff_.cpu_data()[k];// save the loss[i] for BP
			Tripletlosstotal += dist_sq_.cpu_data()[k];
		}
		return Tripletlosstotal / static_cast<Dtype>(batchsize);
	}
Example No. 5
void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();

  // get the hyperparameters and the effective per-parameter learning rate
  Dtype delta = this->param_.delta();
  Dtype rms_decay = this->param_.rms_decay();
  Dtype local_rate = rate * net_params_lr[param_id];

  switch (Caffe::mode()) {
  case Caffe::CPU:
    // compute square of gradient in update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history
    caffe_cpu_axpby(net_params[param_id]->count(),
        Dtype(1-rms_decay), this->update_[param_id]->cpu_data(),
        rms_decay, this->history_[param_id]->mutable_cpu_data());

    // prepare update
    caffe_powx(net_params[param_id]->count(),
        this->history_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());

    caffe_add_scalar(net_params[param_id]->count(),
        delta, this->update_[param_id]->mutable_cpu_data());

    caffe_div(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // scale and copy
    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
        this->update_[param_id]->cpu_data(), Dtype(0),
        net_params[param_id]->mutable_cpu_diff());
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    rmsprop_update_gpu(net_params[param_id]->count(),
        net_params[param_id]->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        rms_decay, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
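Element-wise, the CPU branch is the familiar RMSProp rule: history = rms_decay * history + (1 - rms_decay) * g^2, then step = lr * g / (sqrt(history) + delta). A minimal scalar sketch (hypothetical helper, not the solver API):

#include <cmath>

// One RMSProp step for a single weight; mirrors the powx/axpby/add_scalar/div chain.
float rmsprop_step(float grad, float* history,
                   float rms_decay, float delta, float lr) {
  *history = rms_decay * (*history) + (1.f - rms_decay) * grad * grad;
  return lr * grad / (std::sqrt(*history) + delta);
}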
Example No. 6
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();

  int dim = bottom[0]->count() / num;

  // subtract mean
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);  // X-EX

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // compute variance using var(X) = E((X-EX)^2)
    caffe_powx(bottom[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
        sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X-EX)^2)

    // normalize variance
    caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
          variance_.mutable_cpu_data());

    caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());

    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
          variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
          temp_.mutable_cpu_data());

    caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  }
}
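Unlike the earlier variant, this version measures variance directly as E((X-EX)^2) on the already-centered output, which avoids the catastrophic cancellation the E(X^2) - (EX)^2 form can suffer. A scalar sketch of the two-pass computation (hypothetical helper):

#include <cstddef>

// Two-pass variance: mean first, then the average squared deviation.
float row_var_two_pass(const float* x, std::size_t dim) {
  float mean = 0.f;
  for (std::size_t i = 0; i < dim; ++i) mean += x[i];
  mean /= dim;
  float var = 0.f;
  for (std::size_t i = 0; i < dim; ++i)
    var += (x[i] - mean) * (x[i] - mean);
  return var / dim;
}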
Example No. 7
void WeightPlusLayer<Dtype>::Forward_cpu(
	const vector<Blob<Dtype>*>& bottom,
	const vector<Blob<Dtype>*>& top){
	const Dtype* bottom_data = bottom[0]->cpu_data();
	const Dtype* weight = this->blobs_[0]->cpu_data();
	Dtype* top_data = top[0]->mutable_cpu_data();
	// square the shared weights once; they are constant across the batch
	caffe_powx(dim_, weight, Dtype(2.0), weight_pow_.mutable_cpu_data());
	for (int n = 0; n < batch_; ++n){
		int offset = n*dim_;
		caffe_mul(dim_, bottom_data + offset, weight_pow_.cpu_data(), top_data + offset);
	}
}
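The forward pass is just an element-wise product with the squared weights, y = x * w^2, with one weight vector shared across the batch; squaring keeps the effective scale non-negative. A scalar sketch (hypothetical helper):

#include <cstddef>

// y[n][i] = x[n][i] * w[i]^2; w is shared by every sample in the batch.
void weight_plus(const float* x, const float* w, float* y,
                 std::size_t batch, std::size_t dim) {
  for (std::size_t n = 0; n < batch; ++n)
    for (std::size_t i = 0; i < dim; ++i)
      y[n * dim + i] = x[n * dim + i] * w[i] * w[i];
}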
Example No. 8
void MyNeuronLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  if(propagate_down[0]){
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    // power rule: d(x^p)/dx = p * x^(p-1), then chain with the top gradient
    caffe_powx(count, bottom_data, Dtype(power_ - 1), bottom_diff);
    caffe_scal(count, Dtype(power_), bottom_diff);
    caffe_mul(count, bottom_diff, top_diff, bottom_diff);
  }
}
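Together with the matching forward pass further down (y = x^power_), this backward is just the power rule, dL/dx = dL/dy * p * x^(p-1). A scalar sketch of the chain (hypothetical helper):

#include <cmath>

// Chain rule for y = x^p: bottom_diff = top_diff * p * x^(p-1).
float power_rule_backward(float x, float top_diff, float p) {
  return top_diff * p * std::pow(x, p - 1.f);
}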
Example No. 9
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();

  int dim = bottom[0]->count() / num;

  if (this->layer_param_.mvn_param().normalize_variance()) {
    caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
          sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
          mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
          bottom_diff);
    caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
            sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
            mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
            bottom_diff);

    caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
        bottom_diff);

    // put the squares of bottom into temp_
    caffe_powx(temp_.count(), bottom_data, Dtype(2),
        temp_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());

    caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
  } else {
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
  }
}
Example No. 10
void EuclideanDistLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  int num = bottom[0]->num();
  int dim = count / num;
  caffe_sub(
      count,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  caffe_powx(count, diff_.cpu_data(), Dtype(2), temp_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, Dtype(0.5), temp_.cpu_data(),
      sum_multiplier_.cpu_data(), 0.,
      top[0]->mutable_cpu_data());
}
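Per sample, the layer outputs half the squared Euclidean distance, 0.5 * ||a - b||^2; the gemv with coefficient 0.5 performs the per-row reduction. A scalar sketch (hypothetical helper):

#include <cstddef>

// 0.5 * sum_i (a[i] - b[i])^2 for one row, as the sub/powx/gemv chain computes.
float half_sq_dist(const float* a, const float* b, std::size_t dim) {
  float acc = 0.f;
  for (std::size_t i = 0; i < dim; ++i) {
    float d = a[i] - b[i];
    acc += d * d;
  }
  return 0.5f * acc;
}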
Example No. 11
void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  caffe_cpu_copy(count, bottom_data, bottom_diff);
  if (input_scale_ != Dtype(1)) {
    caffe_scal(count, input_scale_, bottom_diff);
  }
  if (input_shift_ != Dtype(0)) {
    caffe_add_scalar(count, input_shift_, bottom_diff);
  }
  caffe_powx(count, bottom_diff, Dtype(-1), bottom_diff);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_scal(count, backward_num_scale_, bottom_diff);
  }
  caffe_mul(count, top_diff, bottom_diff, bottom_diff);
}
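Since d/dx log(s*x + b) = s / (s*x + b), the backward pass rebuilds the shifted, scaled input, takes its reciprocal via caffe_powx with exponent -1, and folds the remaining constants into backward_num_scale_. A scalar sketch of the resulting gradient (hypothetical helper; k stands in for backward_num_scale_):

// bottom_diff = top_diff * k / (s * x + b).
float log_backward(float x, float top_diff, float s, float b, float k) {
  return top_diff * k / (s * x + b);
}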
Example No. 12
void FocalLossLayer<Dtype>::compute_intermediate_values_of_cpu() {
  // compute the corresponding variables
  const int count        = prob_.count();
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* ones_data = ones_.cpu_data();
  Dtype* log_prob_data   = log_prob_.mutable_cpu_data();
  Dtype* power_prob_data = power_prob_.mutable_cpu_data();

  /// log(p_t), clamped away from zero for numerical stability
  const Dtype eps = Dtype(FLT_MIN);  // FLT_MIN = 1.17549e-38; adjust if needed
  for (int i = 0; i < count; ++i) {
    log_prob_data[i] = log(std::max(prob_data[i], eps));
  }
  /// caffe_log(count,  prob_data, log_prob_data);

  /// alpha* (1 - p_t) ^ gamma
  caffe_sub(count,  ones_data, prob_data, power_prob_data);
  caffe_powx(count, power_prob_.cpu_data(), gamma_, power_prob_data);
  caffe_scal(count, alpha_, power_prob_data);
}
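The helper caches the two per-probability quantities focal loss needs: log(p) clamped away from zero, and the modulating factor alpha * (1 - p)^gamma. A scalar sketch (hypothetical helper):

#include <algorithm>
#include <cfloat>
#include <cmath>

// Clamped log(p) and the focal weight alpha * (1 - p)^gamma for one probability.
void focal_terms(float p, float alpha, float gamma,
                 float* log_p, float* weight) {
  *log_p  = std::log(std::max(p, FLT_MIN));  // avoids log(0)
  *weight = alpha * std::pow(1.f - p, gamma);
}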
Example No. 13
void PowerLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  // Special case where we can ignore the input: scale or power is 0.
  if (diff_scale_ == Dtype(0)) {
    Dtype value = (power_ == 0) ? Dtype(1) : pow(shift_, power_);
    caffe_set(count, value, top_data);
    return;
  }
  const Dtype* bottom_data = bottom[0]->cpu_data();
  caffe_copy(count, bottom_data, top_data);
  if (scale_ != Dtype(1)) {
    caffe_scal(count, scale_, top_data);
  }
  if (shift_ != Dtype(0)) {
    caffe_add_scalar(count, shift_, top_data);
  }
  if (power_ != Dtype(1)) {
    caffe_powx(count, top_data, power_, top_data);
  }
}
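Forward is y = (scale * x + shift)^power, with an early-out that emits a constant when scale or power is zero. A scalar sketch of the general path (hypothetical helper):

#include <cmath>

// y = (scale * x + shift)^power, matching the copy/scal/add_scalar/powx chain.
float power_forward(float x, float scale, float shift, float power) {
  return std::pow(scale * x + shift, power);
}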
Example No. 14
void MyNeuronLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top){

  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  caffe_powx(count, bottom[0]->cpu_data(), Dtype(power_), top_data);
}
Example No. 15
void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num = bottom[0]->shape(0);
  int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);

  if (bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }

  if (use_global_stats_) {
    // use the stored mean/variance estimates.
    const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
        0 : 1 / this->blobs_[2]->cpu_data()[0];
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data());
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data());
  } else {
    // compute mean
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), bottom_data,
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        mean_.mutable_cpu_data());
  }

  // subtract mean
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, -1, num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 1., top_data);

  if (!use_global_stats_) {
    // compute variance using var(X) = E((X-EX)^2)
    caffe_powx(top[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), temp_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X-EX)^2)

    // compute and save moving average
    this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_;
    this->blobs_[2]->mutable_cpu_data()[0] += 1;
    caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(),
        moving_average_fraction_, this->blobs_[0]->mutable_cpu_data());
    int m = bottom[0]->count()/channels_;
    Dtype bias_correction_factor = m > 1 ? Dtype(m)/(m-1) : 1;
    caffe_cpu_axpby(variance_.count(), bias_correction_factor,
        variance_.cpu_data(), moving_average_fraction_,
        this->blobs_[1]->mutable_cpu_data());
  }

  // normalize variance
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
             variance_.mutable_cpu_data());

  // replicate variance to input size
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data());
  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  // TODO(cdoersch): The caching is only needed because later in-place layers
  //                 might clobber the data.  Can we skip this if they won't?
  caffe_copy(x_norm_.count(), top_data,
      x_norm_.mutable_cpu_data());
}
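End to end, the training path computes per-channel batch statistics and normalizes x_hat = (x - mean) / sqrt(var + eps); note that here eps is added before the square root. A per-element sketch given precomputed statistics (hypothetical helper):

#include <cmath>

// Normalize one value by its channel statistics: (x - mean) / sqrt(var + eps).
float bn_normalize(float x, float mean, float var, float eps) {
  return (x - mean) / std::sqrt(var + eps);
}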
Example No. 16
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  Dtype rate = this->GetLearningRate();
  Dtype delta = this->param_.delta();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype weight_decay = this->param_.weight_decay();
  string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(net_params[param_id]->count(),
              net_params[param_id]->cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
          net_params[param_id]->cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
          this->update_[param_id]->cpu_data(),
          this->history_[param_id]->cpu_data(),
          this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                this->history_[param_id]->cpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(),
                net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->cpu_data(), Dtype(0),
          net_params[param_id]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(net_params[param_id]->count(),
              net_params[param_id]->gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_gpu_powx(net_params[param_id]->count(),
          net_params[param_id]->gpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_gpu_data());

      // update history
      caffe_gpu_add(net_params[param_id]->count(),
          this->update_[param_id]->gpu_data(),
          this->history_[param_id]->gpu_data(),
          this->history_[param_id]->mutable_gpu_data());

      // prepare update
      caffe_gpu_powx(net_params[param_id]->count(),
                this->history_[param_id]->gpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_div(net_params[param_id]->count(),
                net_params[param_id]->gpu_diff(),
                this->update_[param_id]->gpu_data(),
                this->update_[param_id]->mutable_gpu_data());

      // scale and copy
      caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->gpu_data(), Dtype(0),
          net_params[param_id]->mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
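Stripped of the regularization branches, AdaGrad accumulates squared gradients without decay and divides: history += g^2, step = lr * g / (sqrt(history) + delta). A scalar sketch (hypothetical helper):

#include <cmath>

// One AdaGrad step for a single weight; history grows monotonically.
float adagrad_step(float grad, float* history, float delta, float lr) {
  *history += grad * grad;
  return lr * grad / (std::sqrt(*history) + delta);
}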
Example No. 17
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  size_t update_history_offset = net_params.size();
  switch (Caffe::mode()) {
  case Caffe::CPU: {
    // compute square of gradient in update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history of gradients
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[param_id]->mutable_cpu_data());

    // add delta to history to guard against dividing by zero later
    caffe_set(net_params[param_id]->count(), delta,
        this->temp_[param_id]->mutable_cpu_data());

    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[update_history_offset + param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[param_id]->cpu_data(),
        this->temp_[param_id]->mutable_cpu_data());

    // divide history of updates by history of gradients
    caffe_div(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(),
        this->temp_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // square root of the ratio: RMS of update history over RMS of gradient history
    caffe_powx(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());

    // compute the update
    caffe_mul(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        net_params[param_id]->mutable_cpu_diff());

    // compute square of update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history of updates
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[update_history_offset + param_id]->mutable_cpu_data());

    // apply learning rate
    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
        net_params[param_id]->cpu_diff(),
        net_params[param_id]->mutable_cpu_diff());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    adadelta_update_gpu(net_params[param_id]->count(),
        net_params[param_id]->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        this->history_[update_history_offset + param_id]->mutable_gpu_data(),
        momentum, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
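The CPU branch is the standard AdaDelta recurrence: decay-average E[g^2], rescale the gradient by sqrt((E[dx^2] + delta) / (E[g^2] + delta)), then decay-average the squared update. A scalar sketch (hypothetical helper):

#include <cmath>

// One AdaDelta step: h_g and h_dx are running averages of squared gradients
// and squared updates; delta sits inside both square roots, as above.
float adadelta_step(float grad, float* h_g, float* h_dx,
                    float momentum, float delta, float lr) {
  *h_g = momentum * (*h_g) + (1.f - momentum) * grad * grad;
  float dx = grad * std::sqrt((*h_dx + delta) / (*h_g + delta));
  *h_dx = momentum * (*h_dx) + (1.f - momentum) * dx * dx;
  return lr * dx;
}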
Example No. 18
void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (frozen_) {
    if (propagate_down[0]) {
      const Dtype* const_top_diff = top[0]->cpu_diff();
      Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
      // Use the moving average variance
      caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
          batch_statistic_.mutable_cpu_data());
      // Add eps
      caffe_add_scalar(batch_statistic_.count(), bn_eps_,
          batch_statistic_.mutable_cpu_data());
      // Standard deviation
      caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
          Dtype(0.5), batch_statistic_.mutable_cpu_data());
      // Divide slope by std
      caffe_div(batch_statistic_.count(), this->blobs_[0]->cpu_data(),
          batch_statistic_.cpu_data(), batch_statistic_.mutable_cpu_data());
      // Broadcast
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
          Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
          Dtype(0), spatial_statistic_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
          height_ * width_, 1, Dtype(1),
          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
          Dtype(0), broadcast_buffer_.mutable_cpu_data());
      // Elementwise multiply top grad with (slope / std)
      caffe_mul(broadcast_buffer_.count(), const_top_diff,
          broadcast_buffer_.cpu_data(), bottom_diff);
    }
    return;
  }

  // gradient w.r.t. slope
  if (this->param_propagate_down_[0]) {
    const Dtype* const_top_diff = top[0]->cpu_diff();
    Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
    caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(), const_top_diff,
        broadcast_buffer_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1), broadcast_buffer_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(1), scale_diff);
  }

  // gradient w.r.t. bias
  if (this->param_propagate_down_[1]) {
    const Dtype* const_top_diff = top[0]->cpu_diff();
    Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1), const_top_diff, spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(1), shift_diff);
  }

  // gradient w.r.t. normalized inputs
  if (propagate_down[0]) {
    const Dtype* const_top_diff = top[0]->cpu_diff();
    const Dtype* const_bottom_diff = bottom[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* scale_data = this->blobs_[0]->cpu_data();

    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
        Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data,
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
        height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        broadcast_buffer_.mutable_cpu_data());
    caffe_mul(broadcast_buffer_.count(), const_top_diff,
        broadcast_buffer_.cpu_data(), broadcast_buffer_.mutable_cpu_data());

    // sum of x_hat * (dl / dx_hat)
    caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(),
        broadcast_buffer_.cpu_data(), bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1), const_bottom_diff, spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());

    // x_hat times the sum
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
        Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
        height_ * width_, 1, Dtype(1),
        spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(0), bottom_diff);
    caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(),
        const_bottom_diff, bottom_diff);

    // Subtract the average of x_hat times the sum
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1), broadcast_buffer_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
        Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
        height_ * width_, 1, Dtype(1),
        spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(1), bottom_diff);
    caffe_cpu_axpby(broadcast_buffer_.count(), Dtype(1),
        broadcast_buffer_.cpu_data(), Dtype(-1) / (num_ * height_ * width_),
        bottom_diff);

    // Divide by the std
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
        Dtype(1), batch_sum_multiplier_.cpu_data(), x_std_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
        height_ * width_, 1, Dtype(1),
        spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
        Dtype(0), broadcast_buffer_.mutable_cpu_data());
    caffe_div(broadcast_buffer_.count(), const_bottom_diff,
        broadcast_buffer_.cpu_data(), bottom_diff);
  }
}
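In the frozen branch the layer is a fixed affine map, so backpropagation reduces to scaling the top gradient by slope / sqrt(var + eps). A per-element sketch (hypothetical helper):

#include <cmath>

// Frozen-BN backward for one value: gradient of a fixed affine transform.
float bn_frozen_backward(float top_diff, float scale, float var, float eps) {
  return top_diff * scale / std::sqrt(var + eps);
}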
Example No. 19
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
	const vector<Blob<Dtype>*>& top) {
	const Dtype* const_bottom_data = bottom[0]->cpu_data();
	const Dtype* const_top_data = top[0]->cpu_data();
	Dtype* top_data = top[0]->mutable_cpu_data();

	const Dtype* scale_data = this->blobs_[0]->cpu_data();
	const Dtype* shift_data = this->blobs_[1]->cpu_data();

	// ---------- mean subtraction ---------- //
	// statistic across spatial
	caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_, Dtype(1. / (height_ * width_)), const_bottom_data,
		spatial_sum_multiplier_.cpu_data(), Dtype(0), spatial_statistic_.mutable_cpu_data());
	// statistic across batch
	caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_), spatial_statistic_.cpu_data(),
		batch_sum_multiplier_.cpu_data(), Dtype(0), batch_statistic_.mutable_cpu_data());
	// save history mean
	if (this->phase_ == TRAIN) {
		caffe_cpu_axpby(batch_statistic_.count(), decay_, batch_statistic_.cpu_data(), Dtype(1) - decay_,
			this->blobs_[2]->mutable_cpu_data());
	}
	if (this->phase_ == TEST && moving_average_) {
		// use moving average mean
		caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(), batch_statistic_.mutable_cpu_data());
	}
	// put mean blob into buffer_blob_
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(), Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(-1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	// subtract mean
	caffe_add(buffer_blob_.count(), const_bottom_data, buffer_blob_.cpu_data(), top_data);

	// ---------- variance normalization ---------- //
	// put the squares of X - mean into buffer_blob_
	caffe_powx(buffer_blob_.count(), const_top_data, Dtype(2), buffer_blob_.mutable_cpu_data());
	// statistic across spatial
	caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_, Dtype(1. / (height_ * width_)), buffer_blob_.cpu_data(),
		spatial_sum_multiplier_.cpu_data(), Dtype(0), spatial_statistic_.mutable_cpu_data());
	// statistic across batch
	caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_), spatial_statistic_.cpu_data(),
		batch_sum_multiplier_.cpu_data(), Dtype(0), batch_statistic_.mutable_cpu_data());
	// save history variance
	if (this->phase_ == TRAIN) {
		caffe_cpu_axpby(batch_statistic_.count(), decay_, batch_statistic_.cpu_data(), Dtype(1) - decay_,
			this->blobs_[3]->mutable_cpu_data());
	}
	if (this->phase_ == TEST && moving_average_) {
		// use moving average variance
		caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(), batch_statistic_.mutable_cpu_data());
	}
	// add eps
	caffe_add_scalar(batch_statistic_.count(), var_eps_, batch_statistic_.mutable_cpu_data());
	// std
	caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(), Dtype(0.5),
		batch_statistic_.mutable_cpu_data());
	// put std blob into buffer_blob_
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(), Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	// variance normalization
	caffe_div(buffer_blob_.count(), const_top_data, buffer_blob_.cpu_data(), top_data);

	// ---------- save x_norm and x_std ---------- //
	caffe_copy(buffer_blob_.count(), const_top_data, x_norm_.mutable_cpu_data());
	caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(), x_std_.mutable_cpu_data());

	// ---------- scale ---------- //
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	caffe_mul(buffer_blob_.count(), const_top_data, buffer_blob_.cpu_data(), top_data);

	// ---------- shift ---------- //
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	caffe_add(buffer_blob_.count(), const_top_data, buffer_blob_.cpu_data(), top_data);

}
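After normalization this variant applies the learned per-channel affine transform, y = scale * x_hat + shift. A per-element sketch of the full forward map (hypothetical helper):

#include <cmath>

// Normalize, then scale and shift: y = scale * (x - mean) / sqrt(var + eps) + shift.
float bn_forward(float x, float mean, float var, float eps,
                 float scale, float shift) {
  return scale * (x - mean) / std::sqrt(var + eps) + shift;
}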
Example No. 20
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
                                   const vector<bool>& propagate_down,
                                   vector<Blob<Dtype>*>* bottom) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();

    int num;
    if (this->layer_param_.mvn_param().across_channels())
        num = (*bottom)[0]->num();
    else
        num = (*bottom)[0]->num() * (*bottom)[0]->channels();

    int dim = (*bottom)[0]->count() / num;
    Dtype eps = 1e-10;

    if (this->layer_param_.mvn_param().normalize_variance()) {
        caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              bottom_diff);
        caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
                              bottom_diff);

        caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
                        bottom_diff);

        // put the squares of bottom into temp_
        caffe_powx(temp_.count(), bottom_data, Dtype(2),
                   temp_.mutable_cpu_data());

        // computes variance using var(X) = E(X^2) - (EX)^2
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
                              sum_multiplier_.cpu_data(), 0.,
                              variance_.mutable_cpu_data());  // E(X^2)
        caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
                   temp_.mutable_cpu_data());  // (EX)^2
        caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
                  variance_.mutable_cpu_data());  // variance

        // normalize variance
        caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
                   variance_.mutable_cpu_data());

        caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());

        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
                              variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
    } else {
        caffe_copy(temp_.count(), top_diff, bottom_diff);
    }
}
Example No. 21
  void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
    const Dtype* scale_data = this->blobs_[0]->cpu_data();

    switch (this->layer_param_.bn_param().bn_mode()) {
    case BNParameter_BNMode_LEARN:
      // Propagate gradients to the layer parameters
      // gradient w.r.t. scale
      caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
          top_diff, buffer_blob_.mutable_cpu_data());
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_variance_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);

      // gradient w.r.t. shift
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), top_diff,
          spatial_sum_multiplier_.cpu_data(),
          Dtype(0), spatial_mean_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_,
          Dtype(1), spatial_mean_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(),
          Dtype(0), shift_diff);

      // Propagate down

      // put scale * top_diff to buffer_blob_
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
          buffer_blob_.mutable_cpu_data());

      // use new top diff for computation
      caffe_mul(buffer_blob_.count(),  x_norm_.cpu_data(),
          buffer_blob_.cpu_data(), bottom_diff);
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1), bottom_diff,
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          bottom_diff);

      caffe_mul(buffer_blob_.count(),
          x_norm_.cpu_data(), bottom_diff, bottom_diff);

      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff);

      caffe_cpu_axpby(buffer_blob_.count(), Dtype(1),
          buffer_blob_.cpu_data(), Dtype(-1. / (N_ * H_ * W_)),
          bottom_diff);

      // put the squares of bottom into buffer_blob_
      caffe_powx(buffer_blob_.count(), bottom_data, Dtype(2),
          buffer_blob_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_variance_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_div(buffer_blob_.count(), bottom_diff,
          buffer_blob_.cpu_data(), bottom_diff);
      break;
    case BNParameter_BNMode_INFERENCE:
      // Propagate gradients to the layer parameters
      // gradient w.r.t. scale
      caffe_mul(buffer_blob_.count(), bottom_data,
          top_diff, buffer_blob_.mutable_cpu_data());
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_variance_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);

      // gradient w.r.t. shift
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), top_diff,
          spatial_sum_multiplier_.cpu_data(),
          Dtype(0), spatial_mean_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_,
          Dtype(1), spatial_mean_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(),
          Dtype(0), shift_diff);

      // Propagate down
      // put scale * top_diff to buffer_blob_
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
          bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown BN mode.";
    }
  }
Example No. 22
  void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();
    const Dtype* const_top_data = top[0]->cpu_data();

    const Dtype* scale_data = this->blobs_[0]->cpu_data();
    const Dtype* shift_data = this->blobs_[1]->cpu_data();

    switch (this->layer_param_.bn_param().bn_mode()) {
    case BNParameter_BNMode_LEARN:
      // put the squares of bottom into buffer_blob_
      caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
          buffer_blob_.mutable_cpu_data());

      // computes variance using var(X) = E(X^2) - (EX)^2
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1. / (H_ * W_)), bottom_data,
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      // E(X^2) across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      // E(X^2) across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
          spatial_variance_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_variance_.mutable_cpu_data());

      caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
          buffer_blob_.mutable_cpu_data());  // (EX)^2
      caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
          buffer_blob_.cpu_data(),
          batch_variance_.mutable_cpu_data());  // variance

      // save top[1] (batch_mean) and top[2] (batch_variance)
      if (top.size() > 1) {
          caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
              top[1]->mutable_cpu_data());
      }
      if (top.size() > 2) {
          caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
              top[2]->mutable_cpu_data());
      }

      // do mean and variance normalization
      // subtract mean
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
          C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(-1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_add(buffer_blob_.count(), bottom_data,
          buffer_blob_.cpu_data(), top_data);

      // normalize variance
      caffe_add_scalar(batch_variance_.count(), var_eps_,
        batch_variance_.mutable_cpu_data());
      caffe_powx(batch_variance_.count(),
          batch_variance_.cpu_data(), Dtype(0.5),
          batch_variance_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
          C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_variance_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_div(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);

      // Saving x_norm
      caffe_copy(buffer_blob_.count(), const_top_data,
          x_norm_.mutable_cpu_data());
      // scale
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_data,
          buffer_blob_.cpu_data(), top_data);

      // shift
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_add(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);
      break;
    case BNParameter_BNMode_INFERENCE:
      // scale
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), bottom_data,
          buffer_blob_.cpu_data(), top_data);

      // shift
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_add(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);
      break;
    default:
      LOG(FATAL) << "Unknown BN mode.";
    }
  }
Example No. 23
  template <typename Dtype>
  void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                   const vector<Blob<Dtype>*>& top) {
    const Dtype* const_bottom_data = bottom[0]->cpu_data();
    const Dtype* const_top_data = top[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();

    const Dtype* scale_data = this->blobs_[0]->cpu_data();
    const Dtype* shift_data = this->blobs_[1]->cpu_data();

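    // Batch normalization forward pass:
    //   y = scale * (x - mean) / sqrt(var + eps) + shift
    // with per-channel statistics taken from the moving averages
    // (frozen or TEST phase) or computed from the current mini-batch.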
    // Mean normalization
    if (frozen_ || this->phase_ == TEST) {
      // Use the moving average mean
      caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
                 batch_statistic_.mutable_cpu_data());
    }
    else {
      // Compute the mean by averaging over spatial and batch dimensions.
      caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
                            Dtype(1) / (height_ * width_), const_bottom_data,
                            spatial_sum_multiplier_.cpu_data(), Dtype(0),
                            spatial_statistic_.mutable_cpu_data());
      caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_,
                            Dtype(1) / num_, spatial_statistic_.cpu_data(),
                            batch_sum_multiplier_.cpu_data(), Dtype(0),
                            batch_statistic_.mutable_cpu_data());
      // Add to the moving average
      if (!frozen_) {
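        // Exponential moving average of the mean:
        //   blobs_[2] = (1 - bn_momentum_) * batch_mean
        //             + bn_momentum_ * blobs_[2]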
        caffe_cpu_axpby(batch_statistic_.count(),
                        Dtype(1) - bn_momentum_, batch_statistic_.cpu_data(),
                        bn_momentum_, this->blobs_[2]->mutable_cpu_data());
      }
    }
    // Broadcast the mean vector
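    // Two rank-1 GEMMs tile the C-vector over the batch and then over the
    // H*W spatial positions; alpha = -1 negates the broadcast mean so the
    // caffe_add below performs the subtraction.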
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(-1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    // Subtract
    caffe_add(broadcast_buffer_.count(), const_bottom_data,
              broadcast_buffer_.cpu_data(), top_data);

    // Variance normalization
    if (frozen_ || this->phase_ == TEST) {
      // Use the moving average variance
      caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
                 batch_statistic_.mutable_cpu_data());
    }
    else {
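      // top_data currently holds (x - mean); square it and average over
      // the spatial and batch dimensions to obtain the biased per-channel
      // batch variance E[(x - mean)^2].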
      caffe_powx(broadcast_buffer_.count(), const_top_data, Dtype(2),
                 broadcast_buffer_.mutable_cpu_data());
      caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
                            Dtype(1) / (height_ * width_), broadcast_buffer_.cpu_data(),
                            spatial_sum_multiplier_.cpu_data(), Dtype(0),
                            spatial_statistic_.mutable_cpu_data());
      caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
                            spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
                            Dtype(0), batch_statistic_.mutable_cpu_data());
      // Add eps
      caffe_add_scalar(batch_statistic_.count(), bn_eps_,
                       batch_statistic_.mutable_cpu_data());
      // Inverse standard deviation
      caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
                 Dtype(-0.5), batch_statistic_.mutable_cpu_data());
      // Add to the moving average
      if (!frozen_) {
        caffe_cpu_axpby(batch_statistic_.count(),
                        Dtype(1) - bn_momentum_, batch_statistic_.cpu_data(),
                        bn_momentum_, this->blobs_[3]->mutable_cpu_data());
      }
    }
    // Broadcast the inverse std
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    // Multiply with the inverse std
    caffe_mul(broadcast_buffer_.count(), const_top_data,
              broadcast_buffer_.cpu_data(), top_data);

    // Save the normalized inputs and std for backprop
    if (!frozen_) {
      caffe_copy(broadcast_buffer_.count(), const_top_data,
                 x_norm_.mutable_cpu_data());
      caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
                 x_inv_std_.mutable_cpu_data());
    }

    // Scale
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data,
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    caffe_mul(broadcast_buffer_.count(), const_top_data,
              broadcast_buffer_.cpu_data(), top_data);

    // Shift
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), shift_data,
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    caffe_add(broadcast_buffer_.count(), const_top_data,
              broadcast_buffer_.cpu_data(), top_data);
  }
Example No. 24
template <typename Dtype>
void AdaGradSolver<Dtype>::ComputeUpdateValue(uint_tp param_id, Dtype rate) {
  CHECK(Caffe::root_solver());
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype local_rate = rate * net_params_lr[param_id];
  switch (Caffe::mode()) {
    case Caffe::CPU: {
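      // AdaGrad accumulates squared gradients and rescales each step by
      // the inverse accumulated magnitude:
      //   history += diff^2
      //   diff    <- local_rate * diff / (sqrt(history) + delta)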
      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
                 net_params[param_id]->cpu_diff(), Dtype(2),
                 this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
                this->update_[param_id]->cpu_data(),
                this->history_[param_id]->cpu_data(),
                this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                 this->history_[param_id]->cpu_data(), Dtype(0.5),
                 this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(), delta,
                       this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(), net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
                      this->update_[param_id]->cpu_data(), Dtype(0),
                      net_params[param_id]->mutable_cpu_diff());
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
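        // Same AdaGrad steps as the CPU path, executed with CUDA kernels.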
        // compute square of gradient in update
        caffe_gpu_powx(net_params[param_id]->count(),
                       net_params[param_id]->gpu_diff(), Dtype(2),
                       this->update_[param_id]->mutable_gpu_data());

        // update history
        caffe_gpu_add(net_params[param_id]->count(),
                      this->update_[param_id]->gpu_data(),
                      this->history_[param_id]->gpu_data(),
                      this->history_[param_id]->mutable_gpu_data());

        // prepare update
        caffe_gpu_powx(net_params[param_id]->count(),
                       this->history_[param_id]->gpu_data(), Dtype(0.5),
                       this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_add_scalar(net_params[param_id]->count(), delta,
                             this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_div(net_params[param_id]->count(),
                      net_params[param_id]->gpu_diff(),
                      this->update_[param_id]->gpu_data(),
                      this->update_[param_id]->mutable_gpu_data());

        // scale and copy
        caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
                        this->update_[param_id]->gpu_data(), Dtype(0),
                        net_params[param_id]->mutable_gpu_diff());
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
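        // OpenCL (GreenTea) variant of the same update; buffers are passed
        // as cl_mem handles with explicit element offsets (0 here).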
        // compute square of gradient in update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0, Dtype(2),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // update history
        greentea_gpu_add<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->mutable_gpu_data()), 0);

        // prepare update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->history_[param_id]->gpu_data()), 0, Dtype(0.5),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_add_scalar<Dtype>(
            this->device_->id(), net_params[param_id]->count(), delta,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_div<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0,
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // scale and copy
        greentea_gpu_axpby<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            local_rate, (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            Dtype(0), (cl_mem) (net_params[param_id]->mutable_gpu_diff()), 0);
#endif  // USE_GREENTEA
      }
#else
      NO_GPU;
#endif
      break;
    }
    default:
      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}