Example #1
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                  vector<Blob<Dtype>*>* top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = (*top)[0]->mutable_cpu_data();
    int num;
    if (this->layer_param_.mvn_param().across_channels())
        num = bottom[0]->num();
    else
        num = bottom[0]->num() * bottom[0]->channels();

    int dim = bottom[0]->count() / num;
    Dtype eps = 1e-10;

    if (this->layer_param_.mvn_param().normalize_variance()) {
        // put the squares of bottom into temp_
        caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
                   temp_.mutable_cpu_data());

        // computes variance using var(X) = E(X^2) - (EX)^2
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
                              sum_multiplier_.cpu_data(), 0.,
                              variance_.mutable_cpu_data());  // E(X^2)
        caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
                   temp_.mutable_cpu_data());  // (EX)^2
        caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
                  variance_.mutable_cpu_data());  // variance

        // do mean and variance normalization
        // subtract mean
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);

        // normalize variance
        caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
                   variance_.mutable_cpu_data());

        caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());

        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
                              variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
    } else {
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX

        // subtract mean
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
    }
}
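Every snippet on this page ultimately leans on Caffe's element-wise math helpers. For reference, caffe_add(n, a, b, y) computes y[i] = a[i] + b[i] over n elements; the sketch below spells out that contract only (the real implementation typically dispatches to a vectorized vendor routine such as MKL's vsAdd/vdAdd, which is assumed here rather than shown):

// Reference semantics of caffe_add: element-wise sum of two equally sized arrays.
// Minimal illustrative sketch, not Caffe's actual (vectorized) code path.
template <typename Dtype>
void caffe_add_reference(const int n, const Dtype* a, const Dtype* b, Dtype* y) {
  for (int i = 0; i < n; ++i) {
    y[i] = a[i] + b[i];
  }
}

Example #2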
void SocketSyncCPU<Dtype>::on_gradients_ready() {
  // Reduce gradients from local CPU.
  P2PSyncCPU<Dtype>::on_gradients_ready();
  // Send gradients to corresponding parameter server node
  int peer = rank_ + 1;
  for (int n = 0; n < peers_.size() - 1; ++n) {
    if (peer == peers_.size()) {
      peer = 0;
    }
    diff_send_[peer]->Write();
    peer++;
  }
  // Sum gradients as they are received
  peer = rank_ + 1;
  for (int n = 0; n < peers_.size() - 1; ++n) {
    if (peer == peers_.size()) {
      peer = 0;
    }
    SocketBuffer * buffer = diff_recv_[peer]->Read();
    Dtype* src = reinterpret_cast<Dtype*>(buffer->addr());
    Dtype* dst = diff_ + own_offs_;
    caffe_add(own_size_, src, dst, dst);
    peer++;
  }
}
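Example #3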
void SigmoidWeightedCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to weight inputs.";
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    const Dtype* weight = bottom[2]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    Dtype* tmp = new Dtype[count << 1];
    Dtype* tmp1 = tmp + count;

    // diff: 1/2
    caffe_set(count, (Dtype)0.5, bottom_diff);
    // diff: 1/2 * \hat{p}
    caffe_mul(count, bottom_diff, sigmoid_output_data, bottom_diff);
    // diff: 1/2 * (1-p) * \hat{p}
    caffe_set(count, (Dtype)1, tmp1);
    caffe_sub(count, tmp1, target, tmp);
    caffe_mul(count, bottom_diff, tmp, bottom_diff);
    // diff: 1/2(1-w) * (1-p) * \hat{p}
    caffe_sub(count, tmp1, weight, tmp);
    caffe_div(count, bottom_diff, tmp, bottom_diff);

    // tmp: 1-\hat{p}
    caffe_sub(count, tmp1, sigmoid_output_data, tmp);
    // tmp: p * (1-\hat{p})
    caffe_mul(count, tmp, target, tmp);
    // tmp: -1/2 * p * (1-\hat{p})
    caffe_set(count, (Dtype)-0.5, tmp1);
    caffe_mul(count, tmp, tmp1, tmp);
    // tmp: -1/2w * p * (1-\hat{p})
    caffe_div(count, tmp, weight, tmp);
    // diff: -(1/2w * p * (1-\hat{p}) - 1/2(1-w) * (1-p) * \hat{p})
    caffe_add(count, bottom_diff, tmp, bottom_diff);

    delete[] tmp;

    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
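Tracing the comments above end to end, the gradient this backward pass writes into bottom_diff (including the final loss-weight scaling) is, per element, the expression below, where \hat{p}_i = \sigma(x_i) is the sigmoid output, p_i the target, w_i the weight and N = num. This is a restatement of what the code computes, not a formula quoted from elsewhere:

\[
\frac{\partial \ell}{\partial x_i}
  = \frac{\text{loss\_weight}}{N}
    \left[
      \frac{\hat{p}_i\,(1 - p_i)}{2\,(1 - w_i)}
      - \frac{p_i\,(1 - \hat{p}_i)}{2\,w_i}
    \right].
\]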
Example #4
File: mvn_layer.cpp  Project: 0hm/caffe
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();

  int dim = bottom[0]->count() / num;

  if (this->layer_param_.mvn_param().normalize_variance()) {
    caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
          sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
          mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
          bottom_diff);
    caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
            sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
            mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
            bottom_diff);

    caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
        bottom_diff);

    // put the squares of bottom into temp_
    caffe_powx(temp_.count(), bottom_data, Dtype(2),
        temp_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());

    caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
  } else {
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
  }
}
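Example #5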
void BilinearPatchFastLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  caffe_set(bottom[0]->num()*bottom[0]->channels()*bottom[0]->height()*bottom[0]->width(), Dtype(0.0), bottom[0]->mutable_cpu_diff());
  caffe_set(bottom[1]->num()*bottom[1]->channels()*bottom[1]->height()*bottom[1]->width(), Dtype(0.0), bottom[1]->mutable_cpu_diff());


  for (int n = 0; n < bottom[0]->num(); n++){

    for(int i = 0; i < poolingFieldsNum; i++){
      if (propagate_down[0]) {
        
        multiplyAllChannelsByMask(bottom[1]->cpu_data() + bottom[1]->channels() * bottom[1]->height() * bottom[1]->width() * n, bottom[2]->cpu_data() + bottom[2]->channels() * bottom[2]->height() * bottom[2]->width() * n, i, masked_buffer2.mutable_cpu_data(), bottom[1]->height()*bottom[1]->width(), bottom[1]->channels());

        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, bottom[0]->channels(), bottom[0]->width()*bottom[0]->height(), bottom[1]->channels(),(Dtype)1., top[0]->cpu_diff() + n * top[0]->channels()  + i * bottom[0]->channels() * bottom[1]->channels(), masked_buffer2.cpu_data(), (Dtype)0., dlda_buffer.mutable_cpu_diff());
	
	
        multiplyAllChannelsByMask(dlda_buffer.cpu_diff(), bottom[2]->cpu_data() + bottom[2]->channels() * bottom[2]->height() * bottom[2]->width() * n, i, dlda_buffer.mutable_cpu_diff(), bottom[0]->height()*bottom[0]->width(), bottom[0]->channels());

        caffe_add(bottom[0]->channels()*bottom[0]->height()*bottom[0]->width(), dlda_buffer.cpu_diff(), bottom[0]->cpu_diff() + bottom[0]->channels() * bottom[0]->height() * bottom[0]->width() * n, bottom[0]->mutable_cpu_diff() + bottom[0]->channels() * bottom[0]->height() * bottom[0]->width() * n);

      }
	
      if (propagate_down[1]) {

         multiplyAllChannelsByMask(bottom[0]->cpu_data() + bottom[0]->channels() * bottom[0]->height() * bottom[0]->width() * n, bottom[2]->cpu_data() + bottom[2]->channels() * bottom[2]->height() * bottom[2]->width() * n, i, masked_buffer1.mutable_cpu_data(), bottom[0]->height()*bottom[0]->width(), bottom[0]->channels());
        
        caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, bottom[1]->channels(), bottom[1]->width()*bottom[1]->height(), bottom[0]->channels(),(Dtype)1., top[0]->cpu_diff() + n * top[0]->channels()  + i * bottom[0]->channels() * bottom[1]->channels(), masked_buffer1.cpu_data(), (Dtype)0., dldb_buffer.mutable_cpu_diff());


        multiplyAllChannelsByMask(dldb_buffer.cpu_diff(), bottom[2]->cpu_data() + bottom[2]->channels() * bottom[2]->height() * bottom[2]->width() * n, i, dldb_buffer.mutable_cpu_diff(), bottom[1]->height()*bottom[1]->width(), bottom[1]->channels());

        caffe_add(bottom[1]->channels()*bottom[1]->height()*bottom[1]->width(), dldb_buffer.cpu_diff(), bottom[1]->cpu_diff() + bottom[1]->channels() * bottom[1]->height() * bottom[1]->width() * n, bottom[1]->mutable_cpu_diff() + bottom[1]->channels() * bottom[1]->height() * bottom[1]->width() * n);

      }
    }
  }

}
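Example #6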
	void TripletClipHingeLossLayer<Dtype>::
		average_hashing(const vector<Blob<Dtype>*>& bottom){

			int batch_size = bottom[0]->num() / frame_num;
			caffe_set(batch_size*dim, Dtype(0.0), ave_or.mutable_cpu_data());
			caffe_set(batch_size*dim, Dtype(0.0), ave_si.mutable_cpu_data());
			caffe_set(batch_size*dim, Dtype(0.0), ave_di.mutable_cpu_data());

			for (int i = 0; i < batch_size; ++i){
				for (int j = 0; j < frame_num; ++j){
					int index = i*frame_num*dim + j*dim;
					caffe_add(dim, bottom[0]->cpu_data() + index,
						ave_or.cpu_data() + i*dim, ave_or.mutable_cpu_data() + i*dim);
					caffe_add(dim, bottom[1]->cpu_data() + index,
						ave_si.cpu_data() + i*dim, ave_si.mutable_cpu_data() + i*dim);
					caffe_add(dim, bottom[2]->cpu_data() + index,
						ave_di.cpu_data() + i*dim, ave_di.mutable_cpu_data() + i*dim);
				}
				caffe_scal(dim, 1 / Dtype(frame_num), ave_or.mutable_cpu_data() + i*dim);
				caffe_scal(dim, 1 / Dtype(frame_num), ave_si.mutable_cpu_data() + i*dim);
				caffe_scal(dim, 1 / Dtype(frame_num), ave_di.mutable_cpu_data() + i*dim);
			}
		}
Example #7
void NoiseLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* rand_vec_data = rand_vec_.mutable_cpu_data();
  const int count = bottom[0]->count();
  // create Gaussian noise and add it to top; works in-place or out-of-place
  if (sigma_ > 0) {
    caffe_rng_gaussian(count, Dtype(0), sigma_, rand_vec_data);
  } else if (bottom[0] == top[0]) {
    // in-place with noise disabled: leave rand_vec_ untouched
  } else {
    caffe_set(count, Dtype(0), rand_vec_data);
  }
  // add the (possibly zero) noise vector to the input
  caffe_add(count, rand_vec_data, bottom_data, top_data);
}
Example #8
void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  if (top.size() == 1) {
    caffe_copy(count_, top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff());
    return;
  }
  caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(),
            bottom[0]->mutable_cpu_diff());
  // Add remaining top blob diffs.
  for (int i = 2; i < top.size(); ++i) {
    const Dtype* top_diff = top[i]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
  }
}
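Since every top blob of a split layer is an identical copy of the single bottom blob, the backward pass above is just the accumulation

\[
\frac{\partial \ell}{\partial x} = \sum_{k=0}^{K-1} \frac{\partial \ell}{\partial y_k},
\]

which the code realizes as one caffe_add for the first two diffs followed by caffe_axpy for the rest.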
Example #9
void NoiseLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  if (this->phase_ == TRAIN) {
    Dtype data_magnitude = sqrt(bottom[0]->sumsq_data() / Dtype(bottom[0]->count()));
    if (this->layer_param_.noise_param().has_gaussian_std()) {
      caffe_rng_gaussian<Dtype>(count, this->layer_param_.noise_param().bias(),
                                data_magnitude * this->layer_param_.noise_param().gaussian_std(), mask.mutable_cpu_data());
    }
    else if (this->layer_param_.noise_param().has_uniform_range()) {
      caffe_rng_uniform<Dtype>(count, this->layer_param_.noise_param().bias() - this->layer_param_.noise_param().uniform_range(),
                        this->layer_param_.noise_param().bias() + this->layer_param_.noise_param().uniform_range(), mask.mutable_cpu_data());
    }
    caffe_add(count, bottom_data, mask.cpu_data(), top_data);
  } else {
    caffe_copy(count, bottom_data, top_data);
  }
}
Example #10
void CapSequenceLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  vector<int> lengths;
  const int num_lengths =
    this->runtime_param().cap_sequence_param().sequence_lengths_size();
  for (int i = 0; i < num_lengths; ++i) {
    lengths.push_back(
        this->runtime_param().cap_sequence_param().sequence_lengths(i));
  }
  int size = 1;
  for (int i = 1; i < bottom[0]->shape().size(); ++i) {
    size *= bottom[0]->shape(i);
  }
  for (int i = 0; i < lengths.size(); ++i) {
    const int offset = i * size;
    caffe_add(size, bottom[lengths[i]]->cpu_diff() + offset,
        top[0]->cpu_diff() + offset,
        bottom[lengths[i]]->mutable_cpu_diff() + offset);
  }
}
Example #11
File: mvn_layer.cpp  Project: 0hm/caffe
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();

  int dim = bottom[0]->count() / num;

  // subtract mean
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);  // X-EX

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // compute variance using var(X) = E((X-EX)^2)
    caffe_powx(bottom[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
        sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X-EX)^2)

    // normalize variance
    caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
          variance_.mutable_cpu_data());

    caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());

    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
          variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
          temp_.mutable_cpu_data());

    caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  }
}
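For each of the num rows of dim elements, the forward pass above therefore produces

\[
y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x]} + \varepsilon}
\]

when normalize_variance is set (note that eps_ is added to the standard deviation, not to the variance), and simply y = x - E[x] otherwise.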
Example #12
void LocalLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {

  Dtype* x_data = col_buffer_.mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  Blob<Dtype> E;
  E.Reshape(1, 1, 1, K_);
  FillerParameter filler_param;
  filler_param.set_value(1);
  ConstantFiller<Dtype> filler(filler_param);
  filler.Fill(&E);

  Blob<Dtype> intermediate;
  intermediate.Reshape(1, 1, K_, N_);
  for (int n=0; n<num_; n++) {
    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
               width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data);

    for (int m=0; m<num_output_; m++) { 
      caffe_mul(K_*N_, x_data, weight+this->blobs_[0]->offset(m),
                intermediate.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, 1, N_, K_,
                            (Dtype)1., E.cpu_data(),
                            intermediate.cpu_data(),
                            (Dtype)0., top_data + top[0]->offset(n, m));
    }

    if (bias_term_) {
      caffe_add(M_ * N_, this->blobs_[1]->cpu_data(),
                top_data + top[0]->offset(n),
                top_data + top[0]->offset(n));
    }
  }
}
Example #13
void Tensor<Dtype>::AddFrom(const Tensor& source) {
  if (source.count() != count_ || source.shape() != shape_) {
    ASSERT(false, "Trying to add blobs of different sizes: "
      << source.count() << " != " << count_);
  }
  switch (mode()) {
  case Caffe::CPU:
    caffe_add(count_, source.cpu_mem(),
        this->cpu_mem(),
        this->mutable_cpu_mem());
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    caffe_gpu_add(count_, source.gpu_mem(),
        this->gpu_mem(),
        this->mutable_gpu_mem());
#else
    NO_GPU;
#endif
    break;
  default:
    ASSERT(false, "Unknown caffe mode.");
  }
}
Example #14
  void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                   const vector<Blob<Dtype>*>& top) {
    const Dtype* const_bottom_data = bottom[0]->cpu_data();
    const Dtype* const_top_data = top[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();

    const Dtype* scale_data = this->blobs_[0]->cpu_data();
    const Dtype* shift_data = this->blobs_[1]->cpu_data();

    // Mean normalization
    if (frozen_ || this->phase_ == TEST) {
      // Use the moving average mean
      caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
                 batch_statistic_.mutable_cpu_data());
    }
    else {
      // Compute the mean by averaging over spatial and batch dimensions.
      caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
                            Dtype(1) / (height_ * width_), const_bottom_data,
                            spatial_sum_multiplier_.cpu_data(), Dtype(0),
                            spatial_statistic_.mutable_cpu_data());
      caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_,
                            Dtype(1) / num_, spatial_statistic_.cpu_data(),
                            batch_sum_multiplier_.cpu_data(), Dtype(0),
                            batch_statistic_.mutable_cpu_data());
      // Add to the moving average
      if (!frozen_) {
        caffe_cpu_axpby(batch_statistic_.count(),
                        Dtype(1) - bn_momentum_, batch_statistic_.cpu_data(),
                        bn_momentum_, this->blobs_[2]->mutable_cpu_data());
      }
    }
    // Broadcast the mean vector
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(-1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    // Subtract
    caffe_add(broadcast_buffer_.count(), const_bottom_data,
              broadcast_buffer_.cpu_data(), top_data);

    // Variance normalization
    if (frozen_ || this->phase_ == TEST) {
      // Use the moving average variance
      caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
                 batch_statistic_.mutable_cpu_data());
    }
    else {
      caffe_powx(broadcast_buffer_.count(), const_top_data, Dtype(2),
                 broadcast_buffer_.mutable_cpu_data());
      caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
                            Dtype(1) / (height_ * width_), broadcast_buffer_.cpu_data(),
                            spatial_sum_multiplier_.cpu_data(), Dtype(0),
                            spatial_statistic_.mutable_cpu_data());
      caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
                            spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
                            Dtype(0), batch_statistic_.mutable_cpu_data());
      // Add eps
      caffe_add_scalar(batch_statistic_.count(), bn_eps_,
                       batch_statistic_.mutable_cpu_data());
      // Inverse standard deviation
      caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
                 Dtype(-0.5), batch_statistic_.mutable_cpu_data());
      // Add to the moving average
      if (!frozen_) {
        caffe_cpu_axpby(batch_statistic_.count(),
                        Dtype(1) - bn_momentum_, batch_statistic_.cpu_data(),
                        bn_momentum_, this->blobs_[3]->mutable_cpu_data());
      }
    }
    // Broadcast the inverse std
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    // Multiply with the inverse std
    caffe_mul(broadcast_buffer_.count(), const_top_data,
              broadcast_buffer_.cpu_data(), top_data);

    // Save the normalized inputs and std for backprop
    if (!frozen_) {
      caffe_copy(broadcast_buffer_.count(), const_top_data,
                 x_norm_.mutable_cpu_data());
      caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
                 x_inv_std_.mutable_cpu_data());
    }

    // Scale
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data,
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    caffe_mul(broadcast_buffer_.count(), const_top_data,
              broadcast_buffer_.cpu_data(), top_data);

    // Shift
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
                          Dtype(1), batch_sum_multiplier_.cpu_data(), shift_data,
                          Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
                          height_ * width_, 1, Dtype(1),
                          spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
                          Dtype(0), broadcast_buffer_.mutable_cpu_data());
    caffe_add(broadcast_buffer_.count(), const_top_data,
              broadcast_buffer_.cpu_data(), top_data);
  }
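Putting the mean, inverse-std, scale and shift stages together, the training-mode computation above is the usual batch-norm transform, applied per channel c with statistics taken over the batch and spatial dimensions:

\[
y = \gamma_c\,\frac{x - \mu_c}{\sqrt{\sigma_c^2 + \varepsilon}} + \beta_c ,
\]

with \mu_c and \sigma_c^2 replaced by the stored moving averages when the layer is frozen or in the TEST phase.

Example #15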
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  size_t update_history_offset = net_params.size();
  switch (Caffe::mode()) {
  case Caffe::CPU: {
    // compute square of gradient in update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history of gradients
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[param_id]->mutable_cpu_data());

    // add delta to history to guard against dividing by zero later
    caffe_set(net_params[param_id]->count(), delta,
        this->temp_[param_id]->mutable_cpu_data());

    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[update_history_offset + param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[param_id]->cpu_data(),
        this->temp_[param_id]->mutable_cpu_data());

    // divide history of updates by history of gradients
    caffe_div(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(),
        this->temp_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // jointly compute the RMS of both for update and gradient history
    caffe_powx(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());

    // compute the update
    caffe_mul(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        net_params[param_id]->mutable_cpu_diff());

    // compute square of update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history of updates
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[update_history_offset + param_id]->mutable_cpu_data());

    // apply learning rate
    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
        net_params[param_id]->cpu_diff(),
        net_params[param_id]->mutable_cpu_diff());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    adadelta_update_gpu(net_params[param_id]->count(),
        net_params[param_id]->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        this->history_[update_history_offset + param_id]->mutable_gpu_data(),
        momentum, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
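In formula form, the CPU branch above implements the AdaDelta rule (momentum m, small constant \delta, with the learning rate applied as a final scale):

\[
\begin{aligned}
E[g^2]_t &= m\,E[g^2]_{t-1} + (1-m)\,g_t^2,\\
v_t &= \sqrt{\frac{E[v^2]_{t-1} + \delta}{E[g^2]_t + \delta}}\;g_t,\\
E[v^2]_t &= m\,E[v^2]_{t-1} + (1-m)\,v_t^2,\\
\Delta\theta_t &= -\,\text{lr}\cdot v_t,
\end{aligned}
\]

where the two accumulators live in history_[param_id] and history_[update_history_offset + param_id] respectively.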
Example #16
void EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int* mask = NULL;
  const Dtype* bottom_data_a = NULL;
  const Dtype* bottom_data_b = NULL;
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  if (broadcast_) {
    int dima[4];
    int dimb[4];
    for (int i=0; i<4; i++)
    {
      dima[i] = bottom[0]->shape()[i];
      dimb[i] = bottom[1]->shape()[i];
    }
    bottom_data_a = bottom[0]->cpu_data();
    bottom_data_b = bottom[1]->cpu_data();

    switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD:
      caffe_mul_broadcast<Dtype>(dima, dimb, bottom_data_a, bottom_data_b, top_data);
      break;
    case EltwiseParameter_EltwiseOp_SUM:
      caffe_add_broadcast<Dtype>(dima, dimb, bottom_data_a, bottom_data_b, top_data);
      break;
    default:
      LOG(FATAL) << "Unknown elementwise broadcast operation.";
    }
  } else {
    switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD:
      caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
      for (int i = 2; i < bottom.size(); ++i) {
        caffe_mul(count, top_data, bottom[i]->cpu_data(), top_data);
      }
      break;
    case EltwiseParameter_EltwiseOp_SUM:
      if (coeffs_[0]==1.0) {
        caffe_copy(count, bottom[0]->cpu_data(), top_data);
      } else {
        caffe_set(count, Dtype(0.), top_data);
        caffe_axpy(count, coeffs_[0], bottom[0]->cpu_data(), top_data);
      }

      for (int i = 1; i < bottom.size(); ++i) {
        if (coeffs_[i]==1.0)
          caffe_add (count, top_data, bottom[i]->cpu_data(), top_data);
        else
          caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
      }
      break;
    case EltwiseParameter_EltwiseOp_MAX:
      // Initialize
      mask = max_idx_.mutable_cpu_data();
      caffe_set(count, -1, mask);
      caffe_set(count, Dtype(-FLT_MAX), top_data);
      // bottom 0 & 1
      bottom_data_a = bottom[0]->cpu_data();
      bottom_data_b = bottom[1]->cpu_data();
      for (int idx = 0; idx < count; ++idx) {
        if (bottom_data_a[idx] > bottom_data_b[idx]) {
          top_data[idx] = bottom_data_a[idx];  // maxval
          mask[idx] = 0;  // maxid
        } else {
          top_data[idx] = bottom_data_b[idx];  // maxval
          mask[idx] = 1;  // maxid
        }
      }
      // bottom 2++
      for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
        bottom_data_b = bottom[blob_idx]->cpu_data();
        for (int idx = 0; idx < count; ++idx) {
          if (bottom_data_b[idx] > top_data[idx]) {
            top_data[idx] = bottom_data_b[idx];  // maxval
            mask[idx] = blob_idx;  // maxid
          }
        }
      }
      break;
    default:
      LOG(FATAL) << "Unknown elementwise operation.";
    }
  }
}
Example #17
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
	const vector<Blob<Dtype>*>& top) {
	const Dtype* const_bottom_data = bottom[0]->cpu_data();
	const Dtype* const_top_data = top[0]->cpu_data();
	Dtype* top_data = top[0]->mutable_cpu_data();

	const Dtype* scale_data = this->blobs_[0]->cpu_data();
	const Dtype* shift_data = this->blobs_[1]->cpu_data();

	// ---------- mean subtraction ---------- //
	// statistic across spatial
	caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_, Dtype(1. / (height_ * width_)), const_bottom_data,
		spatial_sum_multiplier_.cpu_data(), Dtype(0), spatial_statistic_.mutable_cpu_data());
	// statistic across batch
	caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_), spatial_statistic_.cpu_data(),
		batch_sum_multiplier_.cpu_data(), Dtype(0), batch_statistic_.mutable_cpu_data());
	// save history mean
	if (this->phase_ == TRAIN) {
		caffe_cpu_axpby(batch_statistic_.count(), decay_, batch_statistic_.cpu_data(), Dtype(1) - decay_,
			this->blobs_[2]->mutable_cpu_data());
	}
	if (this->phase_ == TEST && moving_average_) {
		// use moving average mean
		caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(), batch_statistic_.mutable_cpu_data());
	}
	// put mean blob into buffer_blob_
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(), Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(-1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	// subtract mean
	caffe_add(buffer_blob_.count(), const_bottom_data, buffer_blob_.cpu_data(), top_data);

	// ---------- variance normalization ---------- //
	// put the squares of X - mean into buffer_blob_
	caffe_powx(buffer_blob_.count(), const_top_data, Dtype(2), buffer_blob_.mutable_cpu_data());
	// statistic across spatial
	caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_, Dtype(1. / (height_ * width_)), buffer_blob_.cpu_data(),
		spatial_sum_multiplier_.cpu_data(), Dtype(0), spatial_statistic_.mutable_cpu_data());
	// statistic across batch
	caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_), spatial_statistic_.cpu_data(),
		batch_sum_multiplier_.cpu_data(), Dtype(0), batch_statistic_.mutable_cpu_data());
	// save history variance
	if (this->phase_ == TRAIN) {
		caffe_cpu_axpby(batch_statistic_.count(), decay_, batch_statistic_.cpu_data(), Dtype(1) - decay_,
			this->blobs_[3]->mutable_cpu_data());
	}
	if (this->phase_ == TEST && moving_average_) {
		// use moving average variance
		caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(), batch_statistic_.mutable_cpu_data());
	}
	// add eps
	caffe_add_scalar(batch_statistic_.count(), var_eps_, batch_statistic_.mutable_cpu_data());
	// std
	caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(), Dtype(0.5),
		batch_statistic_.mutable_cpu_data());
	// put std blob into buffer_blob_
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(), Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	// variance normalization
	caffe_div(buffer_blob_.count(), const_top_data, buffer_blob_.cpu_data(), top_data);

	// ---------- save x_norm and x_std ---------- //
	caffe_copy(buffer_blob_.count(), const_top_data, x_norm_.mutable_cpu_data());
	caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(), x_std_.mutable_cpu_data());

	// ---------- scale ---------- //
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	caffe_mul(buffer_blob_.count(), const_top_data, buffer_blob_.cpu_data(), top_data);

	// ---------- shift ---------- //
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1, Dtype(1),
		batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
		spatial_statistic_.mutable_cpu_data());
	caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_, height_ * width_, 1, Dtype(1),
		spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(), Dtype(0),
		buffer_blob_.mutable_cpu_data());
	caffe_add(buffer_blob_.count(), const_top_data, buffer_blob_.cpu_data(), top_data);

}
Example #18
  void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();
    const Dtype* const_top_data = top[0]->cpu_data();

    const Dtype* scale_data = this->blobs_[0]->cpu_data();
    const Dtype* shift_data = this->blobs_[1]->cpu_data();

    switch (this->layer_param_.bn_param().bn_mode()) {
    case BNParameter_BNMode_LEARN:
      // put the squares of bottom into buffer_blob_
      caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
          buffer_blob_.mutable_cpu_data());

      // computes variance using var(X) = E(X^2) - (EX)^2
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1. / (H_ * W_)), bottom_data,
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      // E(X^2) across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      // E(X^2) across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
          spatial_variance_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_variance_.mutable_cpu_data());

      caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
          buffer_blob_.mutable_cpu_data());  // (EX)^2
      caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
          buffer_blob_.cpu_data(),
          batch_variance_.mutable_cpu_data());  // variance

      // save top[1] (batch_mean) and top[2] (batch_variance)
      if (top.size() > 1) {
          caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
              top[1]->mutable_cpu_data());
      }
      if (top.size() > 2) {
          caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
              top[2]->mutable_cpu_data());
      }

      // do mean and variance normalization
      // subtract mean
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
          C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(-1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_add(buffer_blob_.count(), bottom_data,
          buffer_blob_.cpu_data(), top_data);

      // normalize variance
      caffe_add_scalar(batch_variance_.count(), var_eps_,
        batch_variance_.mutable_cpu_data());
      caffe_powx(batch_variance_.count(),
          batch_variance_.cpu_data(), Dtype(0.5),
          batch_variance_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
          C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_variance_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_div(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);

      // Saving x_norm
      caffe_copy(buffer_blob_.count(), const_top_data,
          x_norm_.mutable_cpu_data());
      // scale
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_data,
          buffer_blob_.cpu_data(), top_data);

      // shift
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_add(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);
      break;
    case BNParameter_BNMode_INFERENCE:
      // scale
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), bottom_data,
          buffer_blob_.cpu_data(), top_data);

      // shift
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_add(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);
      break;
    default:
      LOG(FATAL) << "Unknown BN mode.";
    } 
  }
Example #19
void CRFWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
	if (propagate_down[1]) {
		LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs.";
	}
	if (propagate_down[0]) {
		// The backward flow is split into two paths: one to the local parameters,
		// and the other to the lower layer through the diff blob of bottom[0].
		Dtype* ptr_pi_diff = this->blobs_[0]->mutable_cpu_diff();
		Dtype* ptr_tr_diff = this->blobs_[1]->mutable_cpu_diff();
		Dtype* ptr_mu_diff = this->blobs_[2]->mutable_cpu_diff();
		Dtype* ptr_bottom_diff = bottom[0]->mutable_cpu_diff(); 

		Dtype* ptr_state_err = gamma_.mutable_cpu_data();
		Dtype* ptr_trans_err = epsilon_.mutable_cpu_data();
        
		// some data needed 
		const Dtype* state_err = gamma_.cpu_data();
		const Dtype* trans_err = epsilon_.cpu_data();
		const Dtype* feature_table = bottom[0]->cpu_data();
		const Dtype* label = bottom[1]->cpu_data();
	    const Dtype* mu = this->blobs_[2]->cpu_data(); 
		const Dtype* pi_diff = this->blobs_[0]->cpu_diff();

	    // per-instance strides into the flattened buffers
		int ts = max_seq_length_ * feature_num_;  
		int gs = max_seq_length_ * state_num_;
		int eps = max_seq_length_ * state_num_ * state_num_;

		for (int i = 0; i < num_; ++i) {
			// the sequence length may differ per instance; using max_seq_length_ here needs to be revisited
			int sl = max_seq_length_;

			// compute the state energy err and state trans err at each position of each instance
			for (int j = 0; j < sl; ++j) {
				int idx = *(label + i * max_seq_length_ + j); 
				if (idx >= 0 && idx < state_num_) {
					*(ptr_state_err + i * gs +  j * state_num_ + idx) += 1;	
				} else {
					// TODO
				}
				if ( j >= sl - 1 ) 
					continue;
				int idx_next = *(label + i * max_seq_length_ + j + 1);	
				if (idx >= 0 && idx < state_num_ && idx_next >= 0 && idx_next < state_num_) {
					*(ptr_trans_err + i * gs +  j * state_num_ * state_num_ + idx * state_num_ + idx_next) += 1;	
				} else {
					// TODO
				}
			} 	

			// Backward to input blob, bottom_dif = Mu' dot state_err' 
			caffe_cpu_gemm(CblasTrans, CblasTrans, feature_num_, sl, state_num_, (Dtype)1., 
			    mu, state_err + i * gs, (Dtype)0., ptr_bottom_diff + i * ts);  
			// Backward to pi, pi += state_err(0) 
			caffe_add(state_num_, pi_diff, state_err + i * gs, ptr_pi_diff); 	
			// Backward to mu, mu += state_err' dot  bottom[0]' 
			caffe_cpu_gemm(CblasTrans, CblasTrans, state_num_, feature_num_, sl, (Dtype)1.,
			    state_err + i * gs, feature_table + i * gs, (Dtype)1., ptr_mu_diff);     	
			// Backward to tr, sum_t(state_trans_err(t))
			caffe_cpu_gemv(CblasNoTrans, state_num_ * state_num_, sl, (Dtype)1., 
				trans_err + i * eps, multiplier_seq_len_.cpu_data(), (Dtype)0., ptr_tr_diff);
		} 
	  }
}	 
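Example #20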
	void TripletClipHingeLossLayer<Dtype>::Backward_cpu(
		const vector<Blob<Dtype>*>& top, const vector<bool> &propagate_down,
		const vector<Blob<Dtype>*>& bottom){
		const Dtype* orignalcode;
		const Dtype* similarcode;
		const Dtype* diffrcode;
		if (propagate_down[0]) {
			for (int i = 0; i < 3; ++i) {  // each of the three streams receives a gradient
				int num = bottom[i]->num();
				int channels = bottom[i]->channels();
				for (int j = 0; j < num; ++j){
					Dtype* bout = bottom[i]->mutable_cpu_diff();  // diff buffer of the i-th bottom
					orignalcode = ave_or.cpu_data() + (j / frame_num)*dim;
					similarcode = ave_si.cpu_data() + (j / frame_num)*dim;
					diffrcode = ave_di.cpu_data() + (j / frame_num)*dim;
					if (i == 0){
						if (dist_sq_.cpu_data()[j / frame_num]>Dtype(FLT_MIN)){
							caffe_sub(dim, diffrcode, similarcode,
								gradient_triplet.mutable_cpu_data());// the distance of F- and F+
							caffe_scal(dim, Dtype(2) / Dtype(num),
								gradient_triplet.mutable_cpu_data());
						}
						else
							caffe_set(dim, Dtype(FLT_MIN),
							gradient_triplet.mutable_cpu_data());
						compute_gradient_structure(i, j);
						caffe_scal(dim, lamda, gradient_triplet.mutable_cpu_data());
						caffe_scal(dim, Dtype(1.0) - lamda, gradient_structure.mutable_cpu_data());
						caffe_add(dim, gradient_triplet.cpu_data(),
							gradient_structure.cpu_data(), gradient.mutable_cpu_data());
					}
					if (i == 1){
						if (dist_sq_.cpu_data()[j / frame_num] > Dtype(FLT_MIN)){
							caffe_sub(dim, similarcode, orignalcode,
								gradient_triplet.mutable_cpu_data());// the distance of F+ and F
							caffe_scal(dim, Dtype(2) / Dtype(num),
								gradient_triplet.mutable_cpu_data());
						}
						else
							caffe_set(dim, Dtype(FLT_MIN),
							gradient_triplet.mutable_cpu_data());
						compute_gradient_structure(i, j);
						caffe_scal(dim, lamda, gradient_triplet.mutable_cpu_data());
						caffe_scal(dim, Dtype(1.0) - lamda, gradient_structure.mutable_cpu_data());
						caffe_add(dim, gradient_triplet.cpu_data(),
							gradient_structure.cpu_data(), gradient.mutable_cpu_data());
					}
					if (i == 2){
						if (dist_sq_.cpu_data()[j / frame_num] > Dtype(FLT_MIN)){
							caffe_sub(dim, orignalcode, diffrcode,
								gradient_triplet.mutable_cpu_data());
							caffe_scal(dim, Dtype(2) / Dtype(num),
								gradient_triplet.mutable_cpu_data());
						}
						else
							caffe_set(dim, Dtype(FLT_MIN),
							gradient_triplet.mutable_cpu_data());
						compute_gradient_structure(i, j);
						caffe_scal(dim, lamda, gradient_triplet.mutable_cpu_data());
						caffe_scal(dim, Dtype(1.0) - lamda, gradient_structure.mutable_cpu_data());
						caffe_add(dim, gradient_triplet.cpu_data(),
							gradient_structure.cpu_data(), gradient.mutable_cpu_data());
					}
					caffe_scal(dim, Dtype(2.0), gradient.mutable_cpu_data());
					caffe_copy(channels, gradient.cpu_data(), bout + (j*channels));  // write the gradient back for the j-th sample of this bottom
				}
			}
		}
	}
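Example #21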
void InnerProductForRegularizeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
	InnerProductLayer<Dtype>::Backward_cpu(top,propagate_down,bottom);

	caffe_add(top[1]->count(), top[1]->cpu_diff(), this->blobs_[0]->cpu_diff(), this->blobs_[0]->mutable_cpu_diff());
}
Example #22
void LocalLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  Dtype* x_data = col_buffer_.mutable_cpu_data();
  Dtype* x_diff = col_buffer_.mutable_cpu_diff();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  Dtype* bias_diff = NULL;

  Blob<Dtype> intermediate;
  intermediate.Reshape(1, 1, 1, N_);

  Blob<Dtype> xt;
  xt.Reshape(1, 1, K_, N_);
  Dtype* xt_data = xt.mutable_cpu_data();

  if (bias_term_) {
    bias_diff = this->blobs_[1]->mutable_cpu_diff();
    memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
    for (int n = 0; n < num_; ++n) {
      caffe_add(M_ * N_, bias_diff,
                top_diff + top[0]->offset(n),
                bias_diff);
    }
  }

  memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
  for (int n=0; n<num_; n++) {
    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
               width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data);

    // gradient wrt weight
    for (int m=0; m<num_output_; m++) {
      Dtype* filter_weight_diff = weight_diff+this->blobs_[0]->offset(m);
      for (int k=0; k<K_; k++) {
        caffe_mul(N_, top_diff+top[0]->offset(n, m),  
                  x_data+col_buffer_.offset(0,k), xt_data+xt.offset(0,0,k));
      }
      caffe_cpu_axpby(K_*N_, Dtype(1.0), xt_data, Dtype(1.0), filter_weight_diff);
    }
      
    // gradient wrt bottom data
    if (propagate_down[0]) {
      memset(x_diff, 0, col_buffer_.count() * sizeof(Dtype));
      for (int m=0; m<num_output_; m++) {
        for (int k=0; k<K_; k++) {
          caffe_mul(N_, top_diff+top[0]->offset(n, m),
                    weight+this->blobs_[0]->offset(m,0,k),
                    intermediate.mutable_cpu_data());

          caffe_cpu_axpby(N_, Dtype(1.0),
                          intermediate.cpu_data(), Dtype(1.0),
                          x_diff+col_buffer_.offset(0,k));
        }
      }

      // col2im back to the data
      col2im_cpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_,
                 pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n));

    }
  }

}
Example #23
	void DeconvNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
	{
		const Dtype* top_diff = top[0]->cpu_diff();
		Dtype* deconv1_top_vec_diff = deconv1_top_vec[0]->mutable_cpu_diff();
		Dtype* deconv2_top_vec_diff = deconv2_top_vec[0]->mutable_cpu_diff();
		const Dtype* deconv2_top_vec_data = deconv2_top_vec[0]->cpu_data();
		const Dtype* deconv1_top_vec_data = deconv1_top_vec[0]->cpu_data();

		caffe_set(deconv2_top_vec[0]->count(), (Dtype)0, deconv2_top_vec_diff);
		caffe_set(deconv1_top_vec[0]->count(), (Dtype)0, deconv1_top_vec_diff);
		caffe_set(exp_top_vec[0]->count(), (Dtype)0, exp_top_vec[0]->mutable_cpu_diff());
		//caffe_set(exp_bottom_vec[0]->count(), (Dtype)0, exp_bottom_vec[0]->mutable_cpu_diff());

		caffe_set(deconv1_layer->blobs()[0]->count(), (Dtype)0, deconv1_layer->blobs()[0]->mutable_cpu_diff());
		caffe_set(deconv2_layer->blobs()[0]->count(), (Dtype)0, deconv2_layer->blobs()[0]->mutable_cpu_diff());

		//bias gradient, if necessary
		if (this->bias_term_ && this->param_propagate_down_[2])
		{
			Dtype* bias_diff = this->blobs_[2]->mutable_cpu_diff();
			for (int n = 0; n < top[0]->num(); ++n)
			{
				caffe_cpu_gemv<Dtype>(CblasNoTrans, top[0]->channels(), top[0]->height() * top[0]->width(), 
					1., top_diff+top[0]->offset(n), bias_multiplier.cpu_data(), 1., bias_diff);
			}
		}
		// weights and alpha gradient, propagate down to bottom
		if (param_propagate_down_[0] || param_propagate_down_[1] || propagate_down[0])
		{
			vector<bool> no_propagate_down;
			no_propagate_down.push_back(false);
			vector<bool> yes_propagate_down;
			yes_propagate_down.push_back(true);
			// top_diff backward to deconv2_top_vec_diff
			for (int n = 0; n < top[0]->num(); ++n)
			{
				caffe_div(deconv1_top_vec[0]->count(), top_diff + top[0]->offset(n),
					deconv1_top_vec_data, deconv2_top_vec_diff + deconv2_top_vec[0]->offset(n));
			}
			// backward through deconv2_layer
			deconv2_layer->Backward(deconv2_top_vec, propagate_down, bottom);
			const Dtype* wa_diff = weights_alphas->cpu_diff();
			// weight gradient
			if (param_propagate_down_[0])
			{
				Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
				const Dtype* alpha = alphas->cpu_data();
				for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in)
				{
					caffe_mul(alphas->count(), wa_diff + weights_alphas->offset(ch_in),
						alpha, weight_diff + this->blobs_[0]->offset(ch_in));
				}
			}

			// alpha gradient
			if (param_propagate_down_[1] && average_train)
			{
				//alpha_diff1
				Dtype* alpha_cache_diff = alpha_cache.mutable_cpu_diff();
				Dtype* alpha_cache_diff2 = alpha_cache2.mutable_cpu_diff();
				caffe_set(alpha_cache.count(), (Dtype)0, alpha_cache_diff);
				caffe_set(alpha_cache2.count(), (Dtype)0, alpha_cache_diff2);
				const Dtype* weight = this->blobs_[0]->cpu_data();
				for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in)
				{
					caffe_mul(alpha_cache.count(), wa_diff + weights_alphas->offset(ch_in),
						weight + this->blobs_[0]->offset(ch_in), alpha_cache_diff);
					caffe_add(alpha_cache2.count(), alpha_cache_diff, alpha_cache_diff2, alpha_cache_diff2);
				}
				// top_diff backward to deconv1_top_vec_diff
				Dtype* deconv1_top_cache_diff = deconv1_top_cache.mutable_cpu_diff();
				caffe_set(deconv1_top_cache.count(), (Dtype)0, deconv1_top_cache_diff);
				for (int n = 0; n < top[0]->num(); ++n)
				{
					caffe_mul(deconv1_top_cache.count(), top_diff + top[0]->offset(n),
						deconv2_top_vec_data + deconv2_top_vec[0]->offset(n), deconv1_top_cache_diff);
					caffe_add(deconv1_top_cache.count(), deconv1_top_cache_diff, deconv1_top_vec_diff, deconv1_top_vec_diff);
				}
				caffe_div(deconv1_top_cache.count(), deconv1_top_vec_diff,
					deconv1_top_vec_data, deconv1_top_vec_diff);
				caffe_div(deconv1_top_cache.count(), deconv1_top_vec_diff,
					deconv1_top_vec_data, deconv1_top_vec_diff);
				// backward through deconv1_layer
				deconv1_layer->Backward(deconv1_top_vec, no_propagate_down, deconv1_bottom_vec);

				// alpha_diff2
				Dtype* alpha_diff = alphas->mutable_cpu_diff();
				//fuse alpha_diff1 and alpha_diff2
				caffe_sub(alpha_cache.count(), alpha_cache_diff2, alpha_diff, alpha_diff);

				exp_layer->Backward(exp_top_vec, yes_propagate_down, exp_bottom_vec);
			}
		}
	}
Example #24
File: solver.cpp  Project: VikingMew/dec
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  Dtype rate = this->GetLearningRate();
  Dtype delta = this->param_.delta();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype weight_decay = this->param_.weight_decay();
  string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(net_params[param_id]->count(),
              net_params[param_id]->cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
          net_params[param_id]->cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
          this->update_[param_id]->cpu_data(),
          this->history_[param_id]->cpu_data(),
          this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                this->history_[param_id]->cpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(),
                net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->cpu_data(), Dtype(0),
          net_params[param_id]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(net_params[param_id]->count(),
              net_params[param_id]->gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_gpu_powx(net_params[param_id]->count(),
          net_params[param_id]->gpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_gpu_data());

      // update history
      caffe_gpu_add(net_params[param_id]->count(),
          this->update_[param_id]->gpu_data(),
          this->history_[param_id]->gpu_data(),
          this->history_[param_id]->mutable_gpu_data());

      // prepare update
      caffe_gpu_powx(net_params[param_id]->count(),
                this->history_[param_id]->gpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_div(net_params[param_id]->count(),
                net_params[param_id]->gpu_diff(),
                this->update_[param_id]->gpu_data(),
                this->update_[param_id]->mutable_gpu_data());

      // scale and copy
      caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->gpu_data(), Dtype(0),
          net_params[param_id]->mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
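Stripped of the Caffe tensor plumbing, both the CPU and GPU branches above perform the same five element-wise steps: square the gradient, add it to the history, take the square root of the history, add delta, and divide the gradient by the result before scaling by the local learning rate. A minimal sketch of that update on a raw float array (assumptions: no weight decay, a single parameter blob; this is not the solver's actual interface):

#include <cmath>
#include <cstddef>

// One AdaGrad step: history += diff^2; diff = lr * diff / (sqrt(history) + delta).
void adagrad_step(std::size_t count, float lr, float delta,
                  float* diff, float* history) {
  for (std::size_t i = 0; i < count; ++i) {
    history[i] += diff[i] * diff[i];                           // accumulate squared gradient
    diff[i] = lr * diff[i] / (std::sqrt(history[i]) + delta);  // scale the raw gradient
  }
}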
Example #25
void AdaGradSolver<Dtype>::ComputeUpdateValue(uint_tp param_id, Dtype rate) {
  CHECK(Caffe::root_solver());
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype local_rate = rate * net_params_lr[param_id];
  switch (Caffe::mode()) {
    case Caffe::CPU: {
      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
                 net_params[param_id]->cpu_diff(), Dtype(2),
                 this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
                this->update_[param_id]->cpu_data(),
                this->history_[param_id]->cpu_data(),
                this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                 this->history_[param_id]->cpu_data(), Dtype(0.5),
                 this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(), delta,
                       this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(), net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
                      this->update_[param_id]->cpu_data(), Dtype(0),
                      net_params[param_id]->mutable_cpu_diff());
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
        // compute square of gradient in update
        caffe_gpu_powx(net_params[param_id]->count(),
                       net_params[param_id]->gpu_diff(), Dtype(2),
                       this->update_[param_id]->mutable_gpu_data());

        // update history
        caffe_gpu_add(net_params[param_id]->count(),
                      this->update_[param_id]->gpu_data(),
                      this->history_[param_id]->gpu_data(),
                      this->history_[param_id]->mutable_gpu_data());

        // prepare update
        caffe_gpu_powx(net_params[param_id]->count(),
                       this->history_[param_id]->gpu_data(), Dtype(0.5),
                       this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_add_scalar(net_params[param_id]->count(), delta,
                             this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_div(net_params[param_id]->count(),
                      net_params[param_id]->gpu_diff(),
                      this->update_[param_id]->gpu_data(),
                      this->update_[param_id]->mutable_gpu_data());

        // scale and copy
        caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
                        this->update_[param_id]->gpu_data(), Dtype(0),
                        net_params[param_id]->mutable_gpu_diff());
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        // compute square of gradient in update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0, Dtype(2),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // update history
        greentea_gpu_add<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->mutable_gpu_data()), 0);

        // prepare update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->history_[param_id]->gpu_data()), 0, Dtype(0.5),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_add_scalar<Dtype>(
            this->device_->id(), net_params[param_id]->count(), delta,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_div<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0,
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // scale and copy
        greentea_gpu_axpby<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            local_rate, (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            Dtype(0), (cl_mem) (net_params[param_id]->mutable_gpu_diff()), 0);
#endif  // USE_GREENTEA
      }
#else
      NO_GPU;
#endif
      break;
    }
    default:
      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
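This per-parameter variant applies the same rule as Example #24, once per learnable blob; the CUDA and Greentea branches differ only in how buffers are addressed (raw device pointers versus cl_mem handles with explicit offsets). As a quick numeric check of the rule: for one weight with gradient g = 0.2, prior history h = 0.12, delta = 1e-8 and local_rate = 0.01, the history becomes 0.12 + 0.04 = 0.16 and the stored diff becomes 0.01 * 0.2 / (sqrt(0.16) + 1e-8) ≈ 0.005, so the effective step shrinks as squared gradients accumulate.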
Example #26
	void NonLocalLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
	{
		vector<bool> propagate_down_sub;
		propagate_down_sub.push_back(propagate_down[0]);
		propagate_down_sub.push_back(propagate_down[0]);
		if (propagate_down[0])
		{
			for (int i = 0; i < eltwise_bottom_vec.size(); i++)
				caffe_set(eltwise_bottom_vec[i]->count(), (Dtype)0, eltwise_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < smooth_bottom_vec.size(); i++)
				caffe_set(smooth_bottom_vec[i]->count(), (Dtype)0, smooth_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < euclidean_bottom_vec.size(); i++)
				caffe_set(euclidean_bottom_vec[i]->count(), (Dtype)0, euclidean_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < split_1_bottom_vec.size(); i++)
				caffe_set(split_1_bottom_vec[i]->count(), (Dtype)0, split_1_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < smooth_top_vec.size(); i++)
				caffe_set(smooth_top_vec[i]->count(), (Dtype)0, smooth_top_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < split_0_top_vec.size(); i++)
				caffe_set(split_0_top_vec[i]->count(), (Dtype)0, split_0_top_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < split_3_top_vec.size(); i++)
				caffe_set(split_3_top_vec[i]->count(), (Dtype)0, split_3_top_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < normalize_top_vec.size(); i++)
				caffe_set(normalize_top_vec[i]->count(), (Dtype)0, normalize_top_vec[i]->mutable_cpu_diff());

			if (top.size() == 3)
				eltwise_layer->Backward(eltwise_top_vec, propagate_down_sub, eltwise_bottom_vec);

			split_layer_2->Backward(split_2_top_vec, propagate_down_sub, split_2_bottom_vec);
			//int tmp_offset = smooth_top_vec[0]->offset(1);
			const int tmp_offset = split_3_top_vec[0]->offset(1);
			//const Dtype* eltwise_bottom_1_diff = eltwise_bottom_vec[1]->cpu_diff();
			const Dtype* split_2_bottom_diff = split_2_bottom_vec[0]->cpu_diff();
			//Dtype* smooth_top_diff = smooth_top_vec[0]->mutable_cpu_diff();
			Dtype* split_3_top_diff = split_3_top_vec[0]->mutable_cpu_diff();
			for (int n = 0; n < split_2_bottom_vec[0]->num(); ++n)
			{
				for (int ch = 0; ch < channels_; ++ch)
				{
					//caffe_add(tmp_offset, smooth_top_diff, split_2_bottom_diff, smooth_top_diff);
					caffe_add(tmp_offset, split_3_top_diff, split_2_bottom_diff, split_3_top_diff);
					split_2_bottom_diff += tmp_offset;
				}
				//smooth_top_diff += tmp_offset;
				split_3_top_diff += tmp_offset;
			}

			const int norm_offset = normalize_top_vec[0]->offset(1);
			Dtype* normalize_diff = normalize_top_vec[0]->mutable_cpu_diff();
			const Dtype* top_1_diff = top[1]->cpu_diff();
			for (int n = 0; n < normalize_top_vec[0]->num(); ++n)
			{
				for (int ch = 0; ch < channels_; ++ch)
				{
					caffe_add(tmp_offset, normalize_diff, top_1_diff, normalize_diff);
					top_1_diff += norm_offset;
				}
				normalize_diff += norm_offset;
			}
			// normalize_top_vec[0]->ShareDiff(*top[1]);
			normalize_layer->Backward(normalize_top_vec, propagate_down_sub, normalize_bottom_vec);
			split_3_top_vec[1]->ShareDiff(*normalize_bottom_vec[0]);
			split_layer_3->Backward(split_3_top_vec, propagate_down_sub, split_3_bottom_vec);
			smooth_threshold_layer->Backward(smooth_top_vec, propagate_down_sub, smooth_bottom_vec);

			caffe_scal(euclidean_top_vec[0]->count(),
				(Dtype)(1.0 / bottom[0]->channels()), euclidean_top_vec[0]->mutable_cpu_diff());

			euclidean_layer->Backward(euclidean_top_vec, propagate_down_sub, euclidean_bottom_vec);
			split_1_top_vec[1]->ShareDiff(*euclidean_bottom_vec[0]);
			split_layer_1->Backward(split_1_top_vec, propagate_down_sub, split_1_bottom_vec);

			for (int n = 0; n < num_; ++n)
			{
				col2im_center_cpu(img2col_1_top.cpu_diff() + img2col_1_top.offset(n), channels_, height_, width_,
					kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
					split_0_top_vec[1]->mutable_cpu_diff() + split_0_top_vec[1]->offset(n));

				col2im_cpu(img2col_0_top.cpu_diff() + img2col_0_top.offset(n), channels_, height_, width_,
					kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
					1, 1,
					split_0_top_vec[0]->mutable_cpu_diff() + split_0_top_vec[0]->offset(n));
			}
			split_layer_0->Backward(split_0_top_vec, propagate_down_sub, bottom);
		}
	}
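The Backward above leans on a recurring Caffe idiom: every internal blob that receives gradients from more than one consumer is first cleared with caffe_set and then accumulated into with caffe_add, one offset slice at a time, before the corresponding sub-layer's Backward is invoked. A standalone sketch of that zero-then-accumulate pattern on plain vectors (illustrative only; the names are made up for this sketch):

#include <algorithm>
#include <cstddef>
#include <vector>

// Accumulate per-consumer gradient slices into one shared diff buffer.
// Each entry of 'slices' is one consumer's contribution, same length as 'shared_diff'.
void accumulate_diffs(const std::vector<std::vector<float> >& slices,
                      std::vector<float>* shared_diff) {
  std::fill(shared_diff->begin(), shared_diff->end(), 0.0f);  // caffe_set(count, 0, diff)
  for (std::size_t s = 0; s < slices.size(); ++s) {
    for (std::size_t i = 0; i < shared_diff->size(); ++i) {
      (*shared_diff)[i] += slices[s][i];                      // caffe_add(count, src, dst, dst)
    }
  }
}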
void CoupledClusterLossLayer<Dtype>::Forward_cpu(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    pos_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
    neg_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
    pos_backward = std::vector<bool>(group_num*N, false);
    neg_backward = std::vector<bool>(group_num*N, false);
    const Dtype *feat_ptr = bottom[0]->cpu_data();
    const Dtype *label_ptr = bottom[1]->cpu_data();
    Dtype *diff_ptr_ = diff_.mutable_cpu_data();
    Dtype loss(0);

    caffe_set(feat_len*group_num, Dtype(0), pos_center_.mutable_cpu_data());

    int cnt = 0;
    /* i -> group index */
    for(int i=0; i<group_num; ++i) {
        /* find the anchor (positive) id: the label that occurs more than once in the group */
        std::set<Dtype> labels;
        Dtype anchor_id = -1;
        for(int j=0; j<N; ++j) {
            Dtype tmp = label_ptr[N*i+j];
            if(labels.count(tmp)>0) {
                anchor_id = tmp;
                break;
            }
            else
                labels.insert(tmp);
        }
        // CHECK_NE(anchor_id, -1);
        /* collect the positive and negative ids and compute the center of the positive samples */
        for(int j=0; j<N; ++j) {
            if(label_ptr[i*N+j]==anchor_id){
                pos_ids[i].push_back(j);
                caffe_add(feat_len, feat_ptr+feat_len*(i*N+j), pos_center_.mutable_cpu_data()+feat_len*i, pos_center_.mutable_cpu_data()+feat_len*i);
            }
            else neg_ids[i].push_back(j);
        }
        caffe_cpu_scale(feat_len, Dtype(1)/pos_ids[i].size(), pos_center_.mutable_cpu_data()+feat_len*i, pos_center_.mutable_cpu_data()+feat_len*i);

        if(neg_ids[i].size()==0 || pos_ids[i].size()<=1) continue;

        Dtype pos_mdist = Dtype(0);
        Dtype neg_min_val = -1;
        Dtype pos_max_val = -1;
        for(int j=0; j<N; ++j) {
            // f[j]-center
            caffe_sub(feat_len, feat_ptr+feat_len*(i*N+j), pos_center_.cpu_data()+feat_len*i, diff_ptr_+feat_len*(i*N+j));
            if(scale!=1)
                caffe_cpu_scale(feat_len, scale, diff_ptr_+feat_len*(i*N+j), diff_ptr_+feat_len*(i*N+j));
            Dtype d = caffe_cpu_dot(feat_len, diff_ptr_+feat_len*(i*N+j), diff_ptr_+feat_len*(i*N+j));
            if(log_flag)
                LOG(INFO) << "i " << i << ", j " << j << ", d " << d;
            dist_sq_.mutable_cpu_data()[i*N+j] = d;
            if(std::count(neg_ids[i].begin(), neg_ids[i].end(), j)>0 && (neg_min_val==-1 || d<neg_min_val)) neg_min_val = d;
            else if(std::count(neg_ids[i].begin(), neg_ids[i].end(), j)==0 && (pos_max_val==-1 || d>pos_max_val)) pos_max_val = d;
        }
        for(int j=0; j<N; ++j) {
            if(std::count(neg_ids[i].begin(), neg_ids[i].end(), j)>0) {
                Dtype d = dist_sq_.cpu_data()[i*N+j];
                Dtype mdist = std::max(-d+margin+pos_max_val, Dtype(0));
                if(log_flag)
                    LOG(INFO) << "j=" << j << ", d=" << d << ", pos_max_val=" << pos_max_val << ", mdist=" << mdist;
                if(mdist>0) neg_backward[i*N+j] = true;
            }
            else {
                Dtype d = dist_sq_.cpu_data()[i*N+j];
                Dtype mdist = std::max(d+margin-neg_min_val, Dtype(0));
                if(log_flag)
                    LOG(INFO) << "j=" << j << ", d=" << d << ", neg_min_val=" << neg_min_val << ", mdist=" << mdist;
                if(mdist>0) pos_backward[i*N+j] = true;
                pos_mdist += mdist;
            }
        }
        /* average the hinge penalty over the positive samples */
        pos_mdist /= pos_ids[i].size();
        // pos_mdist *= 2;

        if(log_flag)
            LOG(INFO) << "pos_mdist " << pos_mdist << ", neg_min_val " << neg_min_val;

        CHECK_GE(pos_ids[i].size(), 2);
        CHECK_GE(neg_ids[i].size(), 1);

        loss += pos_mdist;
        ++cnt;
    }
    loss = loss / cnt;
    top[0]->mutable_cpu_data()[0] = loss;
}
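Read as a formula, the loop above computes, for each group i with positive set P_i (the samples sharing the duplicated anchor label), negative set N_i, positive center c_i (the mean of the positive features) and squared distances d_j = ||scale * (f_j - c_i)||^2:

    loss_i = (1 / |P_i|) * sum over j in P_i of max(d_j + margin - min over k in N_i of d_k, 0)

and the reported loss is the average of loss_i over the groups that have at least two positives and at least one negative. The pos_backward / neg_backward flags simply mark which samples had a non-zero hinge term, so the backward pass can restrict gradient flow to them.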