Example #1
template <typename Dtype>
void PowerLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const int count = bottom[0]->count();
    const Dtype* top_diff = top[0]->gpu_diff();
    if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
      caffe_gpu_set(count, diff_scale_, bottom_diff);
    } else {
      const Dtype* bottom_data = bottom[0]->gpu_data();
      // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
      //               = diff_scale * y / (shift + scale * x)
      if (power_ == Dtype(2)) {
        // Special case for y = (shift + scale * x)^2
        //     -> dy/dx = 2 * scale * (shift + scale * x)
        //              = diff_scale * shift + diff_scale * scale * x
        caffe_gpu_axpby(
            count,
            diff_scale_ * scale_,
            bottom_data,
            Dtype(0),
            bottom_diff);

        if (shift_ != Dtype(0)) {
          caffe_gpu_add_scalar(count, diff_scale_ * shift_, bottom_diff);
        }
      } else if (shift_ == Dtype(0)) {
        // Special case for y = (scale * x)^power
        //     -> dy/dx = scale * power * (scale * x)^(power - 1)
        //              = scale * power * (scale * x)^power * (scale * x)^(-1)
        //              = power * y / x
        const Dtype* top_data = top[0]->gpu_data();
        caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
        caffe_gpu_scal(count, power_, bottom_diff);
      } else {
        caffe_copy(count, bottom_data, bottom_diff);
        if (scale_ != Dtype(1)) {
          caffe_gpu_scal(count, scale_, bottom_diff);
        }
        if (shift_ != Dtype(0)) {
          caffe_gpu_add_scalar(count, shift_, bottom_diff);
        }
        const Dtype* top_data = top[0]->gpu_data();
        caffe_gpu_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
        if (diff_scale_ != Dtype(1)) {
          caffe_gpu_scal(count, diff_scale_, bottom_diff);
        }
      }
    }
    caffe_gpu_mul(count, top_diff, bottom_diff, bottom_diff);
  }
}
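For reference, every branch above assembles the same closed-form derivative. A minimal scalar sketch of that gradient (a hypothetical checking helper, not part of Caffe, assuming y = (shift + scale * x)^power and diff_scale_ = power_ * scale_ as in PowerLayer's setup):

#include <cmath>

// Hypothetical scalar reference for the PowerLayer gradient:
//   dy/dx = power * scale * (shift + scale * x)^(power - 1)
// The power == 1 and diff_scale == 0 special cases fall out of the
// same formula, since pow(u, 0) == 1.
double power_backward_ref(double x, double top_diff,
                          double power, double scale, double shift) {
  const double diff_scale = power * scale;
  const double dydx = diff_scale * std::pow(shift + scale * x, power - 1.0);
  return top_diff * dydx;  // chain rule: dE/dx = dE/dy * dy/dx
}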
Example #2
template <typename Dtype>
void AbsValLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->gpu_data();
  const Dtype* top_diff = top[0]->gpu_diff();
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
    caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff);
  }
}
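Since |x| / x = sign(x) wherever x != 0, the two kernel calls above compute top_diff .* sign(bottom_data); upstream Caffe writes the same gradient with caffe_gpu_sign, which also avoids the 0/0 this version produces at x == 0. A minimal scalar sketch (hypothetical checking helper):

// Hypothetical scalar reference for the AbsVal gradient:
// d|x|/dx = sign(x), taken as 0 at x == 0.
double absval_backward_ref(double x, double top_diff) {
  const double sign = (x > 0.0) - (x < 0.0);
  return top_diff * sign;
}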
Example #3
template <typename Dtype>
void LogLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  caffe_copy(count, bottom_data, bottom_diff);
  if (input_scale_ != Dtype(1)) {
    caffe_gpu_scal(count, input_scale_, bottom_diff);
  }
  if (input_shift_ != Dtype(0)) {
    caffe_gpu_add_scalar(count, input_shift_, bottom_diff);
  }
  caffe_gpu_powx(count, bottom_diff, Dtype(-1), bottom_diff);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_gpu_scal(count, backward_num_scale_, bottom_diff);
  }
  caffe_gpu_mul(count, top_diff, bottom_diff, bottom_diff);
}
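The call chain implements the derivative of y = log_b(input_shift_ + input_scale_ * x): copy x, apply the affine transform, invert elementwise with powx(-1), then scale. A minimal scalar sketch (hypothetical checking helper, assuming backward_num_scale_ = input_scale_ / ln(b) as in LogLayer's setup):

#include <cmath>

// Hypothetical scalar reference for the LogLayer gradient:
//   dy/dx = scale / (ln(b) * (shift + scale * x))
double log_backward_ref(double x, double top_diff,
                        double scale, double shift, double base) {
  const double backward_num_scale = scale / std::log(base);
  return top_diff * backward_num_scale / (shift + scale * x);
}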
Example #4
template <typename Dtype>
void Tensor<Dtype>::MulFrom(const Tensor& source) {
  if (source.count() != count_ || source.shape() != shape_) {
    ASSERT(false, "Trying to add blobs of different sizes: "
      << source.count() << " != " << count_);
  }
  switch (mode()) {
  case Caffe::CPU:
    caffe_mul(count_, source.cpu_mem(),
        this->cpu_mem(),
        this->mutable_cpu_mem());
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    caffe_gpu_mul(count_, source.gpu_mem(),
        this->gpu_mem(),
        this->mutable_gpu_mem());
#else
    NO_GPU;
#endif
    break;
  default:
    ASSERT(false, "Unknown caffe mode.");
  }
}
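Both branches compute the same in-place elementwise product; they differ only in which memory they touch. A minimal CPU sketch of the operation (hypothetical free function, for illustration only):

#include <cassert>
#include <vector>

// Hypothetical reference for MulFrom: dest[i] *= source[i] for every i,
// mirroring what caffe_mul / caffe_gpu_mul compute on the respective device.
void mul_from_ref(const std::vector<float>& source,
                  std::vector<float>& dest) {
  assert(source.size() == dest.size());  // mirrors the shape/count check
  for (size_t i = 0; i < dest.size(); ++i) {
    dest[i] *= source[i];
  }
}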
Example #5
template <typename Dtype>
void BatchNormLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  CHECK(!use_global_stats_);
  const Dtype* top_diff;
  if (bottom[0] != top[0]) {
    top_diff = top[0]->gpu_diff();
  } else {
    caffe_copy(x_norm_.count(), top[0]->gpu_diff(),
        x_norm_.mutable_gpu_diff());
    top_diff = x_norm_.gpu_diff();
  }
  const Dtype* top_data = x_norm_.gpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  int num = bottom[0]->shape(0);
  int spatial_dim = bottom[0]->count() / (channels_ * bottom[0]->shape(0));
  // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
  //
  // dE(Y)/dX =
  //   (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y)
  //     ./ sqrt(var(X) + eps)
  //
  // where \cdot and ./ are hadamard product and elementwise division,
  // respectively, dE/dY is the top diff, and mean/var/sum are all computed
  // along all dimensions except the channels dimension.  In the above
  // equation, the operations allow for expansion (i.e. broadcast) along all
  // dimensions except the channels dimension where required.

  // sum(dE/dY \cdot Y)
  caffe_gpu_mul(temp_.count(), top_data, top_diff, bottom_diff);
  caffe_gpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim, 1.,
      bottom_diff, spatial_sum_multiplier_.gpu_data(), 0.,
      num_by_chans_.mutable_gpu_data());
  caffe_gpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
      num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0.,
      mean_.mutable_gpu_data());

  // reshape (broadcast) the above
  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0.,
      num_by_chans_.mutable_gpu_data());
  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.gpu_data(),
      spatial_sum_multiplier_.gpu_data(), 0., bottom_diff);

  // sum(dE/dY \cdot Y) \cdot Y
  caffe_gpu_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

  // sum(dE/dY) - sum(dE/dY \cdot Y) \cdot Y
  caffe_gpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim, 1.,
      top_diff, spatial_sum_multiplier_.gpu_data(), 0.,
      num_by_chans_.mutable_gpu_data());
  caffe_gpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
      num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0.,
      mean_.mutable_gpu_data());
  // reshape (broadcast) the above to make
  // sum(dE/dY) - sum(dE/dY \cdot Y) \cdot Y
  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0.,
      num_by_chans_.mutable_gpu_data());
  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num * channels_,
      spatial_dim, 1, 1., num_by_chans_.gpu_data(),
      spatial_sum_multiplier_.gpu_data(), 1., bottom_diff);

  // dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y
  caffe_gpu_axpby(temp_.count(), Dtype(1), top_diff,
      Dtype(-1. / (num * spatial_dim)), bottom_diff);

  // note: temp_ still contains sqrt(var(X)+eps), computed during the forward
  // pass.
  caffe_gpu_div(temp_.count(), bottom_diff, temp_.gpu_data(), bottom_diff);
}
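The gemv/gemm pairs only realize the per-channel reductions and broadcasts; per channel, the result is exactly the formula in the comment. A minimal single-channel sketch (hypothetical checking helper; y stands in for x_norm_ and inv_std for the reciprocal of temp_, i.e. 1 / sqrt(var(X) + eps)):

#include <vector>

// Hypothetical single-channel reference for the BatchNorm gradient:
//   dE/dX = (dE/dY - mean(dE/dY) - mean(dE/dY .* Y) .* Y) / sqrt(var(X) + eps)
std::vector<double> batchnorm_backward_ref(const std::vector<double>& y,
                                           const std::vector<double>& dy,
                                           double inv_std) {
  const size_t n = y.size();
  double mean_dy = 0.0, mean_dy_y = 0.0;
  for (size_t i = 0; i < n; ++i) {
    mean_dy += dy[i];
    mean_dy_y += dy[i] * y[i];
  }
  mean_dy /= n;    // mean(dE/dY)
  mean_dy_y /= n;  // mean(dE/dY .* Y)
  std::vector<double> dx(n);
  for (size_t i = 0; i < n; ++i) {
    dx[i] = (dy[i] - mean_dy - mean_dy_y * y[i]) * inv_std;
  }
  return dx;
}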