void PowerLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* bottom_diff = (bottom)[0]->mutable_gpu_diff();
    const int count = (bottom)[0]->count();
    const Dtype* top_diff = top[0]->gpu_diff();
    if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
      caffe_gpu_set(count, diff_scale_, bottom_diff);
    } else {
      const Dtype* bottom_data = (bottom)[0]->gpu_data();
      // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
      //               = diff_scale * y / (shift + scale * x)
      if (power_ == Dtype(2)) {
        // Special case for y = (shift + scale * x)^2
        //     -> dy/dx = 2 * scale * (shift + scale * x)
        //              = diff_scale * shift + diff_scale * scale * x
        caffe_gpu_axpby(
            count,
            diff_scale_ * scale_,
            bottom_data,
            Dtype(0),
            bottom_diff);

        if (shift_ != Dtype(0)) {
          caffe_gpu_add_scalar(count, diff_scale_ * shift_, bottom_diff);
        }
      } else if (shift_ == Dtype(0)) {
        // Special case for y = (scale * x)^power
        //     -> dy/dx = scale * power * (scale * x)^(power - 1)
        //              = scale * power * (scale * x)^power * (scale * x)^(-1)
        //              = power * y / x
        const Dtype* top_data = top[0]->gpu_data();
        caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
        caffe_gpu_scal(count, power_, bottom_diff);
      } else {
        caffe_copy(count, bottom_data, bottom_diff);
        if (scale_ != Dtype(1)) {
          caffe_gpu_scal(count, scale_, bottom_diff);
        }
        if (shift_ != Dtype(0)) {
          caffe_gpu_add_scalar(count, shift_, bottom_diff);
        }
        const Dtype* top_data = top[0]->gpu_data();
        caffe_gpu_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
        if (diff_scale_ != Dtype(1)) {
          caffe_gpu_scal(count, diff_scale_, bottom_diff);
        }
      }
    }
    caffe_gpu_mul(count, top_diff, bottom_diff, bottom_diff);
  }
}
void BiasChannelLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // TODO(gpapan): write a CUDA kernel for this case
  const BiasChannelParameter_LabelType label_type =
    this->layer_param_.bias_channel_param().label_type();
  if (label_type == BiasChannelParameter_LabelType_PIXEL) {
    Forward_cpu(bottom, top);
    return;
  }
  caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(), top[0]->mutable_gpu_data());
  for (int n = 0; n < num_; ++n) {
    for (int j = 0; j < max_labels_; ++j) {
      const int label = static_cast<int>(*bottom[1]->cpu_data_at(n, j));
      if (ignore_label_.count(label) != 0) {
	continue;
      } else if (label >= 0 && label < channels_) {
	// Bias the foreground or background scores
	const Dtype bias = (label == 0) ? bg_bias_ : fg_bias_;
	caffe_gpu_add_scalar(height_ * width_, bias, top[0]->mutable_gpu_data_at(n, label));	
      } else {
	LOG(FATAL) << "Unexpected label " << label;
      }
    }
  }
}
Exemple #3
0
void LogLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  caffe_copy(count, bottom_data, bottom_diff);
  if (input_scale_ != Dtype(1)) {
    caffe_gpu_scal(count, input_scale_, bottom_diff);
  }
  if (input_shift_ != Dtype(0)) {
    caffe_gpu_add_scalar(count, input_shift_, bottom_diff);
  }
  caffe_gpu_powx(count, bottom_diff, Dtype(-1), bottom_diff);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_gpu_scal(count, backward_num_scale_, bottom_diff);
  }
  caffe_gpu_mul(count, top_diff, bottom_diff, bottom_diff);
}
Exemple #4
0
void LogLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  if (input_scale_ == Dtype(1) && input_shift_ == Dtype(0)) {
    caffe_gpu_log(count, bottom_data, top_data);
  } else {
    caffe_copy(count, bottom_data, top_data);
    if (input_scale_ != Dtype(1)) {
      caffe_gpu_scal(count, input_scale_, top_data);
    }
    if (input_shift_ != Dtype(0)) {
      caffe_gpu_add_scalar(count, input_shift_, top_data);
    }
    caffe_gpu_log(count, top_data, top_data);
  }
  if (base_scale_ != Dtype(1)) {
    caffe_gpu_scal(count, base_scale_, top_data);
  }
}
void PowerLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* top_data = (top)[0]->mutable_gpu_data();
  const int count = bottom[0]->count();
  // Special case where we can ignore the input: scale or power is 0.
  if (diff_scale_ == Dtype(0)) {
    Dtype value = (power_ == 0) ? Dtype(1) : pow(shift_, power_);
    caffe_gpu_set(count, value, top_data);
    return;
  }
  const Dtype* bottom_data = bottom[0]->gpu_data();
  caffe_copy(count, bottom_data, top_data);
  if (scale_ != Dtype(1)) {
    caffe_gpu_scal(count, scale_, top_data);
  }
  if (shift_ != Dtype(0)) {
    caffe_gpu_add_scalar(count, shift_, top_data);
  }
  if (power_ != Dtype(1)) {
    caffe_gpu_powx(count, top_data, power_, top_data);
  }
}
Exemple #6
0
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  Dtype rate = this->GetLearningRate();
  Dtype delta = this->param_.delta();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype weight_decay = this->param_.weight_decay();
  string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(net_params[param_id]->count(),
              net_params[param_id]->cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
          net_params[param_id]->cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
          this->update_[param_id]->cpu_data(),
          this->history_[param_id]->cpu_data(),
          this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                this->history_[param_id]->cpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(),
                net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->cpu_data(), Dtype(0),
          net_params[param_id]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(net_params[param_id]->count(),
              net_params[param_id]->gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_gpu_powx(net_params[param_id]->count(),
          net_params[param_id]->gpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_gpu_data());

      // update history
      caffe_gpu_add(net_params[param_id]->count(),
          this->update_[param_id]->gpu_data(),
          this->history_[param_id]->gpu_data(),
          this->history_[param_id]->mutable_gpu_data());

      // prepare update
      caffe_gpu_powx(net_params[param_id]->count(),
                this->history_[param_id]->gpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_div(net_params[param_id]->count(),
                net_params[param_id]->gpu_diff(),
                this->update_[param_id]->gpu_data(),
                this->update_[param_id]->mutable_gpu_data());

      // scale and copy
      caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->gpu_data(), Dtype(0),
          net_params[param_id]->mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
  void BatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                                          const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    Dtype* top_data = top[0]->mutable_gpu_data();
    int num = bottom[0]->shape(0);
    int spatial_dim = bottom[0]->count() / (channels_*bottom[0]->shape(0));

    if (bottom[0] != top[0]) {
      caffe_copy(bottom[0]->count(), bottom_data, top_data);
    }


    if (use_global_stats_) {
      // use the stored mean/variance estimates.  TODO(cdoersch): allow an option
      // to use an unbiased variance estimate, like the paper does.
      caffe_copy(mean_.count(), this->blobs_[0]->gpu_data(),
                 mean_.mutable_gpu_data());
      int m = bottom[0]->count() / channels_;
      Dtype scale_factor = m > 1 ? Dtype(m) / (m - 1) : 1;
      caffe_gpu_scale(variance_.count(), scale_factor,
                      this->blobs_[1]->gpu_data(), variance_.mutable_gpu_data());
    }
    else {
      // compute mean
      caffe_gpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
                            1. / (num * spatial_dim), bottom_data,
                            spatial_sum_multiplier_.gpu_data(), 0.,
                            num_by_chans_.mutable_gpu_data());
      caffe_gpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
                            num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0.,
                            mean_.mutable_gpu_data());
    }

    // subtract mean
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
                          batch_sum_multiplier_.gpu_data(), mean_.gpu_data(), 0.,
                          num_by_chans_.mutable_gpu_data());
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
                          spatial_dim, 1, -1, num_by_chans_.gpu_data(),
                          spatial_sum_multiplier_.gpu_data(), 1., top_data);

    if (!use_global_stats_) {
      // compute variance using var(X) = E((X-EX)^2)
      caffe_gpu_powx(top[0]->count(), top_data, Dtype(2),
                     temp_.mutable_gpu_data());  // (X-EX)^2
      caffe_gpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
                            1. / (num * spatial_dim), temp_.gpu_data(),
                            spatial_sum_multiplier_.gpu_data(), 0.,
                            num_by_chans_.mutable_gpu_data());
      caffe_gpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
                            num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0.,
                            variance_.mutable_gpu_data());  // E((X_EX)^2)

      // compute and save moving average
      Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
        1 : 1 - moving_average_fraction_;
      caffe_gpu_axpby(mean_.count(), scale_factor,
                      mean_.gpu_data(), moving_average_fraction_,
                      this->blobs_[0]->mutable_gpu_data());
      caffe_gpu_axpby(variance_.count(), scale_factor,
                      variance_.gpu_data(), moving_average_fraction_,
                      this->blobs_[1]->mutable_gpu_data());
      this->blobs_[2]->mutable_cpu_data()[0] += 1;
    }

    // normalize variance
    caffe_gpu_add_scalar(variance_.count(), eps_, variance_.mutable_gpu_data());
    caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5),
                   variance_.mutable_gpu_data());

    // replicate variance to input size
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
                          batch_sum_multiplier_.gpu_data(), variance_.gpu_data(), 0.,
                          num_by_chans_.mutable_gpu_data());
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
                          spatial_dim, 1, 1., num_by_chans_.gpu_data(),
                          spatial_sum_multiplier_.gpu_data(), 0., temp_.mutable_gpu_data());
    caffe_gpu_div(temp_.count(), top_data, temp_.gpu_data(), top_data);
    // TODO(cdoersch): The caching is only needed because later in-place layers
    //                 might clobber the data.  Can we skip this if they won't?
    caffe_copy(x_norm_.count(), top_data,
               x_norm_.mutable_gpu_data());
  }
Exemple #8
0
void AdaGradSolver<Dtype>::ComputeUpdateValue(uint_tp param_id, Dtype rate) {
  CHECK(Caffe::root_solver());
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype local_rate = rate * net_params_lr[param_id];
  switch (Caffe::mode()) {
    case Caffe::CPU: {
      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
                 net_params[param_id]->cpu_diff(), Dtype(2),
                 this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
                this->update_[param_id]->cpu_data(),
                this->history_[param_id]->cpu_data(),
                this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                 this->history_[param_id]->cpu_data(), Dtype(0.5),
                 this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(), delta,
                       this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(), net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
                      this->update_[param_id]->cpu_data(), Dtype(0),
                      net_params[param_id]->mutable_cpu_diff());
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
        // compute square of gradient in update
        caffe_gpu_powx(net_params[param_id]->count(),
                       net_params[param_id]->gpu_diff(), Dtype(2),
                       this->update_[param_id]->mutable_gpu_data());

        // update history
        caffe_gpu_add(net_params[param_id]->count(),
                      this->update_[param_id]->gpu_data(),
                      this->history_[param_id]->gpu_data(),
                      this->history_[param_id]->mutable_gpu_data());

        // prepare update
        caffe_gpu_powx(net_params[param_id]->count(),
                       this->history_[param_id]->gpu_data(), Dtype(0.5),
                       this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_add_scalar(net_params[param_id]->count(), delta,
                             this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_div(net_params[param_id]->count(),
                      net_params[param_id]->gpu_diff(),
                      this->update_[param_id]->gpu_data(),
                      this->update_[param_id]->mutable_gpu_data());

        // scale and copy
        caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
                        this->update_[param_id]->gpu_data(), Dtype(0),
                        net_params[param_id]->mutable_gpu_diff());
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        // compute square of gradient in update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0, Dtype(2),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // update history
        greentea_gpu_add<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->mutable_gpu_data()), 0);

        // prepare update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->history_[param_id]->gpu_data()), 0, Dtype(0.5),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_add_scalar<Dtype>(
            this->device_->id(), net_params[param_id]->count(), delta,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_div<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0,
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // scale and copy
        greentea_gpu_axpby<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            local_rate, (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            Dtype(0), (cl_mem) (net_params[param_id]->mutable_gpu_diff()), 0);
#endif  // USE_GREENTEA
      }
#else
      NO_GPU;
#endif
      break;
    }
    default:
      LOG(FATAL)<< "Unknown caffe mode: " << Caffe::mode();
    }
  }