Example #1
0
// Copies the contents of `source` into this blob.
//   source    - blob to copy from.
//   copy_diff - true copies the diff buffers, false copies the data buffers.
//   reshape   - when count or shape differ: true reshapes this blob like
//               `source` first, false aborts with a fatal log.
// The copy is dispatched on Caffe::mode(). In GPU mode the CUDA backend uses
// caffe_copy; any other backend uses greentea_copy, which is only compiled in
// when USE_GREENTEA is defined.
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  const bool mismatch = source.count() != count_ || source.shape() != shape_;
  if (mismatch) {
    if (!reshape) {
      LOG(FATAL)<< "Trying to copy blobs of different sizes.";
    } else {
      ReshapeLike(source);
    }
  }
  switch (Caffe::mode()) {
    case Caffe::CPU: {
      // Pick the source/destination pair once, then issue a single copy.
      const Dtype* from = copy_diff ? source.cpu_diff() : source.cpu_data();
      Dtype* to = static_cast<Dtype*>(
          copy_diff ? diff_->mutable_cpu_data() : data_->mutable_cpu_data());
      caffe_cpu_copy(count_, from, to);
      break;
    }
    case Caffe::GPU: {
      if (device_->backend() == BACKEND_CUDA) {
        const Dtype* from = copy_diff ? source.gpu_diff() : source.gpu_data();
        Dtype* to = static_cast<Dtype*>(
            copy_diff ? diff_->mutable_gpu_data() : data_->mutable_gpu_data());
        caffe_copy(count_, from, to);
      } else {
#ifdef USE_GREENTEA
        // The device pointers are handed to greentea_copy as cl_mem handles.
        cl_mem from = (cl_mem) (
            copy_diff ? source.gpu_diff() : source.gpu_data());
        cl_mem to = (cl_mem) (
            copy_diff ? diff_->mutable_gpu_data() : data_->mutable_gpu_data());
        greentea_copy<Dtype>(
            count_, from, 0, to, 0,
            &viennacl::ocl::get_context(device_->id()));
#endif
      }
      break;
    }
    default:
    LOG(FATAL)<< "Unknown caffe mode.";
  }
}
// Evaluates the checker's objective on the top blobs and writes the matching
// gradient into their CPU diffs.
//   top_id < 0  : the objective is half the sum of squares of every top
//                 element, and each top diff becomes a copy of its data.
//   top_id >= 0 : every top diff is zeroed, the objective is the element
//                 top[top_id][top_data_id] multiplied by a weight of 2, and
//                 that element's diff is set to the weight.
// `layer` is not read by this function.
// Returns the objective value.
Dtype GradientChecker<Dtype>::GetObjAndGradient(const Layer<Dtype>& layer,
                                                const vector<Blob<Dtype>*>& top,
                                                int_tp top_id,
                                                int_tp top_data_id) {
  if (top_id >= 0) {
    // Single-element objective: clear all diffs, then mark the chosen one.
    for (int_tp b = 0; b < top.size(); ++b) {
      Blob<Dtype>* blob = top[b];
      Dtype* grads = blob->mutable_cpu_diff();
      caffe_set(blob->count(), Dtype(0), grads);
    }
    const Dtype loss_weight = 2;
    Dtype picked = top[top_id]->cpu_data()[top_data_id] * loss_weight;
    top[top_id]->mutable_cpu_diff()[top_data_id] = loss_weight;
    return picked;
  }

  // Sum-of-squares objective over all top blobs.
  Dtype sum_sq = 0;
  for (int_tp b = 0; b < top.size(); ++b) {
    Blob<Dtype>* blob = top[b];
    const Dtype* values = blob->cpu_data();
    Dtype* grads = blob->mutable_cpu_diff();
    int_tp n = blob->count();
    for (int_tp k = 0; k < n; ++k) {
      sum_sq += values[k] * values[k];
    }
    // The diff of this objective is simply the data itself.
    caffe_cpu_copy(blob->count(), values, grads);
  }
  sum_sq /= 2.;
  return sum_sq;
}
Example #3
0
// Computes the update value for learnable parameter `param_id`.
// The local rate is `rate` multiplied by the parameter's entry in
// params_lr(). On the CPU, caffe_cpu_axpby combines the parameter diff (with
// the local rate) and the history (with the momentum) into the history blob,
// which is then copied back into the parameter diff. On the GPU the work is
// handed to sgd_update_gpu; CPU_ONLY builds hit NO_GPU instead. Any other
// mode is fatal.
void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& params = this->net_->learnable_params();
  const vector<float>& lr_mults = this->net_->params_lr();
  Blob<Dtype>* param = params[param_id];
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * lr_mults[param_id];
  switch (Caffe::mode()) {
    case Caffe::GPU: {
#ifndef CPU_ONLY
      sgd_update_gpu(this->device_, param->count(),
                     param->mutable_gpu_diff(),
                     history_[param_id]->mutable_gpu_data(),
                     momentum, local_rate);
#else
      NO_GPU;
#endif
      break;
    }
    case Caffe::CPU: {
      // Update the history first, then mirror it into the parameter diff.
      caffe_cpu_axpby(param->count(), local_rate, param->cpu_diff(),
                      momentum, history_[param_id]->mutable_cpu_data());
      caffe_cpu_copy(param->count(), history_[param_id]->cpu_data(),
                     param->mutable_cpu_diff());
      break;
    }
    default: {
      LOG(FATAL)<< "Unknown caffe mode: " << Caffe::mode();
    }
  }
}
Example #4
0
// Forward pass of the hinge loss.
// bottom[0] supplies the scores and bottom[1] the values used as per-row
// label indices. The bottom[0] diff buffer is used as scratch space: it
// receives a copy of the scores, the entry at each row's label index is
// negated, and every entry is then replaced by max(0, 1 + entry).
// The loss written to top[0] is the scratch buffer's asum (L1 norm) or its
// dot product with itself (L2 norm), divided by the number of rows; any
// other norm setting is fatal.
void HingeLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* scores = bottom[0]->cpu_data();
  Dtype* margins = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  int_tp num = bottom[0]->num();
  int_tp count = bottom[0]->count();
  int_tp dim = count / num;

  caffe_cpu_copy(count, scores, margins);
  // Flip the sign of the entry selected by each row's label.
  for (int_tp row = 0; row < num; ++row) {
    const int_tp target = static_cast<int_tp>(label[row]);
    margins[row * dim + target] *= -1;
  }
  // Apply the hinge to every entry of the num x dim buffer.
  for (int_tp k = 0; k < num * dim; ++k) {
    margins[k] = std::max(Dtype(0), 1 + margins[k]);
  }
  Dtype* loss = top[0]->mutable_cpu_data();
  if (this->layer_param_.hinge_loss_param().norm()
      == HingeLossParameter_Norm_L1) {
    loss[0] = caffe_cpu_asum(count, margins) / num;
  } else if (this->layer_param_.hinge_loss_param().norm()
      == HingeLossParameter_Norm_L2) {
    loss[0] = caffe_cpu_dot(count, margins, margins) / num;
  } else {
    LOG(FATAL) << "Unknown Norm";
  }
}
Example #5
0
// Backward pass of the softmax loss.
// Requesting a gradient for bottom[1] (the label input) is fatal. When
// propagate_down[0] is set, the bottom[0] diff starts as a copy of prob_,
// then for each (outer, inner) position either the whole column along the
// softmax axis is zeroed (ignored label) or 1 is subtracted at the label's
// entry. The result is scaled by top[0]'s diff divided by the normalizer
// computed from the number of non-ignored positions.
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) { return; }

  Dtype* grad = bottom[0]->mutable_cpu_diff();
  const Dtype* prob_data = prob_.cpu_data();
  caffe_cpu_copy(prob_.count(), prob_data, grad);
  const Dtype* label = bottom[1]->cpu_data();
  int_tp dim = prob_.count() / outer_num_;
  int_tp counted = 0;
  for (int_tp outer = 0; outer < outer_num_; ++outer) {
    for (int_tp inner = 0; inner < inner_num_; ++inner) {
      const int_tp label_value = static_cast<int_tp>
          (label[outer * inner_num_ + inner]);
      const bool ignored = has_ignore_label_ && label_value == ignore_label_;
      if (!ignored) {
        grad[outer * dim + label_value * inner_num_ + inner] -= 1;
        ++counted;
        continue;
      }
      // Ignored position: it contributes no gradient on any channel.
      for (int_tp c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
        grad[outer * dim + c * inner_num_ + inner] = 0;
      }
    }
  }
  // Scale gradient
  Dtype loss_weight = top[0]->cpu_diff()[0] /
                      get_normalizer(normalization_, counted);
  caffe_scal(prob_.count(), loss_weight, grad);
}
Example #6
0
// Copies the bottom blob's CPU data into the top blob with caffe_cpu_copy
// and expects every element to compare equal afterwards.
TYPED_TEST(CPUMathFunctionsTest, TestCopy) {
  const int n = this->blob_bottom_->count();
  const TypeParam* src = this->blob_bottom_->cpu_data();
  TypeParam* dst = this->blob_top_->mutable_cpu_data();
  caffe_cpu_copy(n, src, dst);
  int idx = 0;
  while (idx < n) {
    EXPECT_EQ(src[idx], dst[idx]);
    ++idx;
  }
}
Example #7
0
// Copies bottom[0] into data_blob_ and bottom[1] into label_blob_, one item
// at a time, and then calls SaveBlobs().
// Requires at least two bottoms whose num() values match. Both internal
// blobs are reshaped to the num/channels/height/width of their bottom.
// `top` is not used.
void HDF5OutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom.size(), 2);
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());
  Blob<Dtype>* data_in = bottom[0];
  Blob<Dtype>* label_in = bottom[1];
  data_blob_.Reshape(data_in->num(), data_in->channels(),
                     data_in->height(), data_in->width());
  label_blob_.Reshape(label_in->num(), label_in->channels(),
                     label_in->height(), label_in->width());
  const int_tp data_datum_dim = data_in->count() / data_in->num();
  const int_tp label_datum_dim = label_in->count() / label_in->num();

  const Dtype* data_src = data_in->cpu_data();
  const Dtype* label_src = label_in->cpu_data();
  Dtype* data_dst = data_blob_.mutable_cpu_data();
  Dtype* label_dst = label_blob_.mutable_cpu_data();
  // Walk both blobs item by item with running offsets.
  int_tp data_off = 0;
  int_tp label_off = 0;
  for (int_tp i = 0; i < data_in->num(); ++i) {
    caffe_cpu_copy(data_datum_dim, data_src + data_off, data_dst + data_off);
    caffe_cpu_copy(label_datum_dim, label_src + label_off,
        label_dst + label_off);
    data_off += data_datum_dim;
    label_off += label_datum_dim;
  }
  SaveBlobs();
}
Example #8
0
// Pops one batch from prefetch_full_, copies its data into top[0] (and its
// labels into top[1] when output_labels_ is set) after reshaping each top to
// match, and finally pushes the batch onto prefetch_free_.
// `bottom` is not used.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");

  // Data: size the output like the loaded batch, then copy it over.
  Blob<Dtype>* data_out = top[0];
  data_out->ReshapeLike(batch->data_);
  caffe_cpu_copy(batch->data_.count(), batch->data_.cpu_data(),
                 data_out->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";

  if (this->output_labels_) {
    // Labels: same procedure on the second top.
    Blob<Dtype>* label_out = top[1];
    label_out->ReshapeLike(batch->label_);
    caffe_cpu_copy(batch->label_.count(), batch->label_.cpu_data(),
                   label_out->mutable_cpu_data());
  }

  prefetch_free_.push(batch);
}
Example #9
0
// Joins the prefetch thread, copies prefetch_data_ into top[0] (and
// prefetch_label_ into top[1] when output_labels_ is set) after reshaping
// each top to match, and then creates a new prefetch thread.
// `bottom` is not used.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The prefetched blobs are only read after the join below.
  JoinPrefetchThread();
  DLOG(INFO) << "Thread joined";

  // Data: size the output like the prefetched blob, then copy it over.
  Blob<Dtype>* data_out = top[0];
  data_out->ReshapeLike(prefetch_data_);
  caffe_cpu_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
                 data_out->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";

  if (this->output_labels_) {
    // Labels: same procedure on the second top.
    Blob<Dtype>* label_out = top[1];
    label_out->ReshapeLike(prefetch_label_);
    caffe_cpu_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
                   label_out->mutable_cpu_data());
  }

  // Kick off the next prefetch.
  DLOG(INFO) << "CreatePrefetchThread";
  CreatePrefetchThread();
}
Example #10
0
// Backward pass for y = (shift + scale * x)^power.
// Does nothing unless propagate_down[0] is set. Otherwise the bottom diff is
// first filled with dy/dx and then, unless diff_scale_ is zero, multiplied
// element-wise by the top diff. dy/dx is computed through one of four paths:
//   * diff_scale_ == 0 or power_ == 1: the constant diff_scale_;
//   * power_ == 2: a linear function of x;
//   * shift_ == 0: power * y / x;
//   * otherwise: diff_scale * y / (shift + scale * x).
void PowerLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }

  Dtype* grad = bottom[0]->mutable_cpu_diff();
  const int count = bottom[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  const bool constant_derivative =
      diff_scale_ == Dtype(0) || power_ == Dtype(1);
  if (constant_derivative) {
    caffe_set(count, diff_scale_, grad);
  } else {
    const Dtype* x = bottom[0]->cpu_data();
    // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
    //               = diff_scale * y / (shift + scale * x)
    if (power_ == Dtype(2)) {
      // y = (shift + scale * x)^2
      //     -> dy/dx = 2 * scale * (shift + scale * x)
      //              = diff_scale * shift + diff_scale * scale * x
      caffe_cpu_axpby(count, diff_scale_ * scale_, x, Dtype(0), grad);
      if (shift_ != Dtype(0)) {
        caffe_add_scalar(count, diff_scale_ * shift_, grad);
      }
    } else if (shift_ == Dtype(0)) {
      // y = (scale * x)^power
      //     -> dy/dx = scale * power * (scale * x)^(power - 1)
      //              = scale * power * (scale * x)^power * (scale * x)^(-1)
      //              = power * y / x
      const Dtype* y = top[0]->cpu_data();
      caffe_div(count, y, x, grad);
      caffe_scal(count, power_, grad);
    } else {
      // General case: build (shift + scale * x) in place, then divide y by it.
      caffe_cpu_copy(count, x, grad);
      if (scale_ != Dtype(1)) {
        caffe_scal(count, scale_, grad);
      }
      if (shift_ != Dtype(0)) {
        caffe_add_scalar(count, shift_, grad);
      }
      const Dtype* y = top[0]->cpu_data();
      caffe_div<Dtype>(count, y, grad, grad);
      if (diff_scale_ != Dtype(1)) {
        caffe_scal(count, diff_scale_, grad);
      }
    }
  }
  // Chain rule: multiply by the incoming gradient.
  if (diff_scale_ != Dtype(0)) {
    caffe_mul(count, top_diff, grad, grad);
  }
}
Example #11
0
// Accumulates the diffs of all top blobs into the bottom[0] diff.
// Does nothing unless propagate_down[0] is set. A single top is copied
// straight through; otherwise the first two top diffs are added into the
// bottom diff and every further top diff is accumulated with caffe_axpy.
void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  Dtype* grad = bottom[0]->mutable_cpu_diff();
  if (top.size() == 1) {
    caffe_cpu_copy(count_, top[0]->cpu_diff(), grad);
    return;
  }
  // Seed the sum with the first two tops ...
  caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(), grad);
  // ... then fold in whatever tops remain.
  for (int t = 2; t < top.size(); ++t) {
    caffe_axpy(count_, Dtype(1.), top[t]->cpu_diff(), grad);
  }
}
Example #12
0
// For every top t, copies the items of bottom[t] whose indices are listed in
// indices_to_forward_ into consecutive positions of top[t]. Only the first
// top.size() bottoms are read; the original comment notes that the last
// bottom is the selector.
void FilterLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  int kept = indices_to_forward_.size();
  for (int t = 0; t < top.size(); ++t) {
    const Dtype* src = bottom[t]->cpu_data();
    Dtype* dst = top[t]->mutable_cpu_data();
    // Number of values in one item of this bottom.
    int dim = bottom[t]->count() / bottom[t]->shape(0);
    int n = 0;
    while (n < kept) {
      int from = indices_to_forward_[n] * bottom[t]->count(1);
      int to = n * dim;
      caffe_cpu_copy(dim, src + from, dst + to);
      ++n;
    }
  }
}
Example #13
0
// Forward pass of dropout.
// In the TRAIN phase a mask is drawn with caffe_rng_bernoulli (probability
// argument 1 - threshold_) into rand_vec_, and each output is
// input * mask * scale_. In any other phase the input is copied unchanged.
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* in = bottom[0]->cpu_data();
  Dtype* out = top[0]->mutable_cpu_data();
  uint_tp* mask = rand_vec_.mutable_cpu_data();
  const int_tp count = bottom[0]->count();
  if (this->phase_ != TRAIN) {
    caffe_cpu_copy(bottom[0]->count(), in, out);
    return;
  }
  // Draw a fresh mask, then apply it together with the scale.
  caffe_rng_bernoulli(count, 1. - threshold_, mask);
  for (int_tp k = 0; k < count; ++k) {
    out[k] = in[k] * mask[k] * scale_;
  }
}
Example #14
0
// Backward pass of dropout.
// Does nothing unless propagate_down[0] is set. In the TRAIN phase each
// bottom diff is top diff * mask * scale_, using the mask stored in
// rand_vec_; in any other phase the top diff is copied unchanged.
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const Dtype* grad_in = top[0]->cpu_diff();
  Dtype* grad_out = bottom[0]->mutable_cpu_diff();
  if (this->phase_ != TRAIN) {
    caffe_cpu_copy(top[0]->count(), grad_in, grad_out);
    return;
  }
  const uint_tp* mask = rand_vec_.cpu_data();
  const int_tp count = bottom[0]->count();
  for (int_tp k = 0; k < count; ++k) {
    grad_out[k] = grad_in[k] * mask[k] * scale_;
  }
}
Example #15
0
// Copies every bottom blob into its slot of top[0] along concat_axis_.
// Returns immediately when there is only one bottom. For each bottom and each
// of the num_concats_ outer slices, one contiguous chunk of
// bottom_concat_axis * concat_input_size_ values is copied; the running axis
// offset advances by the bottom's extent on the concat axis.
void ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (bottom.size() == 1) { return; }
  Dtype* out = top[0]->mutable_cpu_data();
  const int top_concat_axis = top[0]->shape(concat_axis_);
  int axis_offset = 0;
  for (int b = 0; b < bottom.size(); ++b) {
    const Dtype* in = bottom[b]->cpu_data();
    const int bottom_concat_axis = bottom[b]->shape(concat_axis_);
    for (int n = 0; n < num_concats_; ++n) {
      const Dtype* src = in + n * bottom_concat_axis * concat_input_size_;
      Dtype* dst = out
          + (n * top_concat_axis + axis_offset) * concat_input_size_;
      caffe_cpu_copy(bottom_concat_axis * concat_input_size_, src, dst);
    }
    axis_offset += bottom_concat_axis;
  }
}
Example #16
0
// Gathers the diffs of all top blobs back into the bottom[0] diff along
// slice_axis_. Does nothing when propagate_down[0] is unset or when there is
// only one top. For each top and each of the num_slices_ outer slices, one
// contiguous chunk of top_slice_axis * slice_size_ values is copied; the
// running axis offset advances by the top's extent on the slice axis.
void SliceLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const bool nothing_to_do = !propagate_down[0] || top.size() == 1;
  if (nothing_to_do) { return; }
  Dtype* grad = bottom[0]->mutable_cpu_diff();
  const int_tp bottom_slice_axis = bottom[0]->shape(slice_axis_);
  int_tp axis_offset = 0;
  for (int_tp t = 0; t < top.size(); ++t) {
    const Dtype* piece = top[t]->cpu_diff();
    const int_tp top_slice_axis = top[t]->shape(slice_axis_);
    int_tp n = 0;
    while (n < num_slices_) {
      const int_tp from = n * top_slice_axis * slice_size_;
      const int_tp to = (n * bottom_slice_axis + axis_offset) * slice_size_;
      caffe_cpu_copy(top_slice_axis * slice_size_, piece + from, grad + to);
      ++n;
    }
    axis_offset += top_slice_axis;
  }
}
Example #17
0
// Scatters bottom[0] into the top blobs along slice_axis_.
// Returns immediately when there is only one top. For each top and each of
// the num_slices_ outer slices, one contiguous chunk of
// top_slice_axis * slice_size_ values is copied; the running axis offset
// advances by the top's extent on the slice axis.
void SliceLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (top.size() == 1) { return; }
  const Dtype* in = bottom[0]->cpu_data();
  const int_tp bottom_slice_axis = bottom[0]->shape(slice_axis_);
  int_tp axis_offset = 0;
  for (int_tp t = 0; t < top.size(); ++t) {
    Dtype* out = top[t]->mutable_cpu_data();
    const int_tp top_slice_axis = top[t]->shape(slice_axis_);
    int_tp n = 0;
    while (n < num_slices_) {
      const int_tp to = n * top_slice_axis * slice_size_;
      const int_tp from = (n * bottom_slice_axis + axis_offset) * slice_size_;
      caffe_cpu_copy(top_slice_axis * slice_size_, in + from, out + to);
      ++n;
    }
    axis_offset += top_slice_axis;
  }
}
Example #18
0
// Splits the top[0] diff back into the diffs of the bottoms along
// concat_axis_. Returns immediately when there is only one bottom. Bottoms
// whose propagate_down flag is unset are skipped, but the running axis
// offset still advances past them.
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bottom.size() == 1) { return; }
  const Dtype* grad_in = top[0]->cpu_diff();
  const int top_concat_axis = top[0]->shape(concat_axis_);
  int axis_offset = 0;
  for (int b = 0; b < bottom.size(); ++b) {
    const int bottom_concat_axis = bottom[b]->shape(concat_axis_);
    if (propagate_down[b]) {
      Dtype* grad_out = bottom[b]->mutable_cpu_diff();
      for (int n = 0; n < num_concats_; ++n) {
        const Dtype* src = grad_in
            + (n * top_concat_axis + axis_offset) * concat_input_size_;
        Dtype* dst = grad_out + n * bottom_concat_axis * concat_input_size_;
        caffe_cpu_copy(bottom_concat_axis * concat_input_size_, src, dst);
      }
    }
    axis_offset += bottom_concat_axis;
  }
}
Example #19
0
// For each of the M_ inputs, casts the input value to an integer index and
// copies the N_-wide row of the weight blob at that index into the matching
// output row. Debug checks require the index to lie in [0, K_) and the input
// to be integer-valued. When bias_term_ is set, caffe_cpu_gemm adds the
// product of bias_multiplier_ and the bias blob on top of the output.
void EmbedLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* ids = bottom[0]->cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* out = top[0]->mutable_cpu_data();
  for (int n = 0; n < M_; ++n) {
    const int index = static_cast<int>(ids[n]);
    DCHECK_GE(index, 0);
    DCHECK_LT(index, K_);
    DCHECK_EQ(static_cast<Dtype>(index), ids[n]) << "non-integer input";
    caffe_cpu_copy(N_, weight + index * N_, out + n * N_);
  }
  if (!bias_term_) { return; }
  const Dtype* bias = this->blobs_[1]->cpu_data();
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1),
      bias_multiplier_.cpu_data(), bias, Dtype(1), out);
}
Example #20
0
// Backward pass of the log layer.
// Does nothing unless propagate_down[0] is set. The bottom diff is built in
// place: copy the input, apply input_scale_ and input_shift_ when they are
// not the identity, raise to the power -1, apply backward_num_scale_ when it
// is not 1, and finally multiply element-wise by the top diff.
void LogLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  const int count = bottom[0]->count();
  const Dtype* x = bottom[0]->cpu_data();
  const Dtype* grad_in = top[0]->cpu_diff();
  Dtype* grad = bottom[0]->mutable_cpu_diff();

  // grad <- input_scale_ * x + input_shift_
  caffe_cpu_copy(count, x, grad);
  if (input_scale_ != Dtype(1)) {
    caffe_scal(count, input_scale_, grad);
  }
  if (input_shift_ != Dtype(0)) {
    caffe_add_scalar(count, input_shift_, grad);
  }
  // grad <- backward_num_scale_ / grad
  caffe_powx(count, grad, Dtype(-1), grad);
  if (backward_num_scale_ != Dtype(1)) {
    caffe_scal(count, backward_num_scale_, grad);
  }
  // Chain rule: multiply by the incoming gradient.
  caffe_mul(count, grad_in, grad, grad);
}
Example #21
0
// Forward pass of the log layer.
// Writes caffe_log of (input_scale_ * x + input_shift_) to the output and
// multiplies the result by base_scale_ when that is not 1. When scale and
// shift are the identity the log is taken directly from the input; otherwise
// the affine transform is built in the output buffer first.
void LogLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* x = bottom[0]->cpu_data();
  Dtype* y = top[0]->mutable_cpu_data();
  const bool identity_input =
      input_scale_ == Dtype(1) && input_shift_ == Dtype(0);
  if (!identity_input) {
    caffe_cpu_copy(count, x, y);
    if (input_scale_ != Dtype(1)) {
      caffe_scal(count, input_scale_, y);
    }
    if (input_shift_ != Dtype(0)) {
      caffe_add_scalar(count, input_shift_, y);
    }
    caffe_log(count, y, y);
  } else {
    caffe_log(count, x, y);
  }
  if (base_scale_ != Dtype(1)) {
    caffe_scal(count, base_scale_, y);
  }
}
Example #22
0
// Forward pass for y = (shift + scale * x)^power.
// When diff_scale_ is zero the input is ignored and the output is filled
// with a constant: 1 if power_ is zero, otherwise pow(shift_, power_).
// Otherwise the input is copied to the output and scaled, shifted and raised
// to the power in place, skipping each step that would be the identity.
void PowerLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* y = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  if (diff_scale_ == Dtype(0)) {
    // Constant output: scale or power is 0, so x does not matter.
    Dtype value;
    if (power_ == 0) {
      value = Dtype(1);
    } else {
      value = pow(shift_, power_);
    }
    caffe_set(count, value, y);
    return;
  }
  const Dtype* x = bottom[0]->cpu_data();
  caffe_cpu_copy(count, x, y);
  if (scale_ != Dtype(1)) {
    caffe_scal(count, scale_, y);
  }
  if (shift_ != Dtype(0)) {
    caffe_add_scalar(count, shift_, y);
  }
  if (power_ != Dtype(1)) {
    caffe_powx(count, y, power_, y);
  }
}
Example #23
0
// Routes the top diffs back to the bottoms that were filtered in the forward
// pass.
//   top            - filtered outputs; top[i] corresponds to bottom[i].
//   propagate_down - one flag per bottom. Requesting a gradient for the last
//                    bottom (the selector) is fatal.
//   bottom         - inputs; only the first top.size() entries receive diffs.
// For every bottom item, the diff is the matching top item's diff if the
// item's index appears in indices_to_forward_, and zero otherwise. The walk
// relies on indices_to_forward_ being visited in increasing item order.
void FilterLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[bottom.size() - 1]) {
    LOG(FATAL) << this->type()
               << "Layer cannot backpropagate to filter index inputs";
  }
  for (int i = 0; i < top.size(); i++) {
    // bottom[last] is the selector and never needs backpropagation
    // so we can iterate over top vector because top.size() == bottom.size() -1
    if (!propagate_down[i]) { continue; }
    // Size of one item, taken from the bottom (the same stride Forward_cpu
    // uses for its source offsets). Deriving it from the top blob would
    // divide by zero whenever no item was forwarded and top has zero rows.
    const int dim = bottom[i]->count(1);
    int next_to_backward_offset = 0;
    for (int n = 0; n < bottom[i]->shape(0); n++) {
      const int data_offset_bottom = n * dim;
      // True while forwarded items remain and item n is the next of them.
      const bool was_forwarded =
          next_to_backward_offset < indices_to_forward_.size()
          && n == indices_to_forward_[next_to_backward_offset];
      if (was_forwarded) {
        const int data_offset_top = next_to_backward_offset * dim;
        next_to_backward_offset++;  // point to next forwarded item index
        // The top diff is only read here, so use the const accessor.
        caffe_cpu_copy(dim, top[i]->cpu_diff() + data_offset_top,
            bottom[i]->mutable_cpu_diff() + data_offset_bottom);
      } else {
        // Either this item was filtered out or every forwarded item has
        // already been visited: it receives no gradient.
        caffe_set(dim, Dtype(0),
            bottom[i]->mutable_cpu_diff() + data_offset_bottom);
      }
    }
  }
}
Example #24
0
// Forward pass of PReLU: out = max(x, 0) + slope[c] * min(x, 0).
// The channel index c is derived from the flat index using count(2) as the
// per-channel size; with channel_shared_ it collapses to zero so a single
// slope is used. When running in place (bottom[0] == top[0]) the input is
// first saved into bottom_memory_.
void PReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                    const vector<Blob<Dtype>*>& top) {
  const Dtype* x = bottom[0]->cpu_data();
  Dtype* y = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  const int dim = bottom[0]->count(2);
  const int channels = bottom[0]->channels();
  const Dtype* slope_data = this->blobs_[0]->cpu_data();

  const bool in_place = bottom[0] == top[0];
  if (in_place) {
    // Keep a copy of the input before it gets overwritten.
    caffe_cpu_copy(count, x, bottom_memory_.mutable_cpu_data());
  }

  // Dividing by `channels` maps every channel to slope index 0.
  const int div_factor = channel_shared_ ? channels : 1;
  for (int k = 0; k < count; ++k) {
    const int c = (k / dim) % channels / div_factor;
    const Dtype positive_part = std::max(x[k], Dtype(0));
    const Dtype negative_part = std::min(x[k], Dtype(0));
    y[k] = positive_part + slope_data[c] * negative_part;
  }
}
Example #25
0
// Checks a 2x3 by 3x4 matrix product against a precomputed 2x4 result.
// Each of the four transpose combinations (A, A^T) x (B, B^T) is run first
// with caffe_cpu_gemm and then with the GPU gemm of the default device
// context's backend (CUDA or Greentea, whichever is compiled in). After each
// call the result is read back through C.cpu_data() and compared with
// `result`. If the matching GPU path is not compiled in, the second
// comparison simply re-checks the CPU result.
TYPED_TEST(GemmTest, TestGemmCPUGPU) {
  DeviceContext *dc = Caffe::GetDefaultDeviceContext();

  Blob<TypeParam> A(1, 1, 2, 3, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> B(1, 1, 3, 4, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> C(1, 1, 2, 4, Caffe::GetDefaultDeviceContext());
  // `data` fills A with its first 6 values and B with all 12. The *_reshape_
  // arrays hold the same matrices in transposed layout.
  TypeParam data[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  TypeParam A_reshape_data[6] = {1, 4, 2, 5, 3, 6};
  TypeParam B_reshape_data[12] = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
  TypeParam result[8] = {38, 44, 50, 56, 83, 98, 113, 128};

  caffe_cpu_copy(6, data, A.mutable_cpu_data());
  caffe_cpu_copy(12, data, B.mutable_cpu_data());

  // [1, 2, 3; 4 5 6] * [1, 2, 3, 4; 5, 6, 7, 8; 9, 10, 11, 12];
  caffe_cpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }


  // Same product on the GPU backend.
  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasNoTrans, CblasNoTrans, 2, 4, 3, 1.,
      A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemm<TypeParam>(dc->id(), CblasNoTrans, CblasNoTrans,
                                 2, 4, 3, 1.,
                                 (cl_mem)(A.gpu_data()), 0,
                                 (cl_mem)(B.gpu_data()), 0, 0.,
                                 (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }

  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }

  // Test when we have a transposed A
  A.Reshape(1, 1, 3, 2);
  caffe_cpu_copy(6, A_reshape_data, A.mutable_cpu_data());
  caffe_cpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
  caffe_gpu_gemm<TypeParam>(CblasTrans, CblasNoTrans, 2, 4, 3, 1.,
      A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
  greentea_gpu_gemm<TypeParam>(dc->id(), CblasTrans, CblasNoTrans,
                               2, 4, 3, 1.,
                               (cl_mem)(A.gpu_data()), 0,
                               (cl_mem)(B.gpu_data()), 0,
                               0., (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }

  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }

  // Test when we have a transposed A and a transposed B too
  B.Reshape(1, 1, 4, 3);
  caffe_cpu_copy(12, B_reshape_data, B.mutable_cpu_data());
  caffe_cpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasTrans, CblasTrans, 2, 4, 3, 1.,
      A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
  greentea_gpu_gemm<TypeParam>(dc->id(), CblasTrans, CblasTrans,
                               2, 4, 3, 1.,
                               (cl_mem)(A.gpu_data()), 0,
                               (cl_mem)(B.gpu_data()), 0, 0.,
                               (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }

  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }

  // Test when we have a transposed B
  // (A goes back to its original 2x3 layout; B keeps the transposed data.)
  A.Reshape(1, 1, 2, 3);
  caffe_cpu_copy(6, data, A.mutable_cpu_data());
  caffe_cpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
      A.cpu_data(), B.cpu_data(), 0., C.mutable_cpu_data());
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemm<TypeParam>(CblasNoTrans, CblasTrans, 2, 4, 3, 1.,
      A.gpu_data(), B.gpu_data(), 0., C.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemm<TypeParam>(dc->id(), CblasNoTrans, CblasTrans,
                                 2, 4, 3, 1.,
                                 (cl_mem)(A.gpu_data()), 0,
                                 (cl_mem)(B.gpu_data()), 0, 0.,
                                 (cl_mem)(C.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }

  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(C.cpu_data()[i], result[i]);
  }
}
Example #26
0
// Checks matrix-vector products of a 2x3 matrix A against precomputed
// results: y = A * x (result_2) and x = A^T * y (result_3). Each case is run
// first with caffe_cpu_gemv and then with the GPU gemv of the default device
// context's backend (CUDA or Greentea, whichever is compiled in). After each
// call the output is read back through cpu_data() and compared.
TYPED_TEST(GemmTest, TestGemvCPUGPU) {
  DeviceContext *dc = Caffe::GetDefaultDeviceContext();

  Blob<TypeParam> A(1, 1, 2, 3, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> x(1, 1, 1, 3, Caffe::GetDefaultDeviceContext());
  Blob<TypeParam> y(1, 1, 1, 2, Caffe::GetDefaultDeviceContext());
  // `data` fills A with all 6 values and x with the first 3.
  TypeParam data[6] = {1, 2, 3, 4, 5, 6};
  TypeParam result_2[2] = {14, 32};
  TypeParam result_3[3] = {9, 12, 15};

  caffe_cpu_copy(6, data, A.mutable_cpu_data());
  caffe_cpu_copy(3, data, x.mutable_cpu_data());


  // Non-transposed case: y = A * x.
  caffe_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
      x.cpu_data(), 0., y.mutable_cpu_data());
  for (int i = 0; i < 2; ++i) {
    EXPECT_EQ(y.cpu_data()[i], result_2[i]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
      x.gpu_data(), 0., y.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemv<TypeParam>(dc->id(), CblasNoTrans,
                                 2, 3, 1.,
                                 (cl_mem)(A.gpu_data()), 0,
                                 (cl_mem)(x.gpu_data()), 0, 0.,
                                 (cl_mem)(y.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }

  for (int i = 0; i < 2; ++i) {
    EXPECT_EQ(y.cpu_data()[i], result_2[i]);
  }

  // Test transpose case
  // y is overwritten with the first 2 values of `data` and used as the input.
  caffe_cpu_copy(2, data, y.mutable_cpu_data());
  caffe_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
      y.cpu_data(), 0., x.mutable_cpu_data());
  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(x.cpu_data()[i], result_3[i]);
  }

  if (dc->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    caffe_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
      y.gpu_data(), 0., x.mutable_gpu_data());
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    greentea_gpu_gemv<TypeParam>(dc->id(), CblasTrans,
                                 2, 3, 1.,
                                 (cl_mem)(A.gpu_data()), 0,
                                 (cl_mem)(y.gpu_data()), 0, 0.,
                                 (cl_mem)(x.mutable_gpu_data()), 0);
#endif  // USE_GREENTEA
  }

  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(x.cpu_data()[i], result_3[i]);
  }
}
// Compares the gradient computed by layer->Backward with a central
// finite-difference estimate, for the objective that GetObjAndGradient
// defines from (top_id, top_data_id).
//   layer        - layer under test; its parameter blobs are always checked
//                  and their diffs are zeroed first.
//   bottom, top  - the layer's input and output blobs.
//   check_bottom - -1 checks every bottom blob; a value >= 0 checks only that
//                  bottom; any other negative value adds no bottom blob.
//   top_id, top_data_id - forwarded to GetObjAndGradient to pick the
//                  objective.
//   element_wise - requires a layer without parameter blobs, non-negative
//                  top_id/top_data_id and bottoms with the same count as
//                  top[top_id]. Finite differencing is then done only for
//                  feat_id == top_data_id; elsewhere the estimate stays 0.
// Mismatches are reported through EXPECT_NEAR. Features whose absolute value
// lies within kink_range_ of kink_ are not compared.
void GradientChecker<Dtype>::CheckGradientSingle(
    Layer<Dtype>* layer, const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top, int_tp check_bottom, int_tp top_id,
    int_tp top_data_id,
    bool element_wise) {
  if (element_wise) {
    CHECK_EQ(0, layer->blobs().size());
    CHECK_LE(0, top_id);
    CHECK_LE(0, top_data_id);
    const int_tp top_count = top[top_id]->count();
    for (int_tp blob_id = 0; blob_id < bottom.size(); ++blob_id) {
      CHECK_EQ(top_count, bottom[blob_id]->count());
    }
  }
  // First, figure out what blobs we need to check against, and zero init
  // parameter blobs.
  vector<Blob<Dtype>*> blobs_to_check;
  vector<bool> propagate_down(bottom.size(), check_bottom == -1);
  for (int_tp i = 0; i < layer->blobs().size(); ++i) {
    Blob<Dtype>* blob = layer->blobs()[i].get();
    caffe_set(blob->count(), static_cast<Dtype>(0), blob->mutable_cpu_diff());
    blobs_to_check.push_back(blob);
  }
  if (check_bottom == -1) {
    for (int_tp i = 0; i < bottom.size(); ++i) {
      blobs_to_check.push_back(bottom[i]);
    }
  } else if (check_bottom >= 0) {
    CHECK_LT(check_bottom, bottom.size());
    blobs_to_check.push_back(bottom[check_bottom]);
    propagate_down[check_bottom] = true;
  }
  CHECK_GT(blobs_to_check.size(), 0)<< "No blobs to check.";
  // Compute the gradient analytically using Backward
  // The seed is reset before every Forward call in this function.
  Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
  // Ignore the loss from the layer (it's just the weighted sum of the losses
  // from the top blobs, whose gradients we may want to test individually).
  layer->Forward(bottom, top);
  // Get additional loss from the objective
  GetObjAndGradient(*layer, top, top_id, top_data_id);
  layer->Backward(top, propagate_down, bottom);
  // Store computed gradients for all checked blobs
  // (the Forward calls made during finite differencing come after this
  // snapshot, so later comparisons read the copy rather than the live diffs).
  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(
      blobs_to_check.size());
  for (int_tp blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
    const int_tp count = blobs_to_check[blob_id]->count();
    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
    Dtype* computed_gradients = computed_gradient_blobs[blob_id]
        ->mutable_cpu_data();

    caffe_cpu_copy(count, diff, computed_gradients);
  }
  // Compute derivative of top w.r.t. each bottom and parameter input using
  // finite differencing.
  // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  for (int_tp blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    const Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->cpu_data();
    // LOG(ERROR) << "Blob " << blob_id << ": checking "
    //     << current_blob->count() << " parameters.";
    for (int_tp feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
      // For an element-wise layer, we only need to do finite differencing to
      // compute the derivative of top[top_id][top_data_id] w.r.t.
      // bottom[blob_id][i] only for i == top_data_id.  For any other
      // i != top_data_id, we know the derivative is 0 by definition, and simply
      // check that that's true.
      Dtype estimated_gradient = 0;
      Dtype positive_objective = 0;
      Dtype negative_objective = 0;
      if (!element_wise || (feat_id == top_data_id)) {
        // Do finite differencing.
        // Compute loss with stepsize_ added to input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
        layer->Forward(bottom, top);
        positive_objective = GetObjAndGradient(*layer, top, top_id,
                                               top_data_id);
        // Compute loss with stepsize_ subtracted from input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
        layer->Forward(bottom, top);
        negative_objective = GetObjAndGradient(*layer, top, top_id,
                                               top_data_id);
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        // Central difference: (f(x + h) - f(x - h)) / (2 * h).
        estimated_gradient = (positive_objective - negative_objective)
            / stepsize_ / 2.;
      }
      Dtype computed_gradient = computed_gradients[feat_id];
      Dtype feature = current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
      //     << current_blob->cpu_diff()[feat_id];
      // Only compare when |feature| lies outside
      // [kink_ - kink_range_, kink_ + kink_range_].
      if (kink_ - kink_range_ > fabs(feature)
          || fabs(feature) > kink_ + kink_range_) {
        // We check relative accuracy, but for too small values, we threshold
        // the scale factor by 1.
        Dtype scale = std::max<Dtype>(
            std::max(fabs(computed_gradient), fabs(estimated_gradient)),
            Dtype(1.));
        EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
            << "debug: (top_id, top_data_id, blob_id, feat_id)=" << top_id
            << "," << top_data_id << "," << blob_id << "," << feat_id
            << "; feat = " << feature << "; objective+ = " << positive_objective
            << "; objective- = " << negative_objective;
      }
      // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "computed gradient: " << computed_gradient
      //    << " estimated_gradient: " << estimated_gradient;
    }
  }
}