// Forward pass (CPU): pops one prefetched batch from prefetch_full_ and copies
// its data -- and, when output_labels_ is set, its labels -- with caffe_copy.
// NOTE(review): this excerpt is incomplete. Both caffe_copy calls are cut off
// before their destination argument, and the reshape statements announced by
// the comments as well as the closing braces are not visible here.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");
  // Reshape to loaded data.
  // Copy the data
  caffe_copy(batch->data_.count(), batch->data_.cpu_data(),
  DLOG(INFO) << "Prefetch copied";
  if (this->output_labels_) {
    // Reshape to loaded labels.
    // Copy the labels.
    caffe_copy(batch->label_.count(), batch->label_.cpu_data(),

// Backward pass (CPU): copies the diff of top[0] into the diff of bottom[0].
// The copy is skipped when top[0] and bottom[0] are the same blob or when
// propagate_down[0] is false.
// NOTE(review): closing braces are missing from this excerpt.
void LabelSpecificAutoLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
                                                   const vector<bool>& propagate_down,
                                                   const vector<Blob<Dtype>*>& bottom) {
  if (top[0] != bottom[0] && propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    // Copy exactly bottom[0]->count() elements of gradient.
    int count = bottom[0]->count();
    caffe_copy(count, top_diff, bottom_diff);
// Test: builds a PReLULayer and a ReLULayer from default-constructed layer
// parameters, runs both forward and backward, and expects element-wise equal
// top data and bottom diffs.
// NOTE(review): this excerpt is incomplete. No push_back into
// blob_bottom_vec_2 / blob_top_vec_2 is visible, the filler is never applied
// to tmp_blob, both caffe_copy calls are cut off before their destination
// argument, and closing braces are missing.
TYPED_TEST(NeuronLayerTest, TestPReLUConsistencyReLU) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter prelu_layer_param;
  LayerParameter relu_layer_param;
  PReLULayer<Dtype> prelu(prelu_layer_param);
  ReLULayer<Dtype> relu(relu_layer_param);
  // Set up blobs
  vector<Blob<Dtype>*> blob_bottom_vec_2;
  vector<Blob<Dtype>*> blob_top_vec_2;
  shared_ptr<Blob<Dtype> > blob_bottom_2(new Blob<Dtype>());
  shared_ptr<Blob<Dtype> > blob_top_2(new Blob<Dtype>());
  // Second bottom blob is a copy of the fixture's bottom blob
  // (arguments: copy_diff = false, reshape = true).
  blob_bottom_2->CopyFrom(*this->blob_bottom_, false, true);
  // SetUp layers
  prelu.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
  relu.SetUp(blob_bottom_vec_2, blob_top_vec_2);
  // Check forward
  prelu.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
  // NOTE(review): ReLU is fed the fixture's bottom vector here rather than
  // blob_bottom_vec_2 -- confirm this is intended.
  relu.Forward(this->blob_bottom_vec_, blob_top_vec_2);
  for (int s = 0; s < blob_top_2->count(); ++s) {
    EXPECT_EQ(this->blob_top_->cpu_data()[s], blob_top_2->cpu_data()[s]);
  // Check backward
  shared_ptr<Blob<Dtype> > tmp_blob(new Blob<Dtype>());
  FillerParameter filler_param;
  GaussianFiller<Dtype> filler(filler_param);
  caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(),
  caffe_copy(blob_top_2->count(), tmp_blob->cpu_data(),
  vector<bool> propagate_down;
  prelu.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
  relu.Backward(blob_top_vec_2, propagate_down, blob_bottom_vec_2);
  for (int s = 0; s < blob_bottom_2->count(); ++s) {
    EXPECT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]);
Exemple #4
// Forward pass (CPU): copies the data of bottom[0..childlayer_num_-1] back to
// back into top[0], advancing the write offset by each bottom's count().
// NOTE(review): the label_index fill mentioned in the comment below and the
// closing braces are not visible in this excerpt.
void UnifiedLayer<Dtype>::Forward_cpu(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // put child layers' data blobs together to form the final blob
  // then fill in the label_index blob
  // Running element offset into top[0] where the next bottom is written.
  int shift_data = 0;
  for (int i = 0; i < childlayer_num_; ++i) {
    caffe_copy(bottom[i]->count(), bottom[i]->cpu_data(),
             top[0]->mutable_cpu_data() + shift_data);
    shift_data += bottom[i]->count();
// Backward pass (GPU) for y = (shift + scale * x)^power.
// When propagate_down[0] is set, dy/dx is first built in bottom_diff, choosing
// between several special cases (constant derivative, power == 2, shift == 0)
// and a general case, and is then multiplied element-wise by top_diff.
// NOTE(review): this excerpt is incomplete. The call that the dangling
// argument line "diff_scale_ * scale_," belongs to is missing, as are several
// closing braces, so the nesting shown by the indentation cannot be verified.
void PowerLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* bottom_diff = (bottom)[0]->mutable_gpu_diff();
    const int count = (bottom)[0]->count();
    const Dtype* top_diff = top[0]->gpu_diff();
    // Constant-derivative case: fill bottom_diff with diff_scale_.
    if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
      caffe_gpu_set(count, diff_scale_, bottom_diff);
    } else {
      const Dtype* bottom_data = (bottom)[0]->gpu_data();
      // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
      //               = diff_scale * y / (shift + scale * x)
      if (power_ == Dtype(2)) {
        // Special case for y = (shift + scale * x)^2
        //     -> dy/dx = 2 * scale * (shift + scale * x)
        //              = diff_scale * shift + diff_scale * scale * x
            diff_scale_ * scale_,

        if (shift_ != Dtype(0)) {
          caffe_gpu_add_scalar(count, diff_scale_ * shift_, bottom_diff);
      } else if (shift_ == Dtype(0)) {
        // Special case for y = (scale * x)^power
        //     -> dy/dx = scale * power * (scale * x)^(power - 1)
        //              = scale * power * (scale * x)^power * (scale * x)^(-1)
        //              = power * y / x
        const Dtype* top_data = top[0]->gpu_data();
        caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
        caffe_gpu_scal(count, power_, bottom_diff);
      } else {
        // General case: build (shift + scale * x) in bottom_diff, then
        // compute diff_scale * y / (shift + scale * x).
        caffe_copy(count, bottom_data, bottom_diff);
        if (scale_ != Dtype(1)) {
          caffe_gpu_scal(count, scale_, bottom_diff);
        if (shift_ != Dtype(0)) {
          caffe_gpu_add_scalar(count, shift_, bottom_diff);
        const Dtype* top_data = top[0]->gpu_data();
        caffe_gpu_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
        if (diff_scale_ != Dtype(1)) {
          caffe_gpu_scal(count, diff_scale_, bottom_diff);
    // Chain rule: multiply the local derivative by the incoming gradient.
    caffe_gpu_mul(count, top_diff, bottom_diff, bottom_diff);
Exemple #6
// Forward pass (CPU): copies bottom[0] into top[0], then folds every further
// bottom blob into top[0] through caffe_cpu_axpby with both coefficients 1.
// All operations use bottom[0]->count() as the element count.
// NOTE(review): presumably axpby computes top = 1 * bottom[i] + 1 * top,
// i.e. an element-wise sum -- confirm. Closing braces are missing here.
void SumLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  caffe_copy(bottom[0]->count(), bottom_data, top_data);
  // Start at 1: bottom[0] is already in top_data.
  for (int i = 1; i < bottom.size(); ++i) {
    const Dtype* bottom_data_i = bottom[i]->cpu_data();
    caffe_cpu_axpby(bottom[0]->count(), Dtype(1.0), bottom_data_i,
       Dtype(1.0), top_data); 
	// Forward pass (CPU), older pointer-to-vector `top` signature: copies the
	// prefetched data (and labels when output_labels_ is set) and returns 0.
	// NOTE(review): this excerpt is incomplete. The thread join/start statements
	// announced by the comments are not visible, both caffe_copy calls are cut
	// off before their destination argument, and closing braces are missing.
	Dtype DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
		vector<Blob<Dtype>*>* top) {
			// First, join the thread
			// First, join the thread (wait for the thread to finish)
			// Copy the data
			// Copy the data to top, i.e. this layer's output
			caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
			if (output_labels_) {
				caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
			// Start a new prefetch thread
			return Dtype(0.);
Exemple #8
// Forward pass (CPU): requires at least two bottoms with equal num(), reshapes
// data_blob_ / label_blob_ to the 4-D shapes of bottom[0] / bottom[1], and
// copies both bottoms into them one item (index along num) at a time.
// NOTE(review): whatever follows the copy loop (and the closing braces) is
// not visible in this excerpt.
void HDF5OutputLayer<Dtype, MItype, MOtype>::Forward_cpu(
    const vector<Blob<MItype>*>& bottom,
    const vector<Blob<MOtype>*>& top) {
  CHECK_GE(bottom.size(), 2);
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());
  data_blob_.Reshape(bottom[0]->num(), bottom[0]->channels(),
                     bottom[0]->height(), bottom[0]->width());
  label_blob_.Reshape(bottom[1]->num(), bottom[1]->channels(),
                     bottom[1]->height(), bottom[1]->width());
  // Number of elements per item in each bottom.
  const int_tp data_datum_dim = bottom[0]->count() / bottom[0]->num();
  const int_tp label_datum_dim = bottom[1]->count() / bottom[1]->num();

  for (int_tp i = 0; i < bottom[0]->num(); ++i) {
    caffe_copy(data_datum_dim, &bottom[0]->cpu_data()[i * data_datum_dim],
        &data_blob_.mutable_cpu_data()[i * data_datum_dim]);
    caffe_copy(label_datum_dim, &bottom[1]->cpu_data()[i * label_datum_dim],
        &label_blob_.mutable_cpu_data()[i * label_datum_dim]);
// Forward pass (CPU), prefetch-thread variant: reshapes top[0] to the 4-D
// shape of prefetch_data_ and copies the prefetched data (and labels when
// output_labels_ is set).
// NOTE(review): this excerpt is incomplete. The thread join/create calls that
// the log messages refer to are not visible, both caffe_copy calls are cut off
// before their destination argument, and closing braces are missing.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // First, join the thread
  DLOG(INFO) << "Thread joined";
  // Reshape to loaded data.
  top[0]->Reshape(this->prefetch_data_.num(), this->prefetch_data_.channels(),
      this->prefetch_data_.height(), this->prefetch_data_.width());
  // Copy the data
  caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
  DLOG(INFO) << "Prefetch copied";
  if (this->output_labels_) {
    caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
  // Start a new prefetch thread
  DLOG(INFO) << "CreatePrefetchThread";
Exemple #10
// Backward pass (CPU): copies shape(1) * shape(2) gradient elements from
// top[0]'s diff into bottom[0]'s diff, starting at the offset of the last
// index along axis 0 (bottom[0]->offset(num - 1)).
// NOTE(review): propagate_down is not consulted and the rest of bottom_diff is
// not written in the visible lines; the closing brace is missing here.
void LastRowLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
	const Dtype* top_diff = top[0]->cpu_diff();
	Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

	// Sizes of the first three axes of bottom[0].
	int num = bottom[0]->shape(0);
	int num1 = bottom[0]->shape(1);
	int channels = bottom[0]->shape(2);

	// Move the write pointer to the last slice along axis 0.
	bottom_diff += bottom[0]->offset(num - 1);
	caffe_copy(channels * num1, top_diff, bottom_diff);	
// Test: copies the bottom blob's data to the top blob through their GPU
// pointers with caffe_copy, then compares the two blobs element by element
// through their CPU pointers.
// NOTE(review): closing braces are missing from this excerpt.
TYPED_TEST(MathFunctionsTest, TestCopyGPU) {
  const int n = this->blob_bottom_->count();
  const TypeParam* bottom_data = this->blob_bottom_->gpu_data();
  TypeParam* top_data = this->blob_top_->mutable_gpu_data();
  caffe_copy(n, bottom_data, top_data);
  // Re-point both variables at the CPU views for verification.
  bottom_data = this->blob_bottom_->cpu_data();
  top_data = this->blob_top_->mutable_cpu_data();
  for (int i = 0; i < n; ++i) {
    EXPECT_EQ(bottom_data[i], top_data[i]);
// Forward pass (CPU): copies bottom[1] into top[0], then overwrites with the
// ignore label every position where bottom[0] (converted to int) equals
// ignore_label_.
// NOTE(review): the copy uses bottom[1]->count() while the loop uses
// bottom[0]->count(); presumably both bottoms have the same count -- confirm.
// Closing braces are missing from this excerpt.
void IgnoreOverlayLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  caffe_copy(bottom[1]->count(), bottom[1]->cpu_data(), top[0]->mutable_cpu_data());
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int i = 0; i < bottom[0]->count(); ++i) {
    // Implicit Dtype -> int conversion of the value in bottom[0].
    const int value = bottom_data[i];
    if (value == ignore_label_) {
      top_data[i] = static_cast<Dtype>(value);
// Forward pass (CPU): for each of the outer_dim_ input slices of inner_dim_
// elements, writes tiles_ consecutive copies of that slice to the output.
// NOTE(review): closing braces are missing from this excerpt; by indentation
// the bottom_data advance belongs to the outer loop.
void TileLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int i = 0; i < outer_dim_; ++i) {
    for (int t = 0; t < tiles_; ++t) {
      caffe_copy(inner_dim_, bottom_data, top_data);
      // Output pointer advances after every tile.
      top_data += inner_dim_;
    // Input pointer advances once per slice.
    bottom_data += inner_dim_;
Exemple #14
// Forward pass (CPU): bottom[num_cand_] holds one selector value per outer
// index. For each i, the selector (cast to int) picks a candidate blob
// bottom[index], and the i-th slice of inner_dim_ elements of that candidate
// is copied to the output.
// NOTE(review): the index range is only checked by DCHECK_*, which is
// typically compiled out of release builds -- confirm. Closing braces are
// missing from this excerpt.
void SelectLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* select_data = bottom[num_cand_]->cpu_data();
  for (int i = 0; i < outer_dim_; ++i) {
    const int index = static_cast<int>(select_data[i]);
    DCHECK_GE(index, 0);
    DCHECK_LT(index, num_cand_);
    caffe_copy(inner_dim_, bottom[index]->cpu_data() + inner_dim_*i, top_data);
    top_data += inner_dim_;
Exemple #15
// Backward pass (CPU): copies top[0]'s diff unchanged into the diff of every
// bottom blob (top[0]->count() elements each).
// NOTE(review): propagate_down is not consulted in the visible lines; closing
// braces are missing from this excerpt.
void SumLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i=0; i<bottom.size(); i++){
      const Dtype* top_diff=top[0]->cpu_diff();
      Dtype* bottom_diff=bottom[i]->mutable_cpu_diff();
      caffe_copy(top[0]->count(), top_diff, bottom_diff);

Exemple #16
// Copies `count` elements from `source`, starting at element `other_offset`,
// into this tensor starting at element `this_offset`. Both ranges are checked
// with ASSERT against the respective count() first. CPU or GPU memory is
// used depending on mode().
// NOTE(review): no `break;` or `default:` lines are visible in this excerpt.
// As shown, the CPU case would fall through into the GPU copy and then into
// LOG(FATAL) -- confirm against the full source whether those lines exist.
void Tensor<Dtype>::CopyChunkFrom(const Tensor& source, int count,
    int this_offset, int other_offset) {
  ASSERT(source.count() >= count + other_offset,
    "Chunk exceeds source memory: "
    << count << " + " << other_offset << " > " << source.count());
  ASSERT(this->count() >= count + this_offset, "Chunk exceeds target memory: "
    << count << " + " << this_offset << " > " << this->count());

  switch (mode()) {
  case Caffe::CPU:
    caffe_copy(count, source.cpu_mem() + other_offset,
        mutable_cpu_mem() + this_offset);
  case Caffe::GPU:
    caffe_copy(count, source.gpu_mem() + other_offset,
        mutable_gpu_mem() + this_offset);
    LOG(FATAL) << "Unknown caffe mode.";
Exemple #17
// Forward pass (CPU), older pointer-to-vector `top` signature: copies the
// prefetched data, optional labels, and the three prefetch_itact_* blobs
// (data, label, count).
// NOTE(review): this excerpt is incomplete. Every caffe_copy call is cut off
// before its destination argument, the thread join/start statements announced
// by the comments are not visible, and closing braces are missing.
void BasePrefetchingInteractionDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  // LOG(INFO) << "Forward_cpu";
  // First, join the thread
  // Copy the data
  caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
  if (this->output_labels_) {
    caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
  caffe_copy(prefetch_itact_data_.count(), prefetch_itact_data_.cpu_data(),
  caffe_copy(prefetch_itact_label_.count(), prefetch_itact_label_.cpu_data(),
  caffe_copy(prefetch_itact_count_.count(), prefetch_itact_count_.cpu_data(),
  // Start a new prefetch thread
Exemple #18
// Forward pass (CPU): copies shape(1) * shape(2) elements of bottom[0],
// starting at the offset of the last index along axis 0
// (bottom[0]->offset(num - 1)), into top[0].
// NOTE(review): the closing brace is missing from this excerpt.
void LastRowLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
	const Dtype* bottom_data = bottom[0]->cpu_data();
	Dtype* top_data = top[0]->mutable_cpu_data();

	// Sizes of the first three axes of bottom[0].
	int num = bottom[0]->shape(0);
	int num1 = bottom[0]->shape(1);
	int channels = bottom[0]->shape(2);

	// Move the read pointer to the last slice along axis 0.
	bottom_data += bottom[0]->offset(num - 1);
	caffe_copy(channels * num1, bottom_data, top_data);
		// Test fixture setup: reshapes blob_bottom_ to 2x5x2x2 (40 elements), fills
		// it with a fixed permutation of 0..39, and loads reference blobs t1, t3
		// and t5 with expected values (t5 receives the unmodified input data).
		// NOTE(review): the creation/reshape of t1, t3 and t5 is not visible in this
		// excerpt; the copies below assume their count() does not exceed 40 --
		// confirm. The closing brace is missing.
		virtual void SetUp() {
			blob_bottom_->Reshape(2, 5, 2, 2);
			Dtype* bottom_data = blob_bottom_->mutable_cpu_data();
			// Input values: 40 distinct integers.
			Dtype data[] = { 7, 4, 29, 22, 20,
				26, 10, 21, 36, 39,
				12, 11, 24, 37, 15,
				8, 31, 34, 27, 5,
				0, 30, 14, 16, 1,
				6, 13, 3, 23, 28,
				9, 2, 32, 38, 19,
				17, 25, 35, 18, 33 };
			caffe_copy(blob_bottom_->count(), data, bottom_data);


			// Reference output for t1: most entries zeroed, a few inputs kept.
			Dtype t1_data[] = { 0, 0, 29, 22, 0,
				0, 0, 0, 36, 39,
				0, 0, 0, 0, 0,
				0, 0, 0, 0, 0,
				0, 0, 0, 0, 0,
				0, 0, 0, 0, 0,
				0, 0, 32, 38, 19,
				0, 0, 0, 0, 33 };
			// Reference output for t3: fewer entries zeroed than in t1_data.
			Dtype t3_data[] = { 0, 0, 29, 22, 0,
				0, 0, 21, 36, 39,
				0, 11, 24, 37, 15,
				0, 31, 34, 27, 0,
				0, 30, 14, 16, 0,
				0, 0, 0, 23, 0,
				0, 0, 32, 38, 19,
				17, 25, 35, 18, 33 };
			caffe_copy(t1->count(), t1_data, t1->mutable_cpu_data());
			caffe_copy(t3->count(), t3_data, t3->mutable_cpu_data());
			caffe_copy(t5->count(), data, t5->mutable_cpu_data());
// Forward pass (CPU) of a quantized convolution: the layer's weights (and
// bias, when bias_term_ is set) are copied, quantized via QuantizeWeights_cpu,
// and the convolution is then run per bottom and per item with the quantized
// weights. In the TEST phase the layer outputs are quantized as well.
// NOTE(review): this excerpt is incomplete. The body of the input-trimming
// loop, the destinations of both caffe_copy calls, the rest of the `rounding`
// expression and of the QuantizeWeights_cpu call, and the closing braces are
// all missing.
void ConvolutionRistrettoLayer<Dtype>::Forward_cpu(
      const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Trim layer input
  if (this->phase_ == TEST) {
    for (int i = 0; i < bottom.size(); ++i) {
  // Trim weights
  caffe_copy(this->blobs_[0]->count(), this->blobs_[0]->cpu_data(),
  if (this->bias_term_) {
    caffe_copy(this->blobs_[1]->count(), this->blobs_[1]->cpu_data(),
  int rounding = this->phase_ == TEST ? this->rounding_ :
  this->QuantizeWeights_cpu(this->weights_quantized_, rounding,
  // Do forward propagation
  const Dtype* weight = this->weights_quantized_[0]->cpu_data();
  for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    // One GEMM (plus optional bias) per item n in the batch.
    for (int n = 0; n < this->num_; ++n) {
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_data + n * this->top_dim_);
      if (this->bias_term_) {
        const Dtype* bias = this->weights_quantized_[1]->cpu_data();
        this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
    // Trim layer output
    if (this->phase_ == TEST) {
      this->QuantizeLayerOutputs_cpu(top_data, top[i]->count());
Exemple #21
// Recursive N-dimensional crop copy.
//   bottom, top : blobs whose shapes/offsets define the uncropped and cropped
//                 layouts
//   offsets     : per-axis crop offset into bottom
//   indices     : index built up so far (taken by value, one copy per call)
//   cur_dim     : axis handled by this call
//   src_data / dest_data : source and destination buffers
//   is_forward  : true copies bottom -> top, false copies top -> bottom
// While cur_dim is not the last axis, the function loops over that axis of
// top and recurses. At the last axis it builds the cropped index (ind_red) and
// the offset-shifted index (ind_off) and copies between the two locations.
// NOTE(review): the head of both copy calls (function name and element count)
// and several closing braces are missing from this excerpt.
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
             const vector<Blob<Dtype>*>& top,
             const int* offsets,
             vector<int> indices,
             int cur_dim,
             const Dtype* src_data,
             Dtype* dest_data,
             bool is_forward) {
  if (cur_dim + 1 < top[0]->num_axes()) {
    // We are not yet at the final dimension, call copy recursively
    for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
      indices[cur_dim] = i;
      crop_copy(bottom, top, offsets, indices, cur_dim+1,
                src_data, dest_data, is_forward);
  } else {
    // We are at the last dimensions, which is stored continuously in memory
    // prepare index vector reduced(red) and with offsets(off)
    std::vector<int> ind_red(cur_dim, 0);
    std::vector<int> ind_off(cur_dim+1, 0);
    for (int j = 0; j < cur_dim; ++j) {
      ind_red[j] = indices[j];
      ind_off[j] = indices[j] + offsets[j];
    // The last axis contributes only its crop offset.
    ind_off[cur_dim] = offsets[cur_dim];
    // do the copy
    if (is_forward) {
          src_data + bottom[0]->offset(ind_off),
          dest_data + top[0]->offset(ind_red));
    } else {
      // in the backwards pass the src_data is top_diff
      // and the dest_data is bottom_diff
          src_data + top[0]->offset(ind_red),
          dest_data + bottom[0]->offset(ind_off));
	// Backward pass (CPU) for an element-wise masking layer with weights in
	// blobs_[0] and an optional bias in blobs_[1].
	// Two gradient formulations are selected by stable_prod_grad_:
	//   - product form: bottom diff = weights .* top diff,
	//     weight diff = bottom data .* top diff;
	//   - division form: (top data ./ operand) .* top diff, kept for
	//     comparison/debugging per the author's comment.
	// In both, the bias diff (when bias_term_ is set) is a copy of top diff.
	// NOTE(review): closing braces are missing from this excerpt, so the nesting
	// shown by the indentation cannot be verified. All parameter diffs are
	// overwritten rather than accumulated in the visible lines -- confirm this
	// is intended.
	void MaskingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
		CHECK_GE(this->blobs_.size(), 1);

		// TODO: check gradient formulas (http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/)

		if (stable_prod_grad_) {
			if (propagate_down[0]) {
				// Gradient with respect to bottom data
				caffe_mul(top[0]->count(), this->blobs_[0]->cpu_data(), top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff()); // d_i = d_(i+1) .* w

			// Gradient with respect to weights
			caffe_mul(top[0]->count(), bottom[0]->cpu_data(), top[0]->cpu_diff(), this->blobs_[0]->mutable_cpu_diff()); // d_i = d_(i+1) .* in

			// Gradient with respect to bias
			if (bias_term_) {
				// TODO: check whether there are any smart pointer tricks which can replace the copying overhead
				caffe_copy(top[0]->count(), top[0]->cpu_diff(), this->blobs_[1]->mutable_cpu_diff()); // d_i = d_(i+1)
		} else {
			// less stable gradient computation method inspired by elementwise layer, this is just for comparison/debugging purposes

			if (propagate_down[0]) {
				// Gradient with respect to bottom data
				caffe_div(top[0]->count(), top[0]->cpu_data(), bottom[0]->cpu_data(), bottom[0]->mutable_cpu_diff());
				caffe_mul(top[0]->count(), bottom[0]->cpu_diff(), top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff());

			// Gradient with respect to weights
			caffe_div(top[0]->count(), top[0]->cpu_data(), this->blobs_[0]->cpu_data(), this->blobs_[0]->mutable_cpu_diff());
			caffe_mul(top[0]->count(), this->blobs_[0]->cpu_diff(), top[0]->cpu_diff(), this->blobs_[0]->mutable_cpu_diff());

			// Gradient with respect to bias
			if (bias_term_) {
				caffe_copy(top[0]->count(), top[0]->cpu_diff(), this->blobs_[1]->mutable_cpu_diff()); // d_i = d_(i+1)
Exemple #23
// Forward pass (CPU), visible part: for every recurrent blob pair, copies the
// data of recur_output_blobs_[i] into recur_input_blobs_[i]. The local names
// suggest this carries the final-timestep state over as the next initial state.
// NOTE(review): sizes are only checked by DCHECK_EQ; the remainder of the
// function and the closing braces are not visible in this excerpt.
void RecurrentLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  DCHECK_EQ(recur_input_blobs_.size(), recur_output_blobs_.size());
  for (int i = 0; i < recur_input_blobs_.size(); ++i) {
    const int count = recur_input_blobs_[i]->count();
    DCHECK_EQ(count, recur_output_blobs_[i]->count());
    const Dtype* timestep_T_data = recur_output_blobs_[i]->cpu_data();
    Dtype* timestep_0_data = recur_input_blobs_[i]->mutable_cpu_data();
    caffe_copy(count, timestep_T_data, timestep_0_data);

Exemple #24
// Backward pass (CPU): walks through top[0]'s diff in consecutive chunks of
// bottom[i]->count() elements, one chunk per child layer.
// NOTE(review): this excerpt is incomplete. The caffe_copy call is cut off
// before its destination argument, and the dangling "/ label_index_[i]);"
// line belongs to a scaling statement whose beginning is missing. Closing
// braces are missing as well.
void UnifiedLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // copy back diff, dispatch them to child layers, scale them back
  // Running element offset into top[0]'s diff for the current child.
  int shift_diff = 0;
  for (int i = 0; i < childlayer_num_; ++i) {
    // dispatch diff: diff * (batch_size_sum/batch_size[i])
    caffe_copy(bottom[i]->count(), top[0]->cpu_diff() + shift_diff,
      / label_index_[i]);
    shift_diff += bottom[i]->count();
Exemple #25
// Test: checks caffe_cpu_gemv and caffe_gpu_gemv on a 2x3 matrix
// A = [[1,2,3],[4,5,6]].
//   No transpose: y = A * x   with x = [1,2,3] -> expected [14, 32].
//   Transpose:    x = A^T * y with y = [1,2]   -> expected [9, 12, 15].
// The test body only runs for 4-byte TypeParam or CUDA compute capability
// major >= 2; otherwise it logs that the test is skipped.
// NOTE(review): closing braces are missing from this excerpt.
TYPED_TEST(GemmTest, TestGemvCPUGPU) {
  Blob<TypeParam> A(1, 1, 2, 3);
  Blob<TypeParam> x(1, 1, 1, 3);
  Blob<TypeParam> y(1, 1, 1, 2);
  TypeParam data[6] = {1, 2, 3, 4, 5, 6};
  TypeParam result_2[2] = {14, 32};
  TypeParam result_3[3] = {9, 12, 15};
  // A takes all six values; x takes the first three.
  caffe_copy(6, data, A.mutable_cpu_data());
  caffe_copy(3, data, x.mutable_cpu_data());

  if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
    caffe_cpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.cpu_data(),
        x.cpu_data(), 0., y.mutable_cpu_data());
    for (int i = 0; i < 2; ++i) {
      EXPECT_EQ(y.cpu_data()[i], result_2[i]);
    caffe_gpu_gemv<TypeParam>(CblasNoTrans, 2, 3, 1., A.gpu_data(),
        x.gpu_data(), 0., y.mutable_gpu_data());
    for (int i = 0; i < 2; ++i) {
      EXPECT_EQ(y.cpu_data()[i], result_2[i]);

    // Test transpose case
    // y takes the first two values of data.
    caffe_copy(2, data, y.mutable_cpu_data());
    caffe_cpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.cpu_data(),
        y.cpu_data(), 0., x.mutable_cpu_data());
    for (int i = 0; i < 3; ++i) {
      EXPECT_EQ(x.cpu_data()[i], result_3[i]);
    caffe_gpu_gemv<TypeParam>(CblasTrans, 2, 3, 1., A.gpu_data(),
        y.gpu_data(), 0., x.mutable_gpu_data());
    for (int i = 0; i < 3; ++i) {
      EXPECT_EQ(x.cpu_data()[i], result_3[i]);
  } else {
    LOG(ERROR) << "Skipping test due to old architecture.";
Exemple #26
// Copies either the data or the diff (copy_diff) of `source` into this blob.
//   - For blobs in GLOBAL mode, copying data (copy_diff == false) is fatal.
//   - If the 4-D shapes differ, the blob is reshaped to match when `reshape`
//     is true; otherwise the call is fatal.
//   - The copy uses GPU or CPU pointers depending on Caffe::mode().
// NOTE(review): this excerpt is incomplete. All four caffe_copy calls are cut
// off before their destination argument, and no `break;`/`default:` lines or
// closing braces are visible, so the switch fall-through behavior cannot be
// judged from here.
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (blob_mode_ == BlobProto_BlobMode_GLOBAL) {
    if (!copy_diff) {
      LOG(FATAL) << "Currently Petuum Caffe does not support "
                 << "copying data to blobs with GLOBAL mode";
    } // TODO: support CopyFrom( copy_diff == false )
  if (num_ != source.num() || channels_ != source.channels() ||
      height_ != source.height() || width_ != source.width()) {
    if (reshape) {
      Reshape(source.num(), source.channels(), source.height(), source.width());
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
  switch (Caffe::mode()) {
  case Caffe::GPU:
    if (copy_diff) {
      caffe_copy(count_, source.gpu_diff(),
    } else {
      caffe_copy(count_, source.gpu_data(),
  case Caffe::CPU:
    if (copy_diff) {
      caffe_copy(count_, source.cpu_diff(),
    } else {
      caffe_copy(count_, source.cpu_data(),
    LOG(FATAL) << "Unknown caffe mode.";
Exemple #27
// Iterative N-dimensional crop copy. The loop runs once per contiguous run
// along the last axis of top (copy_count = count / shape(last_dim)). Each
// iteration decodes its flat index i into per-axis indices, builds the cropped
// index (ind_red) and the offset-shifted index (ind_off), and copies between
// the two locations. The loop is annotated with an OpenMP parallel-for pragma
// when _OPENMP is defined.
// NOTE(review): this excerpt is incomplete. `is_forward` is used but is not
// among the visible parameters, the matching #endif is not visible, the head
// of both copy calls (function name and element count) is missing, and
// closing braces are missing.
void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
             const vector<Blob<Dtype>*>& top,
             const vector<int>& offsets,
             const Dtype* src_data,
             Dtype* dest_data) {
  int last_dim = top[0]->num_axes() - 1;
  int copy_count = top[0]->count() / top[0]->shape(last_dim);

#ifdef _OPENMP
  #pragma omp parallel for
  for (int i = 0; i < copy_count; ++i) {
    // prepare index vector reduced(red) and with offsets(off)
    std::vector<int> ind_red(last_dim, 0);
    std::vector<int> ind_off(last_dim+1, 0);
    // Decode the flat index i into per-axis indices, innermost axis first.
    int cur_iteration = i;
    for (int j = last_dim - 1; j >=0; --j) {
      int index = cur_iteration % top[0]->shape(j);
      cur_iteration /= top[0]->shape(j);
      ind_red[j] = index;
      ind_off[j] = index + offsets[j];
    ind_off[last_dim] = offsets[last_dim];
    // Last dimensions stored continously in memory
    // do the copy
    if (is_forward) {
          src_data + bottom[0]->offset(ind_off),
          dest_data + top[0]->offset(ind_red));
    } else {
      // in the backwards pass the src_data is top_diff
      // and the dest_data is bottom_diff
          src_data + top[0]->offset(ind_red),
          dest_data + bottom[0]->offset(ind_off));
Exemple #28
// Forward pass (CPU): softmax along softmax_axis_.
// For each of the outer_num_ slices: take the per-position maximum over the
// channels, subtract it from the copied input, exponentiate, sum over the
// channels, and divide every channel by that sum. scale_ is used as scratch
// space for the maximum and then for the sum.
// NOTE(review): closing braces are missing from this excerpt.
void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  int channels = bottom[0]->shape(softmax_axis_);
  // Number of elements in one outer slice.
  int dim = bottom[0]->count() / outer_num_;
  caffe_copy(bottom[0]->count(), bottom_data, top_data);
  // We need to subtract the max to avoid numerical issues, compute the exp,
  // and then normalize.
  for (int i = 0; i < outer_num_; ++i) {
    // initialize scale_data to the first plane
    caffe_copy(inner_num_, bottom_data + i * dim, scale_data);
    for (int j = 0; j < channels; j++) {
      for (int k = 0; k < inner_num_; k++) {
        scale_data[k] = std::max(scale_data[k],
            bottom_data[i * dim + j * inner_num_ + k]);
    } // For each image, take the maximum value along the channel axis. Per the original author's note, the memory scale_data points to can hold outer_num_ x 1 x inner_num_ elements, but each pass only updates its first inner_num_ elements; scale_ only stores intermediate values.

    // subtraction
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels, inner_num_,
        1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data);
    // exponentiation
    caffe_exp<Dtype>(dim, top_data, top_data);
    // sum after exp
    caffe_cpu_gemv<Dtype>(CblasTrans, channels, inner_num_, 1.,
        top_data, sum_multiplier_.cpu_data(), 0., scale_data);
    // division
    for (int j = 0; j < channels; j++) {
      caffe_div(inner_num_, top_data, scale_data, top_data);
      // Advancing top_data here also moves it to the next outer slice.
      top_data += inner_num_;
Exemple #29
// Backward pass (CPU) of an L2 normalization layer.
// top diff is first copied into bottom diff. Then, per item i (d elements
// each), a = dot(top_data_i, top_diff_i) is computed and caffe_cpu_axpby is
// called with alpha = -a * norm_scale[i], X = top_data_i, beta = norm_scale[i]
// and Y = bottom_diff_i.
// NOTE(review): presumably this yields
// bottom_diff_i = norm_scale[i] * (top_diff_i - a * top_data_i) -- confirm the
// axpby semantics. propagate_down is not consulted in the visible lines, and
// closing braces are missing from this excerpt.
void L2NormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* norm_scale = norm_.cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // n items of d elements each.
  const int n = top[0]->num();
  const int d = top[0]->count() / n;
  caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
  for (int i=0; i<n; ++i) {
    Dtype a = caffe_cpu_dot(d, top_data+i*d, top_diff+i*d);
    caffe_cpu_axpby(d, Dtype(-1) * a * norm_scale[i], top_data + i*d, norm_scale[i], bottom_diff + i*d);
Exemple #30
// Forward pass (CPU) of a 4-D crop: iterates over every (n, c, h) row of
// top[0]; the source address is the bottom row shifted by crop_h_ vertically
// and starting at column crop_w_, the destination is the start of the top row.
// NOTE(review): the head of the copy call (function name and element count)
// and the closing braces are missing from this excerpt.
void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int n = 0; n < top[0]->num(); ++n) {
    for (int c = 0; c < top[0]->channels(); ++c) {
      for (int h = 0; h < top[0]->height(); ++h) {
            bottom_data + bottom[0]->offset(n, c, crop_h_ + h, crop_w_),
            top_data + top[0]->offset(n, c, h));